In [1]:
import pandas as pd
import numpy as np
from scipy.stats import mannwhitneyu
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Directory that holds every results CSV read below. Keeping it in one place
# means a relocation of the inputs is a one-line change.
DATA_DIR = 'Access'


def _read_results(filename):
    """Read one results CSV located in ``DATA_DIR`` and return it as a DataFrame."""
    return pd.read_csv(f'{DATA_DIR}/{filename}')


# Binary group comparisons (file names carry no gender/cirrhosis prefix).
group1_nd_heavy = _read_results('group1_nondrinker_vs_heavy_first_results.csv')
group3_nd_any = _read_results('group3_nondrinker_vs_any_first_results.csv')
group4_nd_mod = _read_results('group4_nondrinker_vs_moderate_first_results.csv')

# Continuous drinks_per_week results.
continuous_all = _read_results('continuous_drinks_per_week_first_results.csv')

# Continuous results split by cirrhosis status.
cirr_absent_cont = _read_results('cirrhosis_absent_continuous_drinks_per_week_first_results.csv')
cirr_present_cont = _read_results('cirrhosis_present_continuous_drinks_per_week_first_results.csv')

# Binary group comparisons split by gender.
male_g1 = _read_results('gender_male_group1_nondrinker_vs_heavy_first_results.csv')
male_g3 = _read_results('gender_male_group3_nondrinker_vs_anydrinker_first_results.csv')
male_g4 = _read_results('gender_male_group4_nondrinker_vs_moderate_first_results.csv')

female_g1 = _read_results('gender_female_group1_nondrinker_vs_heavy_first_results.csv')
female_g3 = _read_results('gender_female_group3_nondrinker_vs_anydrinker_first_results.csv')
female_g4 = _read_results('gender_female_group4_nondrinker_vs_moderate_first_results.csv')

# Binary group comparisons split by cirrhosis status.
cirr_present_g1 = _read_results('cirrhosis_present_group1_nondrinker_vs_heavy_first_results.csv')
cirr_present_g4 = _read_results('cirrhosis_present_group4_nondrinker_vs_moderate_first_results.csv')

cirr_absent_g1 = _read_results('cirrhosis_absent_group1_nondrinker_vs_heavy_first_results.csv')
cirr_absent_g4 = _read_results('cirrhosis_absent_group4_nondrinker_vs_moderate_first_results.csv')

cirr_present_g3 = _read_results('cirrhosis_present_group3_nondrinker_vs_anydrinker_first_results.csv')
cirr_absent_g3 = _read_results('cirrhosis_absent_group3_nondrinker_vs_anydrinker_first_results.csv')

# Continuous results split by gender.
male_continuous = _read_results('gender_male_continuous_drinks_per_week_first_results.csv')
female_continuous = _read_results('gender_female_continuous_drinks_per_week_first_results.csv')

# Report success only once every file above has actually been read.
print("Loaded all CSV files")

FileNotFoundError: [Errno 2] No such file or directory: 'Output/group1_nondrinker_vs_heavy_first_results.csv'

In [None]:
def get_exog_endog_effects(df, exog_var):
    """Map each outcome to its effect for one treatment variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table with 'treatment', 'outcome', 'effect' and 'pvalue'
        columns. 'pvalue' is selected (so it must exist) but is not returned.
    exog_var : str
        Value of the 'treatment' column whose rows are kept.

    Returns
    -------
    dict
        ``{outcome: effect}`` for the rows where ``treatment == exog_var``;
        empty when no row matches.
    """
    is_selected_treatment = df['treatment'] == exog_var
    selected_rows = df.loc[is_selected_treatment, ['outcome', 'effect', 'pvalue']]
    return selected_rows.set_index('outcome')['effect'].to_dict()

def compare_two_groups(df1, df2, exog_var, group1_name, group2_name, min_mediators=3):
    """Compare the effects of ``exog_var`` on shared outcomes between two tables.

    Effects are extracted from each table with ``get_exog_endog_effects``;
    only outcomes present in both tables are compared.

    Parameters
    ----------
    df1, df2 : pandas.DataFrame
        Results tables accepted by ``get_exog_endog_effects``.
    exog_var : str
        Treatment variable whose effects are compared.
    group1_name, group2_name : str
        Labels written to the 'group1' / 'group2' output columns.
    min_mediators : int, default 3
        Minimum number of shared outcomes required to run the Mann-Whitney U
        test; below it the statistic and p-value are NaN.

    Returns
    -------
    pandas.DataFrame
        One row per shared outcome (sorted by outcome name) with columns
        'pathway', 'mediator', 'group1', 'effect1', 'group2', 'effect2',
        'difference' (effect2 - effect1), 'abs_difference',
        'mannwhitney_statistic' and 'mannwhitney_pvalue'. The last two come
        from a single test over all shared outcomes and therefore repeat on
        every row. When no outcome is shared, the frame is empty but still
        has these columns so callers can select them.
    """
    columns = [
        'pathway', 'mediator', 'group1', 'effect1', 'group2', 'effect2',
        'difference', 'abs_difference',
        'mannwhitney_statistic', 'mannwhitney_pvalue',
    ]

    effects1 = get_exog_endog_effects(df1, exog_var)
    effects2 = get_exog_endog_effects(df2, exog_var)

    # Sort once so the test inputs and the output rows share one order.
    mediators = sorted(set(effects1) & set(effects2))

    effects1_values = [effects1[m] for m in mediators]
    effects2_values = [effects2[m] for m in mediators]

    # NOTE(review): the two lists are matched by outcome, yet mannwhitneyu
    # treats them as independent samples -- confirm this is intended rather
    # than a paired test such as scipy.stats.wilcoxon.
    if len(mediators) >= min_mediators:
        test_statistic, test_pvalue = mannwhitneyu(
            effects1_values, effects2_values, alternative='two-sided'
        )
    else:
        test_statistic = np.nan
        test_pvalue = np.nan

    results = [
        {
            'pathway': f'{exog_var}->{mediator}',
            'mediator': mediator,
            'group1': group1_name,
            'effect1': e1,
            'group2': group2_name,
            'effect2': e2,
            'difference': e2 - e1,
            'abs_difference': abs(e2 - e1),
            'mannwhitney_statistic': test_statistic,
            'mannwhitney_pvalue': test_pvalue,
        }
        for mediator, e1, e2 in zip(mediators, effects1_values, effects2_values)
    ]

    # Explicit columns keep the schema stable even when `results` is empty.
    return pd.DataFrame(results, columns=columns)

In [None]:
# Section 1: effects from the ND-vs-moderate table against the ND-vs-heavy table.
print("=" * 80, "1. ALL DRINKERS: ND vs MD & ND vs HD", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp1a = compare_two_groups(
    df1=group4_nd_mod,
    df2=group1_nd_heavy,
    exog_var='alcohol_binary',
    group1_name='ND_vs_Moderate',
    group2_name='ND_vs_Heavy',
).assign(comparison='1_All_Drinkers_MD_vs_HD')

print("\nND vs Moderate compared to ND vs Heavy:")
print(comp1a.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp1a.to_csv(path_or_buf='wilcoxon_1_all_drinkers.csv', index=False)
print("\nSaved: wilcoxon_1_all_drinkers.csv")

In [None]:
# Section 2: drinks_per_week effects, cirrhosis-absent table against cirrhosis-present table.
print("\n" + "=" * 80, "2. CIRRHOSIS + vs - with drinks_per_week", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp2 = compare_two_groups(
    df1=cirr_absent_cont,
    df2=cirr_present_cont,
    exog_var='drinks_per_week',
    group1_name='Cirrhosis_Absent',
    group2_name='Cirrhosis_Present',
).assign(comparison='2_Cirrhosis_drinks_per_week')

print("\nCirrhosis Absent vs Present (drinks_per_week effects):")
print(comp2.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp2.to_csv(path_or_buf='wilcoxon_2_cirrhosis_drinks_per_week.csv', index=False)
print("\nSaved: wilcoxon_2_cirrhosis_drinks_per_week.csv")

In [None]:
# Section 3: alcohol_binary effects (ND vs any), cirrhosis-absent against cirrhosis-present.
print("\n" + "=" * 80, "3. CIRRHOSIS + vs - (ND vs Any)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp3 = compare_two_groups(
    df1=cirr_absent_g3,
    df2=cirr_present_g3,
    exog_var='alcohol_binary',
    group1_name='CirrAbsent_ND_vs_Any',
    group2_name='CirrPresent_ND_vs_Any',
).assign(comparison='3_Cirrhosis_ND_vs_Any')

print("\nCirrhosis Absent vs Present (ND vs Any):")
print(comp3.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp3.to_csv(path_or_buf='wilcoxon_3_cirrhosis_nd_vs_any.csv', index=False)
print("\nSaved: wilcoxon_3_cirrhosis_nd_vs_any.csv")

In [None]:
# Section 4: alcohol_binary effects (ND vs moderate), cirrhosis-absent against cirrhosis-present.
print("\n" + "=" * 80, "4. CIRRHOSIS + vs - (ND vs MD)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp4 = compare_two_groups(
    df1=cirr_absent_g4,
    df2=cirr_present_g4,
    exog_var='alcohol_binary',
    group1_name='CirrAbsent_ND_vs_Mod',
    group2_name='CirrPresent_ND_vs_Mod',
).assign(comparison='4_Cirrhosis_ND_vs_MD')

print("\nCirrhosis Absent vs Present (ND vs Moderate):")
print(comp4.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp4.to_csv(path_or_buf='wilcoxon_4_cirrhosis_nd_vs_md.csv', index=False)
print("\nSaved: wilcoxon_4_cirrhosis_nd_vs_md.csv")

In [None]:
# Section 5: alcohol_binary effects (ND vs heavy), cirrhosis-absent against cirrhosis-present.
print("\n" + "=" * 80, "5. CIRRHOSIS + vs - (ND vs HD)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp5 = compare_two_groups(
    df1=cirr_absent_g1,
    df2=cirr_present_g1,
    exog_var='alcohol_binary',
    group1_name='CirrAbsent_ND_vs_Heavy',
    group2_name='CirrPresent_ND_vs_Heavy',
).assign(comparison='5_Cirrhosis_ND_vs_HD')

print("\nCirrhosis Absent vs Present (ND vs Heavy):")
print(comp5.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp5.to_csv(path_or_buf='wilcoxon_5_cirrhosis_nd_vs_hd.csv', index=False)
print("\nSaved: wilcoxon_5_cirrhosis_nd_vs_hd.csv")

In [None]:
# Section 6: drinks_per_week effects, male table against female table.
print("\n" + "=" * 80, "6. GENDER Male vs Female (drinks_per_week)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp6 = compare_two_groups(
    df1=male_continuous,
    df2=female_continuous,
    exog_var='drinks_per_week',
    group1_name='Male_drinks_per_week',
    group2_name='Female_drinks_per_week',
).assign(comparison='6_Gender_drinks_per_week')

print("\nMale vs Female (drinks_per_week):")
print(comp6.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp6.to_csv(path_or_buf='wilcoxon_6_gender_drinks_per_week.csv', index=False)
print("\nSaved: wilcoxon_6_gender_drinks_per_week.csv")

In [None]:
# Section 7: alcohol_binary effects (ND vs heavy), male table against female table.
print("\n" + "=" * 80, "7. GENDER Male vs Female (ND vs HD)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp7 = compare_two_groups(
    df1=male_g1,
    df2=female_g1,
    exog_var='alcohol_binary',
    group1_name='Male_ND_vs_Heavy',
    group2_name='Female_ND_vs_Heavy',
).assign(comparison='7_Gender_ND_vs_HD')

print("\nMale vs Female (ND vs Heavy):")
print(comp7.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp7.to_csv(path_or_buf='wilcoxon_7_gender_nd_vs_hd.csv', index=False)
print("\nSaved: wilcoxon_7_gender_nd_vs_hd.csv")

In [None]:
# Section 8: alcohol_binary effects (ND vs any), male table against female table.
print("\n" + "=" * 80, "8. GENDER Male vs Female (ND vs ANY)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp8 = compare_two_groups(
    df1=male_g3,
    df2=female_g3,
    exog_var='alcohol_binary',
    group1_name='Male_ND_vs_Any',
    group2_name='Female_ND_vs_Any',
).assign(comparison='8_Gender_ND_vs_ANY')

print("\nMale vs Female (ND vs Any):")
print(comp8.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp8.to_csv(path_or_buf='wilcoxon_8_gender_nd_vs_any.csv', index=False)
print("\nSaved: wilcoxon_8_gender_nd_vs_any.csv")

In [None]:
# Section 9: alcohol_binary effects (ND vs moderate), male table against female table.
print("\n" + "=" * 80, "9. GENDER Male vs Female (ND vs MD)", "=" * 80, sep="\n")

# Run the comparison and tag every row with its comparison label.
comp9 = compare_two_groups(
    df1=male_g4,
    df2=female_g4,
    exog_var='alcohol_binary',
    group1_name='Male_ND_vs_Mod',
    group2_name='Female_ND_vs_Mod',
).assign(comparison='9_Gender_ND_vs_MD')

print("\nMale vs Female (ND vs Moderate):")
print(comp9.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])

# Persist the full table (all columns) without the row index.
comp9.to_csv(path_or_buf='wilcoxon_9_gender_nd_vs_md.csv', index=False)
print("\nSaved: wilcoxon_9_gender_nd_vs_md.csv")