In [None]:
import pandas as pd
import numpy as np
import networkx as nx
from scipy.stats import mannwhitneyu
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Registry of result files to load: short dataset label -> (CSV path, alcohol variable).
# The second tuple element is the value that get_all_negative_effects() matches against
# the 'treatment' column to find the direct effects of alcohol in that file.
# Grouping comments below are inferred from the file names only.
files_to_analyze = {
    # Unstratified analyses
    'continuous_all': ('Access/continuous_drinks_per_week_first_results.csv', 'drinks_per_week'),
    'group1_nd_heavy': ('Access/group1_nondrinker_vs_heavy_first_results.csv', 'alcohol_binary'),
    # NOTE(review): this path ends in "_vs_any" while every stratified group3 file below
    # uses "_vs_anydrinker" - confirm the file name on disk.
    'group3_nd_any': ('Access/group3_nondrinker_vs_any_first_results.csv', 'alcohol_binary'),
    'group4_nd_mod': ('Access/group4_nondrinker_vs_moderate_first_results.csv', 'alcohol_binary'),
    # Stratified by cirrhosis status (absent / present)
    'cirr_absent_cont': ('Access/cirrhosis_absent_continuous_drinks_per_week_first_results.csv', 'drinks_per_week'),
    'cirr_present_cont': ('Access/cirrhosis_present_continuous_drinks_per_week_first_results.csv', 'drinks_per_week'),
    'cirr_absent_g1': ('Access/cirrhosis_absent_group1_nondrinker_vs_heavy_first_results.csv', 'alcohol_binary'),
    'cirr_absent_g3': ('Access/cirrhosis_absent_group3_nondrinker_vs_anydrinker_first_results.csv', 'alcohol_binary'),
    'cirr_absent_g4': ('Access/cirrhosis_absent_group4_nondrinker_vs_moderate_first_results.csv', 'alcohol_binary'),
    'cirr_present_g1': ('Access/cirrhosis_present_group1_nondrinker_vs_heavy_first_results.csv', 'alcohol_binary'),
    'cirr_present_g3': ('Access/cirrhosis_present_group3_nondrinker_vs_anydrinker_first_results.csv', 'alcohol_binary'),
    'cirr_present_g4': ('Access/cirrhosis_present_group4_nondrinker_vs_moderate_first_results.csv', 'alcohol_binary'),
    # Stratified by gender (male / female)
    'male_continuous': ('Access/gender_male_continuous_drinks_per_week_first_results.csv', 'drinks_per_week'),
    'female_continuous': ('Access/gender_female_continuous_drinks_per_week_first_results.csv', 'drinks_per_week'),
    'male_g1': ('Access/gender_male_group1_nondrinker_vs_heavy_first_results.csv', 'alcohol_binary'),
    'male_g3': ('Access/gender_male_group3_nondrinker_vs_anydrinker_first_results.csv', 'alcohol_binary'),
    'male_g4': ('Access/gender_male_group4_nondrinker_vs_moderate_first_results.csv', 'alcohol_binary'),
    'female_g1': ('Access/gender_female_group1_nondrinker_vs_heavy_first_results.csv', 'alcohol_binary'),
    'female_g3': ('Access/gender_female_group3_nondrinker_vs_anydrinker_first_results.csv', 'alcohol_binary'),
    'female_g4': ('Access/gender_female_group4_nondrinker_vs_moderate_first_results.csv', 'alcohol_binary')
}

# Read each registered CSV once and keep the frame together with the alcohol
# variable and the path it came from, keyed by the short dataset label.
datasets = {}
for label, (csv_path, exposure) in files_to_analyze.items():
    frame = pd.read_csv(csv_path)
    datasets[label] = {'df': frame, 'alcohol_var': exposure, 'filepath': csv_path}

print('Loaded all CSV files')

In [None]:
def get_all_negative_effects(df, alcohol_var, max_path_length=5):
    """Collect the negative effects found on pathways that start at the alcohol variable.

    Parameters
    ----------
    df : pandas.DataFrame
        Results table with the columns 'treatment', 'outcome' and 'effect'.
    alcohol_var : str
        Value of the 'treatment' column that identifies the alcohol variable.
    max_path_length : int, default 5
        Maximum number of edges of a downstream simple path (passed to
        ``nx.all_simple_paths`` as ``cutoff``).

    Returns
    -------
    dict
        Maps ``"<treatment>-><outcome>"`` to the effect of that edge. It contains
        every negative direct effect ``alcohol_var -> mediator`` and every negative
        edge lying on a simple path of at most ``max_path_length`` edges that starts
        at such a mediator and ends at any node other than ``alcohol_var``.
        When a (treatment, outcome) pair occurs in several rows, the last row wins.
    """
    negative_rows = df[df['effect'] < 0]
    level1_negative = negative_rows[negative_rows['treatment'] == alcohol_var]

    # Without a negative direct effect there is no mediator to start from,
    # so nothing downstream can be collected either.
    if level1_negative.empty:
        return {}

    all_effects = {}
    for mediator, effect in zip(level1_negative['outcome'], level1_negative['effect']):
        all_effects[f"{alcohol_var}->{mediator}"] = effect

    # Directed graph restricted to negative effects only.
    G = nx.DiGraph()
    for treatment, outcome, effect in zip(
        negative_rows['treatment'], negative_rows['outcome'], negative_rows['effect']
    ):
        G.add_edge(treatment, outcome, effect=effect)

    # dict.fromkeys de-duplicates mediators while keeping their first-seen order.
    for mediator in dict.fromkeys(level1_negative['outcome']):
        # Paths may pass through alcohol_var but must not end on it.
        targets = set(G.nodes) - {mediator, alcohol_var}
        if not targets:
            continue
        # One search per mediator: all_simple_paths accepts an iterable of targets,
        # which avoids repeating the same depth-limited search for every node.
        for path in nx.all_simple_paths(G, mediator, targets, cutoff=max_path_length):
            for edge_from, edge_to in zip(path, path[1:]):
                all_effects[f"{edge_from}->{edge_to}"] = G[edge_from][edge_to]['effect']

    return all_effects

def compare_negative_effects(data1, data2, group1_name, group2_name):
    """Compare the negative pathway effects shared by two loaded result sets.

    ``data1`` and ``data2`` are dicts with the keys 'df' and 'alcohol_var'; the
    negative effects of each are obtained with ``get_all_negative_effects``.

    Returns ``None`` when the two sets share no pathway. Otherwise returns a
    DataFrame with one row per shared pathway (sorted by pathway key) holding
    both effects, their difference (group 2 minus group 1), its absolute value,
    and the two-sided Mann-Whitney U statistic and p-value computed over all
    shared pathways. The test columns are NaN when fewer than 3 pathways are shared.

    NOTE(review): the two samples are matched by pathway, whereas Mann-Whitney U
    treats them as independent samples - confirm this is the intended test.
    """
    first = get_all_negative_effects(data1['df'], data1['alcohol_var'])
    second = get_all_negative_effects(data2['df'], data2['alcohol_var'])

    shared = sorted(first.keys() & second.keys())
    if not shared:
        return None

    first_values = [first[key] for key in shared]
    second_values = [second[key] for key in shared]

    # The test is only run with at least 3 shared pathways; otherwise report NaN.
    test_statistic, test_pvalue = np.nan, np.nan
    if len(first_values) >= 3:
        test_statistic, test_pvalue = mannwhitneyu(
            first_values, second_values, alternative='two-sided'
        )

    rows = [
        {
            'pathway': key,
            'group1': group1_name,
            'effect1': value1,
            'group2': group2_name,
            'effect2': value2,
            'difference': value2 - value1,
            'abs_difference': abs(value2 - value1),
            'mannwhitney_statistic': test_statistic,
            'mannwhitney_pvalue': test_pvalue,
        }
        for key, value1, value2 in zip(shared, first_values, second_values)
    ]
    return pd.DataFrame(rows)

In [None]:
# Comparison 1: 'group4_nd_mod' results against 'group1_nd_heavy' results.
print('=' * 80)
print('1. ALL DRINKERS: ND vs MD & ND vs HD')
print('=' * 80)

comp1 = compare_negative_effects(
    datasets['group4_nd_mod'],
    datasets['group1_nd_heavy'],
    'ND_vs_Moderate',
    'ND_vs_Heavy',
)

if comp1 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp1['comparison'] = '1_All_Drinkers_MD_vs_HD'
    print('\nND vs Moderate compared to ND vs Heavy:')
    print(comp1.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp1.to_csv('negative_paths_wilcoxon_1_all_drinkers.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_1_all_drinkers.csv')

In [None]:
# Comparison 2: 'cirr_absent_cont' results against 'cirr_present_cont' results.
print('\n' + '=' * 80)
print('2. CIRRHOSIS + vs - with drinks_per_week')
print('=' * 80)

comp2 = compare_negative_effects(
    datasets['cirr_absent_cont'],
    datasets['cirr_present_cont'],
    'Cirrhosis_Absent',
    'Cirrhosis_Present',
)

if comp2 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp2['comparison'] = '2_Cirrhosis_drinks_per_week'
    print('\nCirrhosis Absent vs Present (drinks_per_week):')
    print(comp2.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp2.to_csv('negative_paths_wilcoxon_2_cirrhosis_drinks_per_week.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_2_cirrhosis_drinks_per_week.csv')

In [None]:
# Comparison 3: 'cirr_absent_g3' results against 'cirr_present_g3' results.
print('\n' + '=' * 80)
print('3. CIRRHOSIS + vs - (ND vs Any)')
print('=' * 80)

comp3 = compare_negative_effects(
    datasets['cirr_absent_g3'],
    datasets['cirr_present_g3'],
    'CirrAbsent_ND_vs_Any',
    'CirrPresent_ND_vs_Any',
)

if comp3 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp3['comparison'] = '3_Cirrhosis_ND_vs_Any'
    print('\nCirrhosis Absent vs Present (ND vs Any):')
    print(comp3.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp3.to_csv('negative_paths_wilcoxon_3_cirrhosis_nd_vs_any.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_3_cirrhosis_nd_vs_any.csv')

In [None]:
# Comparison 4: 'cirr_absent_g4' results against 'cirr_present_g4' results.
print('\n' + '=' * 80)
print('4. CIRRHOSIS + vs - (ND vs MD)')
print('=' * 80)

comp4 = compare_negative_effects(
    datasets['cirr_absent_g4'],
    datasets['cirr_present_g4'],
    'CirrAbsent_ND_vs_Mod',
    'CirrPresent_ND_vs_Mod',
)

if comp4 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp4['comparison'] = '4_Cirrhosis_ND_vs_MD'
    print('\nCirrhosis Absent vs Present (ND vs Moderate):')
    print(comp4.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp4.to_csv('negative_paths_wilcoxon_4_cirrhosis_nd_vs_md.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_4_cirrhosis_nd_vs_md.csv')

In [None]:
# Comparison 5: 'cirr_absent_g1' results against 'cirr_present_g1' results.
print('\n' + '=' * 80)
print('5. CIRRHOSIS + vs - (ND vs HD)')
print('=' * 80)

comp5 = compare_negative_effects(
    datasets['cirr_absent_g1'],
    datasets['cirr_present_g1'],
    'CirrAbsent_ND_vs_Heavy',
    'CirrPresent_ND_vs_Heavy',
)

if comp5 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp5['comparison'] = '5_Cirrhosis_ND_vs_HD'
    print('\nCirrhosis Absent vs Present (ND vs Heavy):')
    print(comp5.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp5.to_csv('negative_paths_wilcoxon_5_cirrhosis_nd_vs_hd.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_5_cirrhosis_nd_vs_hd.csv')

In [None]:
# Comparison 6: 'male_continuous' results against 'female_continuous' results.
print('\n' + '=' * 80)
print('6. GENDER Male vs Female (drinks_per_week)')
print('=' * 80)

comp6 = compare_negative_effects(
    datasets['male_continuous'],
    datasets['female_continuous'],
    'Male_drinks_per_week',
    'Female_drinks_per_week',
)

if comp6 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp6['comparison'] = '6_Gender_drinks_per_week'
    print('\nMale vs Female (drinks_per_week):')
    print(comp6.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp6.to_csv('negative_paths_wilcoxon_6_gender_drinks_per_week.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_6_gender_drinks_per_week.csv')

In [None]:
# Comparison 7: 'male_g1' results against 'female_g1' results.
print('\n' + '=' * 80)
print('7. GENDER Male vs Female (ND vs HD)')
print('=' * 80)

comp7 = compare_negative_effects(
    datasets['male_g1'],
    datasets['female_g1'],
    'Male_ND_vs_Heavy',
    'Female_ND_vs_Heavy',
)

if comp7 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp7['comparison'] = '7_Gender_ND_vs_HD'
    print('\nMale vs Female (ND vs Heavy):')
    print(comp7.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp7.to_csv('negative_paths_wilcoxon_7_gender_nd_vs_hd.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_7_gender_nd_vs_hd.csv')

In [None]:
# Comparison 8: 'male_g3' results against 'female_g3' results.
print('\n' + '=' * 80)
print('8. GENDER Male vs Female (ND vs ANY)')
print('=' * 80)

comp8 = compare_negative_effects(
    datasets['male_g3'],
    datasets['female_g3'],
    'Male_ND_vs_Any',
    'Female_ND_vs_Any',
)

if comp8 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp8['comparison'] = '8_Gender_ND_vs_ANY'
    print('\nMale vs Female (ND vs Any):')
    print(comp8.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp8.to_csv('negative_paths_wilcoxon_8_gender_nd_vs_any.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_8_gender_nd_vs_any.csv')

In [None]:
# Comparison 9: 'male_g4' results against 'female_g4' results.
print('\n' + '=' * 80)
print('9. GENDER Male vs Female (ND vs MD)')
print('=' * 80)

comp9 = compare_negative_effects(
    datasets['male_g4'],
    datasets['female_g4'],
    'Male_ND_vs_Mod',
    'Female_ND_vs_Mod',
)

if comp9 is None:
    # compare_negative_effects returns None when no pathway is shared.
    print('No common negative pathways found')
else:
    comp9['comparison'] = '9_Gender_ND_vs_MD'
    print('\nMale vs Female (ND vs Moderate):')
    print(comp9.loc[:, ['pathway', 'effect1', 'effect2', 'difference', 'mannwhitney_pvalue']])
    comp9.to_csv('negative_paths_wilcoxon_9_gender_nd_vs_md.csv', index=False)
    print('\nSaved: negative_paths_wilcoxon_9_gender_nd_vs_md.csv')

In [None]:
# Summary: re-run all nine comparisons, stack them into one table, save it, and
# report the Mann-Whitney result once per comparison.
print('\n' + '='*80)
print('SUMMARY: ALL NEGATIVE PATHWAY WILCOXON COMPARISONS')
print('='*80)

# Significance threshold applied to each comparison's p-value.
# NOTE(review): nine tests are judged at this level with no multiple-testing
# correction - confirm that is intended.
alpha = 0.05

all_comparisons = []

# (dataset 1 label, dataset 2 label, group 1 name, group 2 name, comparison id)
comparison_pairs = [
    ('group4_nd_mod', 'group1_nd_heavy', 'ND_vs_Moderate', 'ND_vs_Heavy', '1_All_Drinkers_MD_vs_HD'),
    ('cirr_absent_cont', 'cirr_present_cont', 'Cirrhosis_Absent', 'Cirrhosis_Present', '2_Cirrhosis_drinks_per_week'),
    ('cirr_absent_g3', 'cirr_present_g3', 'CirrAbsent_ND_vs_Any', 'CirrPresent_ND_vs_Any', '3_Cirrhosis_ND_vs_Any'),
    ('cirr_absent_g4', 'cirr_present_g4', 'CirrAbsent_ND_vs_Mod', 'CirrPresent_ND_vs_Mod', '4_Cirrhosis_ND_vs_MD'),
    ('cirr_absent_g1', 'cirr_present_g1', 'CirrAbsent_ND_vs_Heavy', 'CirrPresent_ND_vs_Heavy', '5_Cirrhosis_ND_vs_HD'),
    ('male_continuous', 'female_continuous', 'Male_drinks_per_week', 'Female_drinks_per_week', '6_Gender_drinks_per_week'),
    ('male_g1', 'female_g1', 'Male_ND_vs_Heavy', 'Female_ND_vs_Heavy', '7_Gender_ND_vs_HD'),
    ('male_g3', 'female_g3', 'Male_ND_vs_Any', 'Female_ND_vs_Any', '8_Gender_ND_vs_ANY'),
    ('male_g4', 'female_g4', 'Male_ND_vs_Mod', 'Female_ND_vs_Mod', '9_Gender_ND_vs_MD')
]

for ds1_name, ds2_name, g1_name, g2_name, comp_name in comparison_pairs:
    comp = compare_negative_effects(datasets[ds1_name], datasets[ds2_name], g1_name, g2_name)
    # None means the two result sets share no negative pathway; skip those.
    if comp is not None and len(comp) > 0:
        comp['comparison'] = comp_name
        all_comparisons.append(comp)

if all_comparisons:
    summary_df = pd.concat(all_comparisons, ignore_index=True)
    summary_df.to_csv('negative_paths_wilcoxon_all_comparisons.csv', index=False)
    print(f'\nTotal comparisons: {len(all_comparisons)}')
    print(f'Total negative pathway effect comparisons: {len(summary_df)}')
    print('\nSaved: negative_paths_wilcoxon_all_comparisons.csv')
    print('\nSummary by comparison:')
    for comp_name in summary_df['comparison'].unique():
        comp_data = summary_df[summary_df['comparison'] == comp_name]
        print(f'\n{comp_name}: {len(comp_data)} negative pathways compared')
        # The p-value is repeated on every row of a comparison, so read the first.
        pval = comp_data['mannwhitney_pvalue'].iloc[0]
        if pd.isna(pval):
            # compare_negative_effects leaves the p-value as NaN when fewer than
            # 3 pathways are shared; that is "not tested", not "not significant".
            print('  Mann-Whitney p-value: not available (test is skipped when fewer than 3 pathways are shared)')
        else:
            print(f'  Mann-Whitney p-value: {pval:.6f}')
            if pval < alpha:
                print('  Significant difference in negative pathway effects')
            else:
                print('  No significant difference in negative pathway effects')
else:
    print('No comparisons with common negative pathways found')