In [1]:
import os
import pandas as pd
import numpy as np

from scipy.stats import wilcoxon

In [20]:
p_cond_single = './results_paper/baseline/mlp'
p_cond_mtl = './results_paper/mtl/mlp'
p_sex = './results_paper/sex/'
p_age = './results_paper/age/'

cases = ['ASD',
        'SZ',
        'BIP',
        'ADHD',
        'DEL15q11_2',
        'DEL22q11_2',
        'DUP22q11_2',
        'DEL16p11_2',
        'DUP16p11_2',
        'DEL1q21_1',
        'DUP1q21_1']

sites_sex = ['SZ3',
        'SZ6',
        'Svip2',
        'ADHD6',
        'HSJ',
        'UCLA_CB',
        'Svip1',
        'UKBB11026',
        'UKBB11027',
        'UKBB11025',
        'ADHD1',
        'ADHD3',
        'ADHD5',
        'UCLA_DS1']

sites_age = ['USM', # why this missing?
        'SZ3',
        'SZ6',
        'Svip2',
        'ADHD6',
        'HSJ',
        'SZ2',
        'SZ1',
        'UCLA_CB',
        'Svip1',
        'ADHD1',
        'ADHD3',
        'NYU',
        'ADHD5',
        'UCLA_DS1',
        'UKBB11026',
        'UKBB11027',
        'UKBB11025']

# Load data
## Conditions

In [21]:
# Single Task
st_data_conn = []
for case in cases:
    folds_conn = []
    for fold in range(5):
        # Connectomes
        p_parent = os.path.join(p_cond_single, f"{case}/fold_{fold}")
        file = [f for f in os.listdir(p_parent) if f[-4:]=='.csv'][0]
        folds_conn.append(pd.read_csv(os.path.join(p_parent,file),index_col=0,header=[0,1])[case].loc[99]['Accuracy/test'])

    st_data_conn.append(folds_conn)
st_data_conn = dict(zip(cases,st_data_conn))
df_st_cond = pd.DataFrame(st_data_conn).transpose()

# Multi-task
mtl_data_conn = []
for fold in range(5):
    # Connectomes
    p_parent = os.path.join(p_cond_mtl, f"fold_{fold}")
    file = [f for f in os.listdir(p_parent) if f[-4:]=='.csv'][0]
    ddd = pd.read_csv(os.path.join(p_parent,file),index_col=0,header=[0,1]).loc[99]
    mtl_data_conn.append(ddd.reset_index()[ddd.reset_index()['level_1']=='Accuracy/test'].set_index('level_0')[99])
df_mtl_cond = pd.concat(mtl_data_conn,axis=1, keys= [0,1,2,3,4])


## Sex

In [24]:
# Single Task
st_data_sex = []
for site in sites_sex:
    folds_conn = []
    for fold in range(5):
        # Connectomes
        p_parent = os.path.join(p_sex, f"{site}/fold_{fold}")
        file = [f for f in os.listdir(p_parent) if f[-4:]=='.csv'][0]
        folds_conn.append(pd.read_csv(os.path.join(p_parent,file),index_col=0,header=[0,1])[site].loc[99]['Accuracy/test'])

    st_data_sex.append(folds_conn)
st_data_sex = dict(zip(sites_sex,st_data_sex))
df_st_sex = pd.DataFrame(st_data_sex).transpose()

# Multi-task
mtl_data_sex = []
for fold in range(5):
    # Connectomes
    p_parent = os.path.join(p_sex, f"all/fold_{fold}")
    file = [f for f in os.listdir(p_parent) if f[-4:]=='.csv'][0]
    ddd = pd.read_csv(os.path.join(p_parent,file),index_col=0,header=[0,1]).loc[99]
    mtl_data_sex.append(ddd.reset_index()[ddd.reset_index()['level_1']=='Accuracy/test'].set_index('level_0')[99])
df_mtl_sex = pd.concat(mtl_data_sex,axis=1, keys= [0,1,2,3,4])


## Age

In [25]:
# Single Task
st_data_age = []
for site in sites_age:
    folds_conn = []
    for fold in range(5):
        # Connectomes
        p_parent = os.path.join(p_age, f"{site}/fold_{fold}")
        file = [f for f in os.listdir(p_parent) if f[-4:]=='.csv'][0]
        folds_conn.append(pd.read_csv(os.path.join(p_parent,file),index_col=0,header=[0,1])[site].loc[99]['Loss/test'])

    st_data_age.append(folds_conn)
st_data_age = dict(zip(sites_age,st_data_age))
df_st_age = pd.DataFrame(st_data_age).transpose()

# Multi-task
mtl_data_age = []
for fold in range(5):
    # Connectomes
    p_parent = os.path.join(p_age, f"all/fold_{fold}")
    file = [f for f in os.listdir(p_parent) if f[-4:]=='.csv'][0]
    ddd = pd.read_csv(os.path.join(p_parent,file),index_col=0,header=[0,1]).loc[99]
    mtl_data_age.append(ddd.reset_index()[ddd.reset_index()['level_1']=='Loss/test'].set_index('level_0')[99])
df_mtl_age = pd.concat(mtl_data_age,axis=1, keys= [0,1,2,3,4])


# Wilcoxon signed rank test

In [27]:
mtl_cond_flat = pd.concat([df_mtl_cond[0],df_mtl_cond[1],df_mtl_cond[2],df_mtl_cond[3],df_mtl_cond[4]])
st_cond_flat = pd.concat([df_st_cond[0],df_st_cond[1],df_st_cond[2],df_st_cond[3],df_st_cond[4]])
wilcoxon(mtl_cond_flat - st_cond_flat, alternative='greater')

WilcoxonResult(statistic=390.5, pvalue=0.9668319594203625)

In [28]:
mtl_sex_flat = pd.concat([df_mtl_sex[0],df_mtl_sex[1],df_mtl_sex[2],df_mtl_sex[3],df_mtl_sex[4]])
st_sex_flat = pd.concat([df_st_sex[0],df_st_sex[1],df_st_sex[2],df_st_sex[3],df_st_sex[4]])
wilcoxon(mtl_sex_flat - st_sex_flat, alternative='greater')

WilcoxonResult(statistic=28740.0, pvalue=7.011120300088531e-06)

In [17]:
mtl_age_flat = pd.concat([df_mtl_age[0],df_mtl_age[1],df_mtl_age[2],df_mtl_age[3],df_mtl_age[4]])
st_age_flat = pd.concat([df_st_age[0],df_st_age[1],df_st_age[2],df_st_age[3],df_st_age[4]])
wilcoxon(mtl_age_flat - st_age_flat, alternative='less')

WilcoxonResult(statistic=28291.0, pvalue=2.1113370683412383e-16)