In [1]:
import gudhi as gd
import numpy as np
import pandas as pd
import os, time, math
import pickle as pkl
from gudhi.wasserstein import wasserstein_distance
from itertools import combinations, product
#import matplotlib.pyplot as plt

In [2]:
# Dataset selection: keep exactly one of the two assignments active.
# dataset = 'MPI_LEMON'
dataset = 'ABIDE'
path_file = '../PersistentHomology_fMRIdata/Data/{}/FCM_DistMat/'.format(dataset)

# Subject ID -> position of that subject's file in the directory listing.
# The ID is the last '_'-separated token of the file name up to its first '.'.
files_list = os.listdir(path_file)
all_subs_dict = {
    fname.partition('.')[0].rsplit('_', 1)[-1]: position
    for position, fname in enumerate(files_list)
}
print(len(files_list), len(all_subs_dict), files_list[0], type(all_subs_dict), len(all_subs_dict))

# Atlas table: RSN name -> list of the 'Node_number' values assigned to that RSN.
RSNs_details = pd.read_csv('../PersistentHomology_fMRIdata/Data/SchaeferAtlas_Regions_details.csv')
RSNs7 = RSNs_details['RSN'].unique().tolist()
result_dict = {
    rsn_name: RSNs_details.loc[RSNs_details['RSN'].eq(rsn_name), 'Node_number'].to_list()
    for rsn_name in RSNs7
}
print(len(result_dict), list(map(len, result_dict.values())), RSNs7)

820 820 ABIDE1_CALTECH_51491.csv <class 'dict'> 820
7 [29, 35, 26, 22, 12, 30, 46] ['Visual', 'Somato Motor', 'Dorsal Attention', 'Salient Ventral Attention', 'Limbic', 'Control', 'Default']


In [3]:
# Read the subject-details table for the selected dataset and split the subject
# IDs (converted to strings) into the two cohorts compared downstream.
#   Group1 / Group2 : display labels of the two cohorts
#   GR1 / GR2       : subject-ID lists of the two cohorts
#   All_subs        : GR1 followed by GR2
if dataset == 'MPI_LEMON':
    Detailsfile = pd.read_csv('../PersistentHomology_fMRIdata/Data/MPI_LEMON/MPILemon_Subject_details.csv')
    Young = list(map(str, list(Detailsfile.loc[Detailsfile['Cohort'] == 'Young','Subject'])))
    Elder = list(map(str, list(Detailsfile.loc[Detailsfile['Cohort'] == 'Elderly','Subject'])))
    All_subs = Young + Elder
    Group1, Group2 = 'Young', 'Elderly'
    GR1, GR2 = Young, Elder

elif dataset == 'ABIDE':
    Detailsfile = pd.read_csv('../PersistentHomology_fMRIdata/Data/ABIDE/ABIDE_Subject_details.csv')
    ASD = list(map(str, list(Detailsfile.loc[Detailsfile['Cohort'] == 'ASD','Subject identifier'])))
    Healthy = list(map(str, list(Detailsfile.loc[Detailsfile['Cohort'] == 'HC','Subject identifier'])))
    All_subs = ASD + Healthy
    Group1, Group2 = 'ASD', 'Healthy'
    GR1, GR2 = ASD, Healthy

else:
    # Fail here with a clear message rather than with a NameError on Group1 below.
    raise ValueError(f"Unsupported dataset {dataset!r}; expected 'MPI_LEMON' or 'ABIDE'")

print(Group1, len(GR1), '\t', Group2, len(GR2), '\tTotal:', len(All_subs))
# Number of unordered subject pairs, computed from the actual cohort size
# (previously hard-coded to 820, which is only correct for ABIDE).
print(GR1[0], GR2[0], math.comb(len(All_subs), 2))

ASD 395 	 Healthy 425 	Total: 820
50601 50551 335790


In [4]:
# Subject pairs to compare: unordered pairs inside each cohort, plus every
# (Group1 subject, Group2 subject) pair across the two cohorts.
all_combinations_ingroup1 = [*combinations(GR1, 2)]
all_combinations_ingroup2 = [*combinations(GR2, 2)]
all_combinations_betweengroups = [*product(GR1, GR2)]
all_combinations_len = sum(map(len, (all_combinations_ingroup1,
                                     all_combinations_ingroup2,
                                     all_combinations_betweengroups)))

# Sanity check: the three counts together should equal C(len(All_subs), 2).
print(
    Group1, len(GR1), len(all_combinations_ingroup1), all_combinations_ingroup1[0], '\n',
    Group2, len(GR2), len(all_combinations_ingroup2), all_combinations_ingroup2[0], '\n',
    'Both groups', len(All_subs), len(all_combinations_betweengroups), all_combinations_betweengroups[0],
    '\n total_combinations:', all_combinations_len, math.comb(len(All_subs), 2),
)

ASD 395 77815 ('50601', '50602') 
 Healthy 425 90100 ('50551', '50552') 
 Both groups 820 167875 ('50601', '50551') 
 total_combinations: 335790 335790


### Global level

In [None]:
# Locations for the global-level analysis of the selected dataset.
# Alternative (positive-weight) locations:
# path = f'../PersistentHomology_fMRIdata/OutputFiles/Positive_weight/{dataset}/'
# outpath = f'../OutputFiles_Javaplex/PosCorr/{dataset}/'

path = '../PersistentHomology_fMRIdata/OutputFiles/{}/'.format(dataset)
outpath = '../OutputFiles_Javaplex/{}/'.format(dataset)

# NOTE: unpickling can execute arbitrary code; only load files produced by this project.
with open(path + 'barcode1_list.pkl', mode='rb') as f:
    barcode1_list = pkl.load(f)

In [6]:
# Global level: pairwise distances between the subjects' persistence diagrams.
# Select which set of subject pairs to process; keep exactly one line active.
combination_list, outfile, C1, C2 = all_combinations_ingroup1, f'Wasserstein_distance_ingroup_{Group1}', f'{Group1}_sub1', f'{Group1}_sub2'
# combination_list, outfile, C1, C2 = all_combinations_ingroup2, f'Wasserstein_distance_ingroup_{Group2}', f'{Group2}_sub1', f'{Group2}_sub2'
# combination_list, outfile, C1, C2 = all_combinations_betweengroups, f'Wasserstein_distance_betweengroups_{Group1}_{Group2}', f'{Group1}', f'{Group2}'

# Optional cap on the number of pairs for a quick smoke test; None processes
# every pair. This replaces a leftover `break` that silently truncated the
# run (and the written file) to the first pair only.
max_pairs = None

print(dataset, outpath,  outfile, C1, C2)
# Column-oriented accumulator: one list per output column.
outdf = {C1:[], C2:[], '1_wasserstein_distance':[], '2_wasserstein_distance':[], 'bottleneck_distance':[]}

t0 = time.time()
for sub1, sub2 in combination_list[:max_pairs]:
    # barcode1_list is indexed first by the subject's position in the
    # directory listing (all_subs_dict), then by the subject ID itself.
    indx1, indx2 = all_subs_dict[sub1], all_subs_dict[sub2]
    dgm1 = barcode1_list[indx1][sub1]
    dgm2 = barcode1_list[indx2][sub2]

    outdf[C1].append(sub1)
    outdf[C2].append(sub2)
    outdf['1_wasserstein_distance'].append(wasserstein_distance(dgm1, dgm2, order=1))
    outdf['2_wasserstein_distance'].append(wasserstein_distance(dgm1, dgm2, order=2))
    outdf['bottleneck_distance'].append(gd.bottleneck_distance(dgm1, dgm2))

# index=False is the documented way to omit the row index from the output file.
pd.DataFrame(outdf).to_csv(outpath + f'Z_{outfile}.txt', sep='\t', index=False)
print('Done for ', outfile, time.time() -t0)

ABIDE ../OutputFiles_Javaplex/ABIDE/ Wasserstein_distance_ingroup_ASD ASD_sub1 ASD_sub2
Done for  Wasserstein_distance_ingroup_ASD 1557.2989509105682


In [10]:
# Show the accumulated distance table (the notebook truncates long frames on display).
pd.DataFrame.from_dict(outdf)

Unnamed: 0,ASD_sub1,ASD_sub2,1_wasserstein_distance,2_wasserstein_distance,bottleneck_distance
0,50601,50602,3.816973,0.309521,0.076545
1,50601,50603,3.702369,0.329487,0.089434
2,50601,50604,4.561162,0.383814,0.104638
3,50601,50605,2.571120,0.246141,0.067157
4,50601,50606,2.567787,0.248827,0.060002
...,...,...,...,...,...
77810,50529,50531,2.719615,0.217701,0.042856
77811,50529,50532,3.085471,0.243742,0.043377
77812,50530,50531,2.791110,0.227207,0.044083
77813,50530,50532,2.808056,0.224667,0.049778


### RSNs level

In [5]:
# RSN level: for every resting-state network, compute the pairwise distances
# between the subjects' persistence diagrams and write one table per network.
# Select which set of subject pairs to process; keep exactly one line active.
combination_list, outfile, C1, C2 = all_combinations_ingroup1, f'Wasserstein_distance_ingroup_{Group1}', f'{Group1}_sub1', f'{Group1}_sub2'
# combination_list, outfile, C1, C2 = all_combinations_ingroup2, f'Wasserstein_distance_ingroup_{Group2}', f'{Group2}_sub1', f'{Group2}_sub2'
# combination_list, outfile, C1, C2 = all_combinations_betweengroups, f'Wasserstein_distance_betweengroups_{Group1}_{Group2}', f'{Group1}', f'{Group2}'

print(dataset, outfile, C1, C2)
t0 = time.time()
for RSN in RSNs7:
    rsn = RSN.replace(' ','')
    # RSN-specific names (rsn_path, rsn_barcode1_list, rsn_outdf) are used on
    # purpose: rebinding the notebook-level `path`, `barcode1_list` and `outdf`
    # here would make the global-level cells silently operate on the last
    # RSN's data when they are run after this cell.
    # NOTE(review): inputs and outputs live under the PosCorr tree - confirm this is intended.
    rsn_path = f'../OutputFiles_Javaplex/PosCorr/{dataset}/Barcodes_GlobalMeasures/RSNs/{rsn}/'
    # NOTE: unpickling can execute arbitrary code; only load files produced by this project.
    with open(rsn_path + f'{rsn}_barcode1_list.pkl', 'rb') as f:
        rsn_barcode1_list = pkl.load(f)

    # Column-oriented accumulator: one list per output column.
    rsn_outdf = {C1:[], C2:[], '1_wasserstein_distance':[], '2_wasserstein_distance':[], 'bottleneck_distance':[]}
    t1 = time.time()
    for sub1, sub2 in combination_list:
        # The barcode list is indexed first by the subject's position in the
        # directory listing (all_subs_dict), then by the subject ID itself.
        indx1, indx2 = all_subs_dict[sub1], all_subs_dict[sub2]
        dgm1 = rsn_barcode1_list[indx1][sub1]
        dgm2 = rsn_barcode1_list[indx2][sub2]

        rsn_outdf[C1].append(sub1)
        rsn_outdf[C2].append(sub2)
        rsn_outdf['1_wasserstein_distance'].append(wasserstein_distance(dgm1, dgm2, order=1))
        rsn_outdf['2_wasserstein_distance'].append(wasserstein_distance(dgm1, dgm2, order=2))
        rsn_outdf['bottleneck_distance'].append(gd.bottleneck_distance(dgm1, dgm2))

    # The table is written next to the RSN's barcode file; index=False is the
    # documented way to omit the row index.
    pd.DataFrame(rsn_outdf).to_csv(rsn_path + f'A_{rsn}_{outfile}.txt', sep='\t', index=False)
    print('Done for ', rsn, time.time() -t1)

print('Done for ', outfile, time.time() -t0)

ABIDE Wasserstein_distance_ingroup_ASD ASD_sub1 ASD_sub2
Done for  Visual 0.02372598648071289
Done for  SomatoMotor 0.0
Done for  DorsalAttention 0.005920886993408203
Done for  SalientVentralAttention 0.0049724578857421875
Done for  Limbic 0.0
Done for  Control 0.0
Done for  Default 0.0
Done for  Wasserstein_distance_ingroup_ASD 0.1674952507019043
