In [1]:
import gudhi as gd
import numpy as np
import pandas as pd
import os, time, math
import pickle as pkl
from gudhi.wasserstein import wasserstein_distance
from itertools import combinations, product

In [2]:
# Which dataset to process; swap with the commented line to switch.
dataset = "MPI_LEMON"
# dataset = "ABIDE"

# Which correlation variant to process; this selects the sub-folder under
# ../OutputFiles/ that the global-level cell reads its barcodes from.
CorrType = "PosCorr"
# CorrType = "AllCorr"

# Data

In [None]:
# Folder with one file per subject for the selected dataset.
path_file = f'../Data/{dataset}/FCM_DistMat/'

files_list = os.listdir(path_file)

# Subject ID -> position of its file in the directory listing.
# The ID is the last '_'-separated token of the file name before the first '.'.
# NOTE(review): os.listdir() returns entries in arbitrary order, and these
# positions are later used to index `barcode1_list`; presumably the pickles
# were written using the same listing order -- confirm.
all_subs_dict = {
    name.split('.')[0].split('_')[-1]: order
    for order, name in enumerate(files_list)
}
print(dataset, len(files_list))

# Atlas region table: each row carries an 'RSN' label and a 'Node_number'.
RSNs_details = pd.read_csv('../Data/SchaeferAtlas_Regions_details.csv')
RSNs7 = RSNs_details['RSN'].unique().tolist()

# RSN label -> list of node numbers belonging to that RSN (first-appearance order).
result_dict = {
    key: RSNs_details.loc[RSNs_details['RSN'] == key, 'Node_number'].tolist()
    for key in RSNs_details['RSN'].unique()
}
print(len(result_dict), [len(x) for x in result_dict.values()], RSNs7)

In [4]:
# Build the two cohorts' subject-ID lists (as strings) for the selected dataset.
# Defines: Detailsfile, the per-cohort lists, All_subs, the display labels
# Group1/Group2 and the matching ID lists GR1/GR2 used by the cells below.
if dataset == 'MPI_LEMON':
    Detailsfile = pd.read_csv('../Data/MPI_LEMON/MPILemon_Subject_details.csv')
    Young = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'Young', 'Subject']]
    Elder = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'Elderly', 'Subject']]
    All_subs = Young + Elder
    Group1, Group2 = 'Young', 'Elderly'
    GR1, GR2 = Young, Elder

elif dataset == 'ABIDE':
    Detailsfile = pd.read_csv('../Data/ABIDE/ABIDE_Subject_details.csv')
    ASD = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'ASD', 'Subject identifier']]
    Healthy = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'HC', 'Subject identifier']]
    All_subs = ASD + Healthy
    Group1, Group2 = 'ASD', 'Healthy'
    GR1, GR2 = ASD, Healthy

else:
    # Without this branch an unknown dataset would either fail later with a
    # NameError or silently reuse groups left over from a previous kernel run.
    raise ValueError(f"Unsupported dataset {dataset!r}; expected 'MPI_LEMON' or 'ABIDE'")

# Sanity output: cohort sizes, first subject of each cohort, and a reference pair count.
print(Group1, len(GR1), '\t', Group2, len(GR2), '\tTotal:', len(All_subs))
print(GR1[0], GR2[0], math.comb(820,2))

Young 153 	 Elderly 72 	Total: 225
32302 32301 335790


In [5]:
# Enumerate every subject pair to compare:
#   - unordered pairs inside each cohort,
#   - every (Group1 subject, Group2 subject) pair across cohorts.
all_combinations_ingroup1 = list(combinations(GR1, 2))
all_combinations_ingroup2 = list(combinations(GR2, 2))
all_combinations_betweengroups = list(product(GR1, GR2))

# The three lists together should account for all C(len(All_subs), 2) pairs;
# both numbers are printed side by side below as a check.
all_combinations_len = sum(
    map(len, (all_combinations_ingroup1,
              all_combinations_ingroup2,
              all_combinations_betweengroups))
)

print(
    Group1, len(GR1), len(all_combinations_ingroup1), all_combinations_ingroup1[0], '\n',
    Group2, len(GR2), len(all_combinations_ingroup2), all_combinations_ingroup2[0], '\n',
    'Both groups', len(All_subs), len(all_combinations_betweengroups), all_combinations_betweengroups[0],
    '\n total_combinations:', all_combinations_len, math.comb(len(All_subs), 2),
)

Young 153 11628 ('32302', '32304') 
 Elderly 72 2556 ('32301', '32303') 
 Both groups 225 11016 ('32302', '32301') 
 total_combinations: 25200 25200


### Global level

Calculates the 1-Wasserstein, 2-Wasserstein, and bottleneck distances between the persistence diagrams of every pair of subjects, separately for intra-group and inter-group pairs.

In [None]:
path = f'../OutputFiles/{CorrType}/{dataset}/'

# Load the pickled barcodes. They are indexed below as
# barcode1_list[position][subject_id], with `position` taken from all_subs_dict.
with open(path + 'barcode1_list.pkl', 'rb') as f:
    barcode1_list = pkl.load(f)

# Choose which set of pairs to process by leaving exactly one line uncommented.
# combination_list, outfile, C1, C2 = all_combinations_ingroup1, f'Wasserstein_distance_ingroup_{Group1}', f'{Group1}_sub1', f'{Group1}_sub2'
combination_list, outfile, C1, C2 = all_combinations_ingroup2, f'Wasserstein_distance_ingroup_{Group2}', f'{Group2}_sub1', f'{Group2}_sub2'
# combination_list, outfile, C1, C2 = all_combinations_betweengroups, f'Wasserstein_distance_betweengroups_{Group1}_{Group2}', f'{Group1}', f'{Group2}'

print(dataset, path,  outfile, C1, C2)

# Column-oriented result table: two subject-ID columns followed by the three distances.
distance_columns = ('1_wasserstein_distance', '2_wasserstein_distance', 'bottleneck_distance')
outdf = {column: [] for column in (C1, C2, *distance_columns)}

t0 = time.time()
for sub1, sub2 in combination_list:
    dgm1 = barcode1_list[all_subs_dict[sub1]][sub1]
    dgm2 = barcode1_list[all_subs_dict[sub2]][sub2]
    # Compute the whole row first so a failure never leaves columns of unequal length.
    row = (
        sub1,
        sub2,
        wasserstein_distance(dgm1, dgm2, order=1),
        wasserstein_distance(dgm1, dgm2, order=2),
        gd.bottleneck_distance(dgm1, dgm2),
    )
    # outdf preserves insertion order, so its keys line up with `row`.
    for column, value in zip(outdf, row):
        outdf[column].append(value)

# Writing the table to disk is currently disabled; uncomment to save.
# pd.DataFrame(outdf).to_csv(path + f'{outfile}.txt', sep = '\t', index=None)
print('Done for ', outfile, time.time() -t0)

MPI_LEMON ../OutputFiles/AllCorr/MPI_LEMON/ Wasserstein_distance_ingroup_Elderly Elderly_sub1 Elderly_sub2


In [None]:
# Render the distances collected in `outdf` as a table (last expression is displayed).
pd.DataFrame(data=outdf)

### RSNs level

Calculates the 1-Wasserstein, 2-Wasserstein, and bottleneck distances between persistence diagrams within each resting-state network (RSN), separately for intra-group and inter-group pairs, and writes one results file per RSN.

In [None]:
# Choose which set of pairs to process by leaving exactly one line uncommented.
# combination_list, outfile, C1, C2 = all_combinations_ingroup1, f'Wasserstein_distance_ingroup_{Group1}', f'{Group1}_sub1', f'{Group1}_sub2'
combination_list, outfile, C1, C2 = all_combinations_ingroup2, f'Wasserstein_distance_ingroup_{Group2}', f'{Group2}_sub1', f'{Group2}_sub2'
# combination_list, outfile, C1, C2 = all_combinations_betweengroups, f'Wasserstein_distance_betweengroups_{Group1}_{Group2}', f'{Group1}', f'{Group2}'

print(dataset, outfile, C1, C2)
t0 = time.time()
for RSN in RSNs7:
    # Folder and file names use the RSN label with spaces removed.
    rsn = RSN.replace(' ','')
    # Follow the notebook-wide CorrType setting (as the global-level cell does)
    # instead of always reading and overwriting the PosCorr results.
    path = f'../OutputFiles/{CorrType}/{dataset}/Output_RSNs/{rsn}/'
    # NOTE: `path` and `barcode1_list` overwrite the global-level variables of
    # the same names; re-run the global-level cell before reusing them.
    with open(path +f'{rsn}_barcode1_list.pkl','rb') as f:
        barcode1_list = pkl.load(f)

    # Fresh column-oriented result table for this RSN.
    outdf = {C1:[], C2:[], '1_wasserstein_distance':[], '2_wasserstein_distance':[], 'bottleneck_distance':[]}
    t1 = time.time()
    for sub1,sub2 in combination_list:
        # barcode1_list is indexed as [position][subject_id]; positions come from all_subs_dict.
        indx1, indx2 = all_subs_dict[sub1], all_subs_dict[sub2]
        dgm1 = barcode1_list[indx1][sub1]
        dgm2 = barcode1_list[indx2][sub2]
        wasserstein_distance1 = wasserstein_distance(dgm1, dgm2, order=1)
        wasserstein_distance2 = wasserstein_distance(dgm1, dgm2, order=2)
        bottleneck_distance = gd.bottleneck_distance(dgm1, dgm2)
        outdf[C1].append(sub1)
        outdf[C2].append(sub2)
        outdf['1_wasserstein_distance'].append(wasserstein_distance1)
        outdf['2_wasserstein_distance'].append(wasserstein_distance2)
        outdf['bottleneck_distance'].append(bottleneck_distance)

    # One tab-separated file per RSN, written next to that RSN's barcode pickle.
    pd.DataFrame(outdf).to_csv(path + f'{outfile}.txt', sep = '\t', index=None)
    print('Done for ', rsn, time.time() -t1)

print('Done for ', outfile, time.time() -t0)

MPI_LEMON Wasserstein_distance_ingroup_Young Young_sub1 Young_sub2
