In [1]:
import pandas as pd
import gudhi as gd
import gudhi.representations
import os, re, time
import numpy as np
import pickle as pkl

# Data

In [3]:
# Atlas table: one row per region, carrying its resting-state network (RSN)
# label in column 'RSN' and its node index in column 'Node_number'.
RSNs_details = pd.read_csv('../Data/SchaeferAtlas_Regions_details.csv')

# Network names, in order of first appearance in the table.
rsn_labels = RSNs_details['RSN']
RSNs7 = rsn_labels.unique().tolist()

# Map every RSN name to the list of node numbers that belong to it.
node_numbers = RSNs_details['Node_number']
result_dict = {
    name: node_numbers[rsn_labels == name].tolist()
    for name in RSNs7
}

# Sanity check: number of networks, size of each network, and their names.
network_sizes = [len(nodes) for nodes in result_dict.values()]
print(len(result_dict), network_sizes, RSNs7)

7 [29, 35, 26, 22, 12, 30, 46] ['Visual', 'Somato Motor', 'Dorsal Attention', 'Salient Ventral Attention', 'Limbic', 'Control', 'Default']


In [None]:
# Dataset to process. The cohort-selection cell further down handles
# 'MPI_LEMON' and 'ABIDE'.
# dataset = 'MPI_LEMON'
dataset = 'ABIDE'
path_file = f'../Data/{dataset}/FCM_DistMat/'

# os.listdir() returns entries in arbitrary order and also lists
# sub-directories and hidden entries (e.g. '.ipynb_checkpoints', '.DS_Store'),
# which cannot be parsed as distance matrices. Keep only regular, non-hidden
# files and sort them so that every run processes subjects in the same order.
files_list = sorted(
    fname for fname in os.listdir(path_file)
    if not fname.startswith('.') and os.path.isfile(os.path.join(path_file, fname))
)
print(len(files_list))

In [4]:
# Load the subject table of the selected dataset and split the subject IDs into
# the two cohorts being compared. IDs are converted to strings because they are
# later matched against IDs parsed from the distance-matrix file names.
if dataset == 'MPI_LEMON':
    Detailsfile = pd.read_csv('../Data/MPI_LEMON/MPILemon_Subject_details.csv')
    Young = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'Young', 'Subject']]
    Elder = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'Elderly', 'Subject']]
    All_subs = Young + Elder
    Group1, Group2 = 'Young', 'Elderly'
    GR1, GR2 = Young, Elder

elif dataset == 'ABIDE':
    Detailsfile = pd.read_csv('../Data/ABIDE/ABIDE_Subject_details.csv')
    ASD = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'ASD', 'Subject identifier']]
    Healthy = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'HC', 'Subject identifier']]
    All_subs = ASD + Healthy
    Group1, Group2 = 'ASD', 'Healthy'
    GR1, GR2 = ASD, Healthy

else:
    # Without this branch an unknown dataset name would either hit a NameError
    # at the print below or silently reuse cohorts left over in the kernel.
    raise ValueError(f"Unsupported dataset {dataset!r}; expected 'MPI_LEMON' or 'ABIDE'")

# Cohort sizes and one example ID per cohort as a quick sanity check.
print(Group1, len(GR1), '\t', Group2, len(GR2), '\tTotal:', len(All_subs))
print(GR1[0], GR2[0])

ASD 395 	 Healthy 425 	Total: 820
50601 50551


# Persistent Homology Computation

In [6]:
t0 = time.time()

# Distances are capped at this value and the Rips filtration stops there.
# NOTE(review): sqrt(2) presumably corresponds to zero correlation under the
# distance transform used to build the matrices - confirm against that step.
thres = np.sqrt(2)

# For every resting-state network (RSN): restrict each subject's distance
# matrix to the nodes of that network, build a Vietoris-Rips filtration and
# store the persistence diagram plus the dimension-0/1/2 barcodes.
# NOTE(review): every subject file is re-read once per RSN; reading each file
# once and looping over RSNs inside would cut the CSV parsing by len(RSNs7).
for RSN in RSNs7:
    t1 = time.time()
    dgms_list = list()
    barcode0_list = list()
    barcode1_list = list()
    barcode2_list = list()
    indices = result_dict[RSN]
    print(len(indices), indices)
    for fname in files_list:
        # Subject ID = last '_'-separated token of the file name stem.
        SubID = fname.split('.')[0].split('_')[-1]
        DisMat = pd.read_csv(path_file + fname, header = None, sep = ',').values
        # Extract the rows and columns of this network. Indexing with np.ix_
        # returns a copy, so the capping below does not touch DisMat.
        DisMat_rsn = DisMat[np.ix_(indices, indices)]
        DisMat_rsn[DisMat_rsn > thres] = thres
        skeleton = gd.RipsComplex(distance_matrix = DisMat_rsn, max_edge_length=thres, sparse=None)
        # Simplices up to dimension 3 are requested because dimension-2
        # intervals are extracted below.
        Rips_simplex_tree = skeleton.create_simplex_tree(max_dimension=3)
        # persistence() has to run before the per-dimension intervals are queried.
        BarCodes_Rips = Rips_simplex_tree.persistence()
        # All (birth, death) pairs of the subject, dimensions pooled together.
        dgms_list.append({SubID: np.array([list(bars[1]) for bars in BarCodes_Rips])})
        barcode0_list.append({SubID: Rips_simplex_tree.persistence_intervals_in_dimension(0)})
        barcode1_list.append({SubID: Rips_simplex_tree.persistence_intervals_in_dimension(1)})
        barcode2_list.append({SubID: Rips_simplex_tree.persistence_intervals_in_dimension(2)})

    # Stores the barcodes separately for each RSNs
    rsn = RSN.replace(' ','')
    outpath = f'../OutputFiles/PosCorr/{dataset}/Output_RSNs/{rsn}/'
    os.makedirs(outpath, exist_ok=True)
    with open(outpath +f'{rsn}_dgms_list.pkl','wb') as f:
        pkl.dump(dgms_list, f)
    with open(outpath +f'{rsn}_barcode0_list.pkl','wb') as f:
        pkl.dump(barcode0_list, f)
    with open(outpath +f'{rsn}_barcode1_list.pkl','wb') as f:
        pkl.dump(barcode1_list, f)
    with open(outpath +f'{rsn}_barcode2_list.pkl','wb') as f:
        pkl.dump(barcode2_list, f)
    # Use the list length rather than a leaked loop index, which would be
    # undefined (or stale) when files_list is empty.
    print('Done for ',RSN,f'for {len(files_list)} subjects. ',time.time() - t1, '-'*50, '\n')

print('DONE', dataset, time.time() - t0)

29 [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114]
Done for  Visual for 820 subjects.  65.89000988006592 -------------------------------------------------- 

35 [14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133]
Done for  Somato Motor for 820 subjects.  85.0373547077179 -------------------------------------------------- 

26 [30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146]
Done for  Dorsal Attention for 820 subjects.  50.07950568199158 -------------------------------------------------- 

22 [43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157]
Done for  Salient Ventral Attention for 820 subjects.  42.59439420700073 -------------------------------------------------- 

12 [54, 55, 56, 57, 58, 59

In [7]:
# Split the per-subject diagrams/barcodes of every RSN into the two cohorts and
# store one pickle per (group, artefact). G1_ID_list / G2_ID_list record the
# subject order of the stored group lists.

# Set copies of the cohorts: constant-time membership tests inside the loop.
GR1_ids, GR2_ids = set(GR1), set(GR2)

for RSN in RSNs7:
    rsn = RSN.replace(' ','')
    outpath = f'../OutputFiles/PosCorr/{dataset}/Output_RSNs/{rsn}/'
    # NOTE: pickle.load executes arbitrary code; only open files written by
    # this notebook.
    with open(outpath +f'{rsn}_dgms_list.pkl','rb') as f:
        dgms_list = pkl.load(f)
    with open(outpath +f'{rsn}_barcode0_list.pkl','rb') as f:
        barcode0_list = pkl.load(f)
    with open(outpath +f'{rsn}_barcode1_list.pkl','rb') as f:
        barcode1_list = pkl.load(f)
    with open(outpath +f'{rsn}_barcode2_list.pkl','rb') as f:
        barcode2_list = pkl.load(f)

    print('Opened input files for   ', RSN)
    print(len(dgms_list), len(barcode0_list), len(barcode1_list), len(barcode2_list))
    if not (len(dgms_list) == len(barcode0_list) == len(barcode1_list) == len(barcode2_list)):
        raise ValueError(f'Inconsistent number of subjects in the pickles for {RSN}')

    Group1_dgms_list, Group2_dgms_list = list(), list()
    Group1_barcode0_list, Group2_barcode0_list = list(), list()
    Group1_barcode1_list, Group2_barcode1_list = list(), list()
    Group1_barcode2_list, Group2_barcode2_list = list(), list()
    G1_ID_list, G2_ID_list = list(), list()

    # Each stored record is a one-entry dict {SubID: array} and the four lists
    # were filled together, so position k refers to the same subject in all of
    # them. Walking the records themselves (instead of re-deriving positions
    # from the current directory listing) keeps this cell correct even if the
    # listing order differs from the one used when the pickles were written.
    for dgm_rec, bc0_rec, bc1_rec, bc2_rec in zip(dgms_list, barcode0_list, barcode1_list, barcode2_list):
        SubID = next(iter(dgm_rec))
        if SubID in GR1_ids:
            Group1_dgms_list.append(dgm_rec[SubID])
            Group1_barcode0_list.append(bc0_rec[SubID])
            Group1_barcode1_list.append(bc1_rec[SubID])
            Group1_barcode2_list.append(bc2_rec[SubID])
            G1_ID_list.append(SubID)
        elif SubID in GR2_ids:
            Group2_dgms_list.append(dgm_rec[SubID])
            Group2_barcode0_list.append(bc0_rec[SubID])
            Group2_barcode1_list.append(bc1_rec[SubID])
            Group2_barcode2_list.append(bc2_rec[SubID])
            G2_ID_list.append(SubID)
        # Subjects belonging to neither cohort are skipped.

    # One pickle per (group, artefact); file names are
    # '<rsn>_<group>_<artefact>_list.pkl'.
    grouped_outputs = {
        Group1: {
            'dgms': Group1_dgms_list,
            'barcode0': Group1_barcode0_list,
            'barcode1': Group1_barcode1_list,
            'barcode2': Group1_barcode2_list,
        },
        Group2: {
            'dgms': Group2_dgms_list,
            'barcode0': Group2_barcode0_list,
            'barcode1': Group2_barcode1_list,
            'barcode2': Group2_barcode2_list,
        },
    }
    for group_name, artefacts in grouped_outputs.items():
        for artefact_name, artefact_values in artefacts.items():
            with open(outpath +f'{rsn}_{group_name}_{artefact_name}_list.pkl','wb') as f:
                pkl.dump(artefact_values, f)
    print('Done for  ', RSN)
print('DONE')
# Group sizes from the last processed RSN.
print(Group1, len(Group1_barcode0_list), '\t', Group2, len(Group2_barcode0_list))

Opened input files for    Visual
820 820 820 820
Done for   Visual
Opened input files for    Somato Motor
820 820 820 820
Done for   Somato Motor
Opened input files for    Dorsal Attention
820 820 820 820
Done for   Dorsal Attention
Opened input files for    Salient Ventral Attention
820 820 820 820
Done for   Salient Ventral Attention
Opened input files for    Limbic
820 820 820 820
Done for   Limbic
Opened input files for    Control
820 820 820 820
Done for   Control
Opened input files for    Default
820 820 820 820
Done for   Default
DONE
ASD 395 	 Healthy 425


## Persistence Landscape and Persistent Entropy

In [None]:
def replace_infinity_by_thres(barcode, thres=np.sqrt(2)):
    """Cap infinite death values at a finite threshold, in place.

    Parameters
    ----------
    barcode : iterable
        Collection of barcodes. Each barcode is a 2-D array (or a nested
        mutable sequence) whose rows are ``[birth, death]`` bars.
    thres : float, optional
        Value written over every death equal to ``+inf``. Defaults to
        ``sqrt(2)``, the value that used to be hard-coded here.

    Returns
    -------
    The ``barcode`` object that was passed in. Its bars are modified in place;
    no copy is made.
    """
    for bars in barcode:
        if isinstance(bars, np.ndarray) and bars.ndim == 2 and bars.shape[1] >= 2:
            # Vectorised path: same in-place result as the row loop below.
            bars[bars[:, 1] == np.inf, 1] = thres
            continue
        # Generic path: nested lists, or empty arrays that are not 2-D.
        for bar in bars:
            if bar[1] == np.inf:
                bar[1] = thres
    return barcode

In [8]:
# For every RSN and cohort: compute the norms of the first persistence
# landscape of the dimension-1 barcodes and the persistent entropy of the full
# diagrams, then store them as pickles and as one tab-separated table.
t1 = time.time()
for RSN in RSNs7:
    rsn = RSN.replace(' ','')
    # Directory the group-level pickles are written to by the group-splitting
    # cell. A second assignment used to override this with
    # '../Outputs_RSN/Positive_weight/{dataset}/Barcodes_GlobalMeasures/{rsn}/',
    # a tree that no cell in this notebook creates, so a fresh run could not
    # find its inputs.
    outpath = f'../OutputFiles/PosCorr/{dataset}/Output_RSNs/{rsn}/'
    for grp in [Group1, Group2]:
        with open(outpath +f'{rsn}_{grp}_dgms_list.pkl','rb') as f:
            dgms_list = pkl.load(f)
        with open(outpath +f'{rsn}_{grp}_barcode1_list.pkl','rb') as f:
            barcode1_list = pkl.load(f)

        ## Persistence Landscape
        # Infinite deaths are capped first (the helper edits the arrays in place).
        barcode1_list_re2 = replace_infinity_by_thres(barcode1_list)
        # NOTE(review): no sample_range is passed, so the sampling interval is
        # presumably fitted on each group's own diagrams; norms from different
        # groups/RSNs may then be taken on different grids - confirm this is intended.
        LS = gd.representations.Landscape(num_landscapes=1)
        LS_fit = LS.fit_transform(barcode1_list_re2)

        # One norm per subject (row of LS_fit).
        L1_norm = np.linalg.norm(LS_fit, 1, axis=1)
        L2_norm = np.linalg.norm(LS_fit, axis=1)

        with open(outpath +f'{rsn}_{grp}_L1_norm_1dim.pkl','wb') as f:
            pkl.dump(L1_norm, f)
        with open(outpath +f'{rsn}_{grp}_L2_norm_1dim.pkl','wb') as f:
            pkl.dump(L2_norm, f)

        ## Persistent Entropy
        dgms_list_re2 = replace_infinity_by_thres(dgms_list)
        PE = gd.representations.Entropy()
        pe_dim = PE.fit_transform(dgms_list_re2)

        with open(outpath +f'{rsn}_{grp}_persistence_entropy.pkl','wb') as f:
            pkl.dump(pe_dim, f)

        ## Store the Global data
        # G1_ID_list / G2_ID_list are filled by the group-splitting cell in the
        # same order as the group pickles loaded above.
        Global_data = pd.DataFrame()
        Global_data['SubID'] =  G1_ID_list if grp == Group1 else G2_ID_list
        Global_data['L1_norm'] = L1_norm
        Global_data['L2_norm'] = L2_norm
        Global_data['pe_dim'] = pe_dim

        Global_data.to_csv(outpath + f'{rsn}_{grp}_L1L2PE.txt', sep = '\t', index=False)
        print('Done ', grp, '\t', Global_data.shape)
    print('Done for', rsn, '-'*80)
print('Done', time.time() - t1)

Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for Visual --------------------------------------------------------------------------------
Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for SomatoMotor --------------------------------------------------------------------------------
Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for DorsalAttention --------------------------------------------------------------------------------
Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for SalientVentralAttention --------------------------------------------------------------------------------
Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for Limbic --------------------------------------------------------------------------------
Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for Control --------------------------------------------------------------------------------
Done  ASD 	 (395, 4)
Done  Healthy 	 (425, 4)
Done for Default -----------------------------------------------