In [1]:
import pandas as pd
import gudhi as gd
import gudhi.representations
import os, re, time
import numpy as np
import pickle as pkl

# Data

In [2]:
# Run configuration -- edit these two values to switch analyses.
#   dataset  : 'MPI_LEMON' or 'ABIDE'
#   CorrType : 'AllCorr' or 'PosCorr'
dataset, CorrType = 'MPI_LEMON', 'AllCorr'

In [3]:
# Load the subject-details table of the selected dataset and split the subject
# identifiers (converted to strings) into the two cohorts that are compared.
# Produces: Detailsfile, All_subs, Group1/Group2 (cohort labels) and
# GR1/GR2 (lists of subject IDs belonging to each cohort).
if dataset == 'MPI_LEMON':
    Detailsfile = pd.read_csv('../Data/MPI_LEMON/MPILemon_Subject_details.csv')
    Young = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'Young', 'Subject']]
    Elder = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'Elderly', 'Subject']]
    All_subs = Young + Elder
    Group1, Group2 = 'Young', 'Elderly'
    GR1, GR2 = Young, Elder

elif dataset == 'ABIDE':
    Detailsfile = pd.read_csv('../Data/ABIDE/ABIDE_Subject_details.csv')
    ASD = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'ASD', 'Subject identifier']]
    Healthy = [str(sub) for sub in Detailsfile.loc[Detailsfile['Cohort'] == 'HC', 'Subject identifier']]
    All_subs = ASD + Healthy
    Group1, Group2 = 'ASD', 'TD'
    GR1, GR2 = ASD, Healthy

else:
    # Fail loudly instead of falling through: otherwise the prints below raise a
    # NameError (fresh kernel) or silently reuse cohorts left over from an
    # earlier run with a different dataset.
    raise ValueError(f"Unknown dataset {dataset!r}; expected 'MPI_LEMON' or 'ABIDE'")

# Sanity check: cohort sizes and the first subject ID of each cohort.
print(Group1, len(GR1), '\t', Group2, len(GR2), '\tTotal:', len(All_subs))
print(GR1[0], GR2[0])

Young 153 	 Elderly 72 	Total: 225
32302 32301


In [None]:
# Directory with the per-subject distance-matrix files of the selected dataset.
# The trailing '/' matters: later cells build file paths as `path_file + name`.
path_file = f'../Data/{dataset}/FCM_DistMat/' 
# NOTE: os.listdir returns entries in arbitrary order and includes every entry
# in the directory; later cells index into files_list positionally.
files_list = os.listdir(path_file)
# Displayed as the cell output: number of entries found.
len(files_list)

# Persistent Homology Computation

In [None]:
# Compute Vietoris-Rips persistent homology for every distance-matrix file.
# Each output list receives one single-key dict {SubID: array} per file:
#   dgms_list      -> (birth, death) pairs of all homology dimensions together
#   barcodeK_list  -> persistence intervals of homology dimension K (K = 0, 1, 2)
dgms_list = list()
barcode0_list = list()
barcode1_list = list()
barcode2_list = list()
t0 = time.time()

# Maximum edge length of the Rips filtration for each correlation type.
# NOTE(review): presumably the largest distance the matrices can contain -- confirm.
if CorrType == 'AllCorr':
    thres = 2
elif CorrType == 'PosCorr':
    thres = np.sqrt(2)
else:
    # Without this guard an unknown CorrType would leave `thres` undefined, or
    # silently reuse a stale value from a previous run of the notebook.
    raise ValueError(f"Unknown CorrType {CorrType!r}; expected 'AllCorr' or 'PosCorr'")

for i, fname in enumerate(files_list):
    t1 = time.time()
    # Subject ID: last '_'-separated token of the file name before the first '.'.
    SubID = fname.split('.')[0].split('_')[-1]
    DisMat = pd.read_csv(path_file + fname, header=None, sep=',').values
    skeleton = gd.RipsComplex(distance_matrix=DisMat, max_edge_length=thres, sparse=None)
    # Simplices up to dimension 3 are required to resolve homology up to dimension 2.
    Rips_simplex_tree = skeleton.create_simplex_tree(max_dimension=3)
    # persistence() must run before persistence_intervals_in_dimension();
    # it yields (dimension, (birth, death)) tuples.
    BarCodes_Rips = Rips_simplex_tree.persistence()
    intervals_dim1 = Rips_simplex_tree.persistence_intervals_in_dimension(1)
    dgms_list.append({SubID: np.array([list(bars[1]) for bars in BarCodes_Rips])})
    barcode0_list.append({SubID: Rips_simplex_tree.persistence_intervals_in_dimension(0)})
    barcode1_list.append({SubID: intervals_dim1})
    barcode2_list.append({SubID: Rips_simplex_tree.persistence_intervals_in_dimension(2)})
    print('Done for ',i,SubID,'  Number of 1Dholes = ', len(intervals_dim1), '  ',time.time() - t1)
print('Done for', dataset, CorrType, thres)
print(time.time() - t0)

### Stores the barcodes

In [6]:
# Persist the diagrams/barcodes of all subjects so the expensive persistent
# homology computation does not have to be repeated.
outpath = f'../OutputFiles/{CorrType}/{dataset}/'
os.makedirs(outpath, exist_ok=True)

for pkl_name, payload in (
    ('dgms_list.pkl', dgms_list),
    ('barcode0_list.pkl', barcode0_list),
    ('barcode1_list.pkl', barcode1_list),
    ('barcode2_list.pkl', barcode2_list),
):
    with open(outpath + pkl_name, 'wb') as f:
        pkl.dump(payload, f)

print('Done')

Done


In [4]:
# Reload the previously stored diagrams/barcodes from disk.
# NOTE: pickle.load executes arbitrary code from the file -- only load files
# that this notebook wrote itself.
outpath = f'../OutputFiles/{CorrType}/{dataset}/'

loaded = []
for pkl_name in ('dgms_list.pkl', 'barcode0_list.pkl', 'barcode1_list.pkl', 'barcode2_list.pkl'):
    with open(outpath + pkl_name, 'rb') as f:
        loaded.append(pkl.load(f))
dgms_list, barcode0_list, barcode1_list, barcode2_list = loaded

print('Opened input files')

Opened input files


In [5]:
# Sanity check: all four lists should hold one entry per subject.
tuple(len(entries) for entries in (dgms_list, barcode0_list, barcode1_list, barcode2_list))

(225, 225, 225, 225)

In [None]:
# Split the per-subject diagrams/barcodes into the two cohorts (GR1 / GR2).
# Subjects that belong to neither cohort are skipped.
Group1_dgms_list, Group2_dgms_list = list(), list()
Group1_barcode0_list, Group2_barcode0_list = list(), list()
Group1_barcode1_list, Group2_barcode1_list = list(), list()
Group1_barcode2_list, Group2_barcode2_list = list(), list()
G1_ID_list, G2_ID_list = list(), list()

# Sets give constant-time membership tests.
GR1_ids, GR2_ids = set(GR1), set(GR2)

# Every stored entry is a single-key dict {SubID: array}. The subject ID is read
# from that key instead of being re-derived from the directory listing, so the
# split stays correct even if the listing order or content differs from the
# run that produced the stored lists.
for dgm_entry, bc0_entry, bc1_entry, bc2_entry in zip(
        dgms_list, barcode0_list, barcode1_list, barcode2_list):
    SubID = next(iter(dgm_entry))
    if SubID in GR1_ids:
        Group1_dgms_list.append(dgm_entry[SubID])
        Group1_barcode0_list.append(bc0_entry[SubID])
        Group1_barcode1_list.append(bc1_entry[SubID])
        Group1_barcode2_list.append(bc2_entry[SubID])
        G1_ID_list.append(SubID)
    elif SubID in GR2_ids:
        Group2_dgms_list.append(dgm_entry[SubID])
        Group2_barcode0_list.append(bc0_entry[SubID])
        Group2_barcode1_list.append(bc1_entry[SubID])
        Group2_barcode2_list.append(bc2_entry[SubID])
        G2_ID_list.append(SubID)
print('DONE')
print(Group1, len(Group1_barcode0_list), '\t', Group2, len(Group2_barcode0_list))

In [23]:
# Write the cohort-specific diagrams/barcodes to '<outpath><cohort label>_<name>.pkl'.
group_payloads = (
    (Group1, (Group1_dgms_list, Group1_barcode0_list, Group1_barcode1_list, Group1_barcode2_list)),
    (Group2, (Group2_dgms_list, Group2_barcode0_list, Group2_barcode1_list, Group2_barcode2_list)),
)
pkl_suffixes = ('_dgms_list.pkl', '_barcode0_list.pkl', '_barcode1_list.pkl', '_barcode2_list.pkl')

for group_label, payloads in group_payloads:
    for pkl_suffix, payload in zip(pkl_suffixes, payloads):
        with open(outpath + f'{group_label}{pkl_suffix}', 'wb') as f:
            pkl.dump(payload, f)

print('Done')

Done


# Persistence Landscape

In [11]:
# First persistence landscape of the dimension-1 barcodes, and its L1 / L2 norms.
LS = gd.representations.Landscape(num_landscapes=1)

# Fit ONCE on both cohorts: with the default sample_range, fit() derives the
# sampling grid from the diagrams it is given. Fitting each cohort separately
# would sample the two cohorts on different grids, making their landscape
# vectors (and hence the norms below) not comparable.
LS.fit(list(Group1_barcode1_list) + list(Group2_barcode1_list))
Group1_LS = LS.transform(Group1_barcode1_list)
Group2_LS = LS.transform(Group2_barcode1_list)

# One norm per subject (row) of the sampled landscape vectors.
Group1_L1_norm = np.linalg.norm(Group1_LS, 1, axis=1)
Group1_L2_norm = np.linalg.norm(Group1_LS, axis=1)
Group2_L1_norm = np.linalg.norm(Group2_LS, 1, axis=1)
Group2_L2_norm = np.linalg.norm(Group2_LS, axis=1)

# Persistence Entropy

In [12]:
def replace_infinity_by_thres(barcode, thres):
    """Return copies of the diagrams with infinite death values capped at ``thres``.

    Parameters
    ----------
    barcode : iterable of array-like
        One diagram per element; each diagram is a sequence of (birth, death) rows.
    thres : float
        Value substituted for every death equal to ``np.inf``.

    Returns
    -------
    list of numpy.ndarray
        New float arrays, one per input diagram. The inputs are NOT modified, so
        arrays shared with other containers keep their original infinite values.
    """
    capped = []
    for bars in barcode:
        bars = np.array(bars, dtype=float)  # np.array copies, leaving the input untouched
        # Empty diagrams have no death column to index; pass them through as-is.
        if bars.ndim == 2 and bars.shape[0] > 0:
            bars[bars[:, 1] == np.inf, 1] = thres
        capped.append(bars)
    return capped

In [13]:
# Filtration threshold used to cap infinite death values (depends on CorrType).
if CorrType == 'PosCorr':
    thres = np.sqrt(2)
elif CorrType == 'AllCorr':
    thres = 2

# Persistence entropy per subject, computed on the diagrams after their
# infinite deaths have been replaced by the threshold.
PE = gd.representations.Entropy()

Group1_dgms_list = replace_infinity_by_thres(Group1_dgms_list, thres)
Group1_pe_dim = PE.fit_transform(Group1_dgms_list)

Group2_dgms_list = replace_infinity_by_thres(Group2_dgms_list, thres)
Group2_pe_dim = PE.fit_transform(Group2_dgms_list)

In [None]:
# Collect the per-subject global descriptors of each cohort in one table and
# print the cohort means. Columns are assigned one at a time, in this order.
Group1_Global_data = pd.DataFrame()
for column, values in (
    ('SubID', G1_ID_list),
    ('L1_norm', Group1_L1_norm),
    ('L2_norm', Group1_L2_norm),
    ('pe_dim', Group1_pe_dim),
):
    Group1_Global_data[column] = values

Group2_Global_data = pd.DataFrame()
for column, values in (
    ('SubID', G2_ID_list),
    ('L1_norm', Group2_L1_norm),
    ('L2_norm', Group2_L2_norm),
    ('pe_dim', Group2_pe_dim),
):
    Group2_Global_data[column] = values

metric_columns = ['L1_norm', 'L2_norm', 'pe_dim']
print(Group1,'\n', Group1_Global_data[metric_columns].mean())
print(Group2,'\n', Group2_Global_data[metric_columns].mean())
# Optional export of the tables (disabled):
# Group1_Global_data.to_csv(outpath + f'{Group1}_L1L2PE.txt', sep = '\t', index=False)
# Group2_Global_data.to_csv(outpath + f'{Group2}_L1L2PE.txt', sep = '\t', index=False)