# Imports

In [1]:
import os
import pandas as pd
import numpy as np
import scipy
import pickle
import sys

## Helper Functions

In [2]:
# Restore `loadData_hf` (directory containing the helper module) from IPython's
# %store storage; it must have been stored by an earlier notebook/session.
%store -r loadData_hf
# Put that directory first on the module search path so the import below resolves.
sys.path.insert(0, loadData_hf)
import LoadNetworkDataByID as lndbi

# Set Directory Paths

In [3]:
# Root data folder; the raw CSV/XLS inputs are read from its 'NewFTDData' subfolder
%store -r dataDir

# Directory the pickled pathology ID lists (path_id_TAU.pkl, path_id_TDP.pkl) are read from
%store -r path_dataDir

# Directory the thickness results produced by this notebook are saved to
%store -r thick_dataDir

# [1] Loading Raw Thickness Data

In [4]:
# Raw regional MRI measurements for all subjects, in long format
# (one row per id / date / system / label / measure).
# 'id' is read as a string so it can be joined against the string INDDID column
# of the lookup table (some ids, e.g. 108783x09, are not purely numeric).
thicknessAllraw = pd.read_csv(
    os.path.join(
        dataDir,
        'NewFTDData',
        'invivoPathCohort_quantsSubSesSchaefer400_tian12.csv',
    ),
    dtype={'id': str},
)

### Loading the Lookup Table of Group Labels (HC / tau / tdp) for the MRI Thickness Subjects

In [5]:
# Per-subject lookup table (demographics plus the 'Group' label used below).
# INDDID is read as a string so it matches the string 'id' column of the raw data.
thicknessPathLUT = pd.read_excel(
    os.path.join(dataDir, 'NewFTDData', 'InvivoPathCohort_03172023.xls'),
    dtype={'INDDID': str},
)

### Join the two dataframes above on INDDID (keep only rows whose INDDID appears in both)

In [6]:
# Inner join: attach the lookup-table columns to every raw measurement row,
# keeping only subjects present in both tables.
thicknessAll = thicknessAllraw.merge(
    thicknessPathLUT,
    left_on='id',
    right_on='INDDID',
    how='inner',
)

# The only id dropped from thicknessAllraw by the join is 108783x09 (849 rows lost)

### Group by path type

In [25]:
# Inspect the raw long-format measurement table (before the join)
thicknessAllraw

Unnamed: 0,id,date,system,label,name,measure,metric,value
0,100387,20081117x1323,AntsBrain,1,Brain,volume,numeric,1.747789e+06
1,100387,20081117x1323,Schaefer400x7v1,1,7Networks_LH_Vis_1,volume,numeric,1.967430e+03
2,100387,20081117x1323,Schaefer400x7v1,2,7Networks_LH_Vis_2,volume,numeric,1.935005e+03
3,100387,20081117x1323,Schaefer400x7v1,3,7Networks_LH_Vis_3,volume,numeric,1.082420e+03
4,100387,20081117x1323,Schaefer400x7v1,4,7Networks_LH_Vis_4,volume,numeric,1.568794e+03
...,...,...,...,...,...,...,...,...
94234,125098,20190923x1402,Tian_Subcortex_S2_3T,28,aGP-lh,volume,numeric,2.677760e+02
94235,125098,20190923x1402,Tian_Subcortex_S2_3T,29,aPUT-lh,volume,numeric,1.287680e+03
94236,125098,20190923x1402,Tian_Subcortex_S2_3T,30,pPUT-lh,volume,numeric,1.457152e+03
94237,125098,20190923x1402,Tian_Subcortex_S2_3T,31,aCAU-lh,volume,numeric,7.485440e+02


In [26]:
# Inspect the subject lookup table (one row per INDDID, with its Group label)
thicknessPathLUT

Unnamed: 0,INDDID,AgeAtMR,Sex,Education,MRScanDate,ScanID,Group
0,101045,54,Male,20,15-Mar-2007,1.2.124.113532.170.212.240.250.20070314.155101...,tdp
1,101483,66,Male,16,29-Nov-2011,1.2.840.113745.101000.1002000.40873.5892.31971737,tau
2,104093,58,Male,22,28-Jul-2011,1.2.840.113745.101000.1002000.40749.5887.30684264,tau
3,104094,51,Female,12,14-May-2007,1.2.840.113745.101000.1002000.39204.5416.22484724,tdp
4,104190,63,Male,18,08-Aug-2017,1.2.840.114350.2.87.2.798268.2.183265757.1,tau
...,...,...,...,...,...,...,...
109,119949,62,Male,22,'20150428x0858',,HC
110,101080,55,Female,16,'20100310x1251',,HC
111,102967,71,Female,16,'20150311x0850',,HC
112,113326,54,Male,21,'20040602x1534',,HC


In [24]:
# Inspect the joined table (raw measurements plus lookup-table columns)
thicknessAll

Unnamed: 0,id,date,system,label,name,measure,metric,value,INDDID,AgeAtMR,Sex,Education,MRScanDate,ScanID,Group
0,100387,20081117x1323,AntsBrain,1,Brain,volume,numeric,1.747789e+06,100387,74,Male,20,'20090818x1211',,HC
1,100387,20081117x1323,Schaefer400x7v1,1,7Networks_LH_Vis_1,volume,numeric,1.967430e+03,100387,74,Male,20,'20090818x1211',,HC
2,100387,20081117x1323,Schaefer400x7v1,2,7Networks_LH_Vis_2,volume,numeric,1.935005e+03,100387,74,Male,20,'20090818x1211',,HC
3,100387,20081117x1323,Schaefer400x7v1,3,7Networks_LH_Vis_3,volume,numeric,1.082420e+03,100387,74,Male,20,'20090818x1211',,HC
4,100387,20081117x1323,Schaefer400x7v1,4,7Networks_LH_Vis_4,volume,numeric,1.568794e+03,100387,74,Male,20,'20090818x1211',,HC
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
93385,125098,20190923x1402,Tian_Subcortex_S2_3T,28,aGP-lh,volume,numeric,2.677760e+02,125098,62,Female,13,23-Sep-2019,1.3.12.2.1107.5.2.43.167024.300000190923120503...,tdp
93386,125098,20190923x1402,Tian_Subcortex_S2_3T,29,aPUT-lh,volume,numeric,1.287680e+03,125098,62,Female,13,23-Sep-2019,1.3.12.2.1107.5.2.43.167024.300000190923120503...,tdp
93387,125098,20190923x1402,Tian_Subcortex_S2_3T,30,pPUT-lh,volume,numeric,1.457152e+03,125098,62,Female,13,23-Sep-2019,1.3.12.2.1107.5.2.43.167024.300000190923120503...,tdp
93388,125098,20190923x1402,Tian_Subcortex_S2_3T,31,aCAU-lh,volume,numeric,7.485440e+02,125098,62,Female,13,23-Sep-2019,1.3.12.2.1107.5.2.43.167024.300000190923120503...,tdp


In [7]:
# Split the joined table by the lookup table's 'Group' label.
thickness_path_type = thicknessAll.groupby('Group')

# Rows for Healthy Controls ('HC'), TAU patients ('tau') and TDP patients ('tdp'),
# pulled out of the grouping in that order.
thicknessHC, thicknessPatientTAU, thicknessPatientTDP = (
    thickness_path_type.get_group(group_label)
    for group_label in ('HC', 'tau', 'tdp')
)

In [8]:
# Sorted unique subject IDs (INDDID strings) within each group
HC_IDs = np.unique(thicknessHC['INDDID'])
TAU_IDs = np.unique(thicknessPatientTAU['INDDID'])
TDP_IDs = np.unique(thicknessPatientTDP['INDDID'])

# Keep Only the Subjects That Match the Pathology IDs

In [9]:
# Numeric (float64) copies of the string IDs, used only for matching against the
# pathology ID lists — presumably those are stored as floats; TODO confirm.
thick_id_TAU, thick_id_TDP = (
    id_strings.astype(np.float64) for id_strings in (TAU_IDs, TDP_IDs)
)

In [10]:
# Load the pickled pathology ID lists from path_dataDir.
# The `with` statement closes each file on exit, so no explicit f.close() is needed.
# NOTE(review): pickle.load can execute arbitrary code — only load trusted files.

# Loading path_id_TAU
with open(os.path.join(path_dataDir, 'path_id_TAU.pkl'), 'rb') as f:
    path_id_TAU = pickle.load(f)

# Loading path_id_TDP
with open(os.path.join(path_dataDir, 'path_id_TDP.pkl'), 'rb') as f:
    path_id_TDP = pickle.load(f)

In [11]:
# Boolean mask over the TAU thickness IDs: True where the ID also appears in path_id_TAU
mr_path_tau = np.isin(element=thick_id_TAU, test_elements=path_id_TAU)
print(f"Number of overlap (Tau - Path vs MR): {mr_path_tau.sum()}/{len(thick_id_TAU)}")

Number of overlap (Tau - Path vs MR): 13/26


In [12]:
# Boolean mask over the TDP thickness IDs: True where the ID also appears in path_id_TDP
mr_path_tdp = np.isin(element=thick_id_TDP, test_elements=path_id_TDP)
print(f"Number of overlap (TDP - Path vs MR): {mr_path_tdp.sum()}/{len(thick_id_TDP)}")

Number of overlap (TDP - Path vs MR): 21/30


In [13]:
# Apply the overlap masks to the original string ID arrays (not the float copies),
# so the selected IDs can be matched against the string INDDID column.
mr_path_tau_id, mr_path_tdp_id = TAU_IDs[mr_path_tau], TDP_IDs[mr_path_tdp]

In [14]:
# Sanity check: the matched IDs come from the string ID array, so the dtype is object
mr_path_tdp_id.dtype

dtype('O')

## TAU

In [15]:
# TAU rows restricted to subjects whose ID also appears in the pathology ID list
tau_has_path = thicknessPatientTAU['INDDID'].isin(mr_path_tau_id)
thicknessPatientTAU_Path = thicknessPatientTAU.loc[tau_has_path]

## TDP

In [16]:
# TDP rows restricted to subjects whose ID also appears in the pathology ID list
tdp_has_path = thicknessPatientTDP['INDDID'].isin(mr_path_tdp_id)
thicknessPatientTDP_Path = thicknessPatientTDP.loc[tdp_has_path]

# [2] Formatting Thickness Data

### numLab (Number of Labels in the Schaefer400x7 Atlas)

In [17]:
# Number of cortical labels (regions) in the Schaefer400x7 atlas
numLab = 400

### Thickness data for HC, TAU, TDP

In [18]:
# Collect per-subject regional measures for each group with the helper
# LoadNetworkDataByID, restricted to the 'Schaefer400x7v1' label system.
# The label count is passed via `numLab` (defined above) instead of repeating the
# literal 400 in every call.
# NOTE(review): the helper's warning output indexes AllResults['Thickness']['Mean']
# and AllResults['Volume']['Total'] as [i, L], so each of those arrays is presumably
# (number of IDs) x numLab — confirm against LoadNetworkDataByID.

# Healthy Controls (54 IDs in this run)
HCResults = lndbi.LoadNetworkDataByID(HC_IDs, thicknessHC, 'Schaefer400x7v1',
                                      numLab=numLab)

# TAU patients (26 IDs in this run)
PatientTAUResults = lndbi.LoadNetworkDataByID(TAU_IDs, thicknessPatientTAU, 'Schaefer400x7v1',
                                              numLab=numLab)

# TDP patients (30 IDs in this run)
PatientTDPResults = lndbi.LoadNetworkDataByID(TDP_IDs, thicknessPatientTDP, 'Schaefer400x7v1',
                                              numLab=numLab)

1
100115
2
100338


  AllResults['Thickness']['Mean'][i, L] = currThickness[currLabelsThick == L+1]
  AllResults['Volume']['Total'][i, L] = currVolume[currLabelsVol == L+1]


3
100387
4
100488
5
100742
6
101080
7
102187
8
102429
9
102967
10
102974
11
103023
12
103073
13
103118
14
103824
15
104102
16
105521
17
105601
18
106036
19
106135
20
106732
21
106897
22
107045
23
107700
24
108014
25
108560
26
108853
27
108856
28
109075
29
109194
30
109212
31
109325
32
109346
33
109417
34
109481
35
110342
36
111151
37
111611
38
112359
39
112916
40
113326
41
115226
42
115311
43
115629
44
116543
45
117957
46
117964
47
118177
48
118752
49
118885
50
119158
51
119535
52
119949
53
120506
54
124747
1
101483
2
104093
3
104190
4
105564
5
106309
6
106814
7
107516
8
107677
9
108026
10
109300
11
113113
12
115001
13
116591
14
116607
15
118064
16
118148
17
118410
18
118780
19
119089
20
120594
21
122104
22
122375
23
122376
24
122419
25
122601
26
123352
1
101045
2
104094
3
104862
4
105223
5
108276
6
108344
7
108790
8
109050
9
109073
10
109476
11
112273
12
112780
13
112974
14
114753
15
115264
16
117589
17
117637
18
118234
19
118430
20
118914
21
119349
22
119454
23
119502
24
120141
25
12

In [34]:
# Demographic summary for one group; point `group_type` at another results
# object (e.g. HCResults or PatientTAUResults) to summarise that group instead.
group_type = PatientTDPResults

# Age: mean and standard deviation (np.std default, i.e. population std)
age_list = group_type['Age']
mean_value, std_dev = np.mean(age_list), np.std(age_list)
print(f"Mean Value: {mean_value}")
print(f"Standard Deviation: {std_dev}")

# Sex: number of subjects per category
sex_list = group_type['Sex']
unique_elements, counts = np.unique(sex_list, return_counts=True)
counter_dict = dict(zip(unique_elements, counts))
print(counter_dict)

# ICV (stored under group_type['Volume']['ICV']): mean and standard deviation
ICV_list = group_type['Volume']['ICV']
mean_value, std_dev = np.mean(ICV_list), np.std(ICV_list)
print(f"Mean Value ICV: {mean_value}")
print(f"Standard Deviation ICV: {std_dev}")

Mean Value: 61.266666666666666
Standard Deviation: 8.621420352174512
{'Female': 12, 'Male': 18}
Mean Value ICV: 1505748.6774
Standard Deviation ICV: 136533.30870218764


## Matching Path INDDIDs

In [19]:
# Same extraction as for the full groups, but restricted to the subjects whose IDs
# also appear in the pathology ID lists (13 TAU and 21 TDP IDs in this run).
# The label count comes from `numLab` (defined above) rather than a repeated literal.
PatientTAUResults_Path = lndbi.LoadNetworkDataByID(mr_path_tau_id, thicknessPatientTAU_Path,
                                                   'Schaefer400x7v1', numLab=numLab)
PatientTDPResults_Path = lndbi.LoadNetworkDataByID(mr_path_tdp_id, thicknessPatientTDP_Path,
                                                   'Schaefer400x7v1', numLab=numLab)

1
101483
2
105564
3
106309
4
106814
5
107516
6
107677
7
108026
8
113113
9
115001
10
116591
11
116607
12
118410
13
118780
1
101045
2
104094
3
104862
4
108276
5
108344
6
108790
7
109050
8
109073
9
109476
10
112273
11
112780
12
112974
13
114753
14
117589
15
117637
16
118234
17
118430
18
118914
19
119454
20
120950
21
121199


# Save the Dataset and Variables

#### Save HCResults, PatientTAUResults, PatientTDPResults, and the pathology-matched PatientTAUResults_Path / PatientTDPResults_Path

In [20]:
# Pickle every results object into thick_dataDir as '<variable name>.pkl'.
# One loop replaces five copy-pasted blocks; the `with` statement already closes
# each file, so the former explicit f.close() calls were redundant and are dropped.
results_to_save = {
    'HCResults': HCResults,
    'PatientTAUResults': PatientTAUResults,
    'PatientTDPResults': PatientTDPResults,
    'PatientTAUResults_Path': PatientTAUResults_Path,
    'PatientTDPResults_Path': PatientTDPResults_Path,
}

for result_name, result in results_to_save.items():
    with open(os.path.join(thick_dataDir, f'{result_name}.pkl'), 'wb') as f:
        pickle.dump(result, f)

# Data Summary

In [21]:
# Report how many unique subject IDs each group contains: first the full groups,
# then the subsets whose IDs also appear in the pathology ID lists.
for summary_label, summary_ids in (
    ("Length of HC IDs in Thickness: ", HC_IDs),
    ("Length of TAU IDs in Thickness: ", TAU_IDs),
    ("Length of TDP IDs in Thickness: ", TDP_IDs),
    ("Length of TAU IDs in Thickness (Matching path INDDID): ", mr_path_tau_id),
    ("Length of TDP IDs in Thickness (Matching path INDDID): ", mr_path_tdp_id),
):
    print(summary_label + str(len(summary_ids)))

Length of HC IDs in Thickness: 54
Length of TAU IDs in Thickness: 26
Length of TDP IDs in Thickness: 30
Length of TAU IDs in Thickness (Matching path INDDID): 13
Length of TDP IDs in Thickness (Matching path INDDID): 21
