In [1]:
import pandas as pd
import nibabel as nib
import os
import numpy as np
import matplotlib.pyplot as plt
from nilearn import image
import ants
from datetime import datetime, timedelta
import pydicom as dicom
import pickle
import matplotlib.image as mpimg
from collections import defaultdict, Counter
from scipy import stats
from glob import glob

pd.set_option('display.max_colwidth', None)
pd.set_option('display.max_columns', None)

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-0beo461u because the default path (/gpfs/home/lc3424/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [56]:
dataset_path_list = ['/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/', '/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/']
dataset_file_path = []

for dataset_path in dataset_path_list:
    temp = glob(dataset_path + '*/*/t1/spm/segmentation/normalized_space/*')
    temp = [f for f in temp if 'Space_T1w.nii.gz' in f]
    files = []

    for f in temp:
        res = f.split('/')
        if '_' in res[10]:
            print(f)
        files.append((res[9].split('-')[1], res[10].split('-')[1], f))  # (sub, ses, file_path)

    dataset_file_path = dataset_file_path + files


In [55]:
file_df = pd.DataFrame(dataset_file_path, columns=['Subject', 'Session', 'Path'])
file_df.loc[file_df.Session.str.contains('_')]

Unnamed: 0,Subject,Session,Path


In [25]:
label_df = pd.read_csv('/gpfs/home/lc3424/capstone/2021_dementia/lc3424_workspace/experiments/20211102/label_20211102.tsv', sep='\t')
label_df.columns = ['Session', 'Subject', 'Label']
label_df

Unnamed: 0,Session,Subject,Label
0,100027089657,f37b1d2e3cec40ba88ec39be79577f65,1
1,100039817943,210909b9725245c5a09e052b931447f4,0
2,100086429574,ad32654d20f345b0bab70ffba08df770,1
3,100166373876,bcc51523e7014bdda6b5867de8e7a6a5,2
4,100197260038,d8200ab1eb464d72998f497732d921a3,1
...,...,...,...
3255,997728775571,d71ac2a05099410b82d8a070b59ec48d,0
3256,998819920173,bbc73dd2480442108e5307511a1b9b75,1
3257,999155686841,b3ddd61266c842a9b98597579f1054df,0
3258,999370949215,4813b0173c2544f3b0bd987300cb8021,2


In [30]:
temp_df = label_df.merge(file_df, on=['Subject', 'Session'], how='left')
temp_df

Unnamed: 0,Session,Subject,Label,Path
0,100027089657,f37b1d2e3cec40ba88ec39be79577f65,1,
1,100039817943,210909b9725245c5a09e052b931447f4,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-210909b9725245c5a09e052b931447f4/ses-100039817943/t1/spm/segmentation/normalized_space/sub-210909b9725245c5a09e052b931447f4_ses-100039817943_20120107-AX_MPR_RECON-13_space-Ixi549Space_T1w.nii.gz
2,100086429574,ad32654d20f345b0bab70ffba08df770,1,
3,100166373876,bcc51523e7014bdda6b5867de8e7a6a5,2,
4,100197260038,d8200ab1eb464d72998f497732d921a3,1,
...,...,...,...,...
4767,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_2_20080408-AX_MPR_1MM_POST-102_space-Ixi549Space_T1w.nii.gz
4768,999155686841,b3ddd61266c842a9b98597579f1054df,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-b3ddd61266c842a9b98597579f1054df/ses-999155686841/t1/spm/segmentation/normalized_space/sub-b3ddd61266c842a9b98597579f1054df_ses-999155686841_2_20130130-SAG_3D_T1___MS_P_MPR_Ax_mpr_recon-19_space-Ixi549Space_T1w.nii.gz
4769,999155686841,b3ddd61266c842a9b98597579f1054df,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-b3ddd61266c842a9b98597579f1054df/ses-999155686841/t1/spm/segmentation/normalized_space/sub-b3ddd61266c842a9b98597579f1054df_ses-999155686841_20130130-SAG_3D_T1___MS_P_MPR_Ax_mpr_recon-16_space-Ixi549Space_T1w.nii.gz
4770,999370949215,4813b0173c2544f3b0bd987300cb8021,2,


In [58]:
final_df = temp_df[~temp_df.isna().any(axis=1)]
final_df[final_df.Session == 999155686841]

Unnamed: 0,Session,Subject,Label,Path
4768,999155686841,b3ddd61266c842a9b98597579f1054df,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-b3ddd61266c842a9b98597579f1054df/ses-999155686841/t1/spm/segmentation/normalized_space/sub-b3ddd61266c842a9b98597579f1054df_ses-999155686841_2_20130130-SAG_3D_T1___MS_P_MPR_Ax_mpr_recon-19_space-Ixi549Space_T1w.nii.gz
4769,999155686841,b3ddd61266c842a9b98597579f1054df,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-b3ddd61266c842a9b98597579f1054df/ses-999155686841/t1/spm/segmentation/normalized_space/sub-b3ddd61266c842a9b98597579f1054df_ses-999155686841_20130130-SAG_3D_T1___MS_P_MPR_Ax_mpr_recon-16_space-Ixi549Space_T1w.nii.gz


In [59]:
final_df

Unnamed: 0,Session,Subject,Label,Path
1,100039817943,210909b9725245c5a09e052b931447f4,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-210909b9725245c5a09e052b931447f4/ses-100039817943/t1/spm/segmentation/normalized_space/sub-210909b9725245c5a09e052b931447f4_ses-100039817943_20120107-AX_MPR_RECON-13_space-Ixi549Space_T1w.nii.gz
5,100402864124,fc9d3132547b40d88b14b49f22059a7d,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-fc9d3132547b40d88b14b49f22059a7d/ses-100402864124/t1/spm/segmentation/normalized_space/sub-fc9d3132547b40d88b14b49f22059a7d_ses-100402864124_20111201-AX_3D_MPR-15_space-Ixi549Space_T1w.nii.gz
6,100402864124,fc9d3132547b40d88b14b49f22059a7d,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-fc9d3132547b40d88b14b49f22059a7d/ses-100402864124/t1/spm/segmentation/normalized_space/sub-fc9d3132547b40d88b14b49f22059a7d_ses-100402864124_2_20111201-AX_3D_MPR_MPR_3mm_sag_mpr-17_space-Ixi549Space_T1w.nii.gz
9,100561623079,19a7816500184206baae665fb54d9486,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-19a7816500184206baae665fb54d9486/ses-100561623079/t1/spm/segmentation/normalized_space/sub-19a7816500184206baae665fb54d9486_ses-100561623079_20111115-SAG_3D_MPR-10_space-Ixi549Space_T1w.nii.gz
11,100705385639,4c1833793b70449da1017a8eb7592b12,2,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-4c1833793b70449da1017a8eb7592b12/ses-100705385639/t1/spm/segmentation/normalized_space/sub-4c1833793b70449da1017a8eb7592b12_ses-100705385639_20100307-SAG_MPR_ISO-3_space-Ixi549Space_T1w.nii.gz
...,...,...,...,...
4765,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_4_20080408-SAG_3D_MPR-19_space-Ixi549Space_T1w.nii.gz
4766,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_3_20080408-SAG_3D_MPR-10_space-Ixi549Space_T1w.nii.gz
4767,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_2_20080408-AX_MPR_1MM_POST-102_space-Ixi549Space_T1w.nii.gz
4768,999155686841,b3ddd61266c842a9b98597579f1054df,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-b3ddd61266c842a9b98597579f1054df/ses-999155686841/t1/spm/segmentation/normalized_space/sub-b3ddd61266c842a9b98597579f1054df_ses-999155686841_2_20130130-SAG_3D_T1___MS_P_MPR_Ax_mpr_recon-19_space-Ixi549Space_T1w.nii.gz


In [63]:
print(final_df.Subject.unique().shape)
print(final_df.groupby('Subject').size().value_counts())

(1182,)
2     454
1     323
3     187
4      90
5      43
6      25
7      16
9       7
8       5
11      5
17      4
20      4
12      4
13      3
15      2
18      2
19      2
10      2
14      1
23      1
24      1
29      1
dtype: int64


In [65]:
final_df[final_df.Subject == 'c3c9b03bd3a34702ba236fca4051394c']
df_s = final_df.groupby('Subject').size()
df_s[df_s == 29]

Subject
1ef4d27d69264e11b0fc6053478a763a    29
dtype: int64

In [66]:
final_df[final_df.Subject == '1ef4d27d69264e11b0fc6053478a763a']

Unnamed: 0,Session,Subject,Label,Path
1215,179266678864,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-179266678864/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-179266678864_2_20120819-SAG_MPR_RECON-7_space-Ixi549Space_T1w.nii.gz
1216,179266678864,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-179266678864/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-179266678864_20120819-AX_3D_MPR_NEW-6_space-Ixi549Space_T1w.nii.gz
1336,186484167275,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-186484167275/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-186484167275_20120330-AX_3D_MPR-4_space-Ixi549Space_T1w.nii.gz
1337,186484167275,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-186484167275/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-186484167275_2_20120330-SAG_MPR_RECON-101_space-Ixi549Space_T1w.nii.gz
1563,200310516467,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-200310516467/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-200310516467_20111126-AX_3D_MPR-10_space-Ixi549Space_T1w.nii.gz
1564,200310516467,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-200310516467/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-200310516467_2_20111126-SAG_MPR-104_space-Ixi549Space_T1w.nii.gz
1605,203953868512,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-203953868512/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-203953868512_20120809-AX_3D_MPR-13_space-Ixi549Space_T1w.nii.gz
1606,203953868512,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-203953868512/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-203953868512_2_20120809-SAG_MPR_RECON-104_space-Ixi549Space_T1w.nii.gz
1625,205281305305,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-205281305305/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-205281305305_2_20110415-AX_3D_MPR-17_space-Ixi549Space_T1w.nii.gz
1626,205281305305,1ef4d27d69264e11b0fc6053478a763a,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-1ef4d27d69264e11b0fc6053478a763a/ses-205281305305/t1/spm/segmentation/normalized_space/sub-1ef4d27d69264e11b0fc6053478a763a_ses-205281305305_3_20110415-AX_MPR_RECON-18_space-Ixi549Space_T1w.nii.gz


In [61]:
print(label_df.Subject.unique().shape)

(1910,)


In [43]:
df1 = pd.read_csv('/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1/participant_table.tsv', sep='\t')
df2 = pd.read_csv('/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1/participant_table_2.tsv', sep='\t')

In [50]:
df_temp = pd.concat([df1, df2], axis=0)
df_temp

Unnamed: 0,participant_id,session_id
0,sub-31b509f05cc14f6baafa1e00da757ba6,ses-140967841958_2
1,sub-52c9dddc00324e4fafdabee7dc481762,ses-177266196693_2
2,sub-0b0a3ab599ef4649a522f37593eb9d7f,ses-248414200344_2
3,sub-cb07e3f8c6bf43ea967cd88e28d2b1a6,ses-577518746824
4,sub-73a281d2122342c2b1cfde7e678184c8,ses-275107855313
...,...,...
3694,sub-d9362cc19b954e8787e3eb856fa5614e,ses-628974684015_2
3695,sub-f871293aa0c4438baab5e10894626d8c,ses-176266627563
3696,sub-8f55c3e279dc42f6a579925fbca0e7e9,ses-300291254972
3697,sub-e645567533d3422ebc85be6ab9a210fc,ses-294879129463


In [69]:
final_df

Unnamed: 0,Session,Subject,Label,Path
0,100039817943,210909b9725245c5a09e052b931447f4,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-210909b9725245c5a09e052b931447f4/ses-100039817943/t1/spm/segmentation/normalized_space/sub-210909b9725245c5a09e052b931447f4_ses-100039817943_20120107-AX_MPR_RECON-13_space-Ixi549Space_T1w.nii.gz
1,100402864124,fc9d3132547b40d88b14b49f22059a7d,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-fc9d3132547b40d88b14b49f22059a7d/ses-100402864124/t1/spm/segmentation/normalized_space/sub-fc9d3132547b40d88b14b49f22059a7d_ses-100402864124_20111201-AX_3D_MPR-15_space-Ixi549Space_T1w.nii.gz
2,100402864124,fc9d3132547b40d88b14b49f22059a7d,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-fc9d3132547b40d88b14b49f22059a7d/ses-100402864124/t1/spm/segmentation/normalized_space/sub-fc9d3132547b40d88b14b49f22059a7d_ses-100402864124_2_20111201-AX_3D_MPR_MPR_3mm_sag_mpr-17_space-Ixi549Space_T1w.nii.gz
3,100561623079,19a7816500184206baae665fb54d9486,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-19a7816500184206baae665fb54d9486/ses-100561623079/t1/spm/segmentation/normalized_space/sub-19a7816500184206baae665fb54d9486_ses-100561623079_20111115-SAG_3D_MPR-10_space-Ixi549Space_T1w.nii.gz
4,100705385639,4c1833793b70449da1017a8eb7592b12,2,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-4c1833793b70449da1017a8eb7592b12/ses-100705385639/t1/spm/segmentation/normalized_space/sub-4c1833793b70449da1017a8eb7592b12_ses-100705385639_20100307-SAG_MPR_ISO-3_space-Ixi549Space_T1w.nii.gz
...,...,...,...,...
3231,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_4_20080408-SAG_3D_MPR-19_space-Ixi549Space_T1w.nii.gz
3232,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_3_20080408-SAG_3D_MPR-10_space-Ixi549Space_T1w.nii.gz
3233,998819920173,bbc73dd2480442108e5307511a1b9b75,1,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_b/subjects/sub-bbc73dd2480442108e5307511a1b9b75/ses-998819920173/t1/spm/segmentation/normalized_space/sub-bbc73dd2480442108e5307511a1b9b75_ses-998819920173_2_20080408-AX_MPR_1MM_POST-102_space-Ixi549Space_T1w.nii.gz
3234,999155686841,b3ddd61266c842a9b98597579f1054df,0,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_A_part_a/subjects/sub-b3ddd61266c842a9b98597579f1054df/ses-999155686841/t1/spm/segmentation/normalized_space/sub-b3ddd61266c842a9b98597579f1054df_ses-999155686841_2_20130130-SAG_3D_T1___MS_P_MPR_Ax_mpr_recon-19_space-Ixi549Space_T1w.nii.gz


In [70]:
final_df.reset_index(inplace=True, drop=True)
final_df.to_csv('/gpfs/home/lc3424/capstone/2021_dementia/lc3424_workspace/experiments/20211102/label_with_file_path_20211102.tsv', sep='\t')