In [1]:
import pandas as pd
import nibabel as nib
import os
import numpy as np
import matplotlib.pyplot as plt
from nilearn import image
import ants
from datetime import datetime, timedelta
import pydicom as dicom
import pickle
import matplotlib.image as mpimg
from collections import defaultdict, Counter
from scipy import stats
from glob import glob

# Show full cell contents and every column when rendering DataFrames
# (the file paths in this notebook are far longer than the default width).
for display_option in ('display.max_colwidth', 'display.max_columns'):
    pd.set_option(display_option, None)

Matplotlib created a temporary config/cache directory at /tmp/matplotlib-4bcagk4v because the default path (/gpfs/home/lc3424/.config/matplotlib) is not a writable directory; it is highly recommended to set the MPLCONFIGDIR environment variable to a writable directory, in particular to speed up the import of Matplotlib and to better support multiprocessing.


In [4]:
# Collect every NIfTI file under <root>/<sub-dir>/<ses-dir>/t1_linear/ for each
# dataset root and split them into "crop" and "non-crop" lists of
# (sub, ses, file_path) tuples.
dataset_path_list = ['/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/']
dataset_file_path_crop = []
dataset_file_path_non_crop = []


def subject_session_dirs(file_path):
    """Return the ``(subject_dir, session_dir)`` names of a globbed file path.

    The glob pattern used below is ``<root>/<sub>/<ses>/t1_linear/<file>``, so
    the subject and session directories are always the 4th and 3rd path
    components counted from the end. Indexing from the end (instead of the
    absolute positions 9 and 10) keeps this correct for roots of any depth.
    """
    parts = file_path.split('/')
    return parts[-4], parts[-3]


for dataset_path in dataset_path_list:
    for f in glob(dataset_path + '*/*/t1_linear/*.nii.gz'):
        sub_dir, ses_dir = subject_session_dirs(f)

        # Surface session directories containing '_' for manual inspection.
        if '_' in ses_dir:
            print(f)

        # Directory names look like "<prefix>-<id>"; keep the part after the dash.
        record = (sub_dir.split('-')[1], ses_dir.split('-')[1], f)  # (sub, ses, file_path)

        # Classify on the file name only, so a root directory that happens to
        # contain "crop" cannot mark every file as cropped.
        if 'crop' in os.path.basename(f).lower():
            dataset_file_path_crop.append(record)
        else:
            dataset_file_path_non_crop.append(record)



In [6]:
# Turn the (sub, ses, file_path) tuples into DataFrames, one per image variant.
path_columns = ['Subject', 'Session', 'Path']
file_df_crop = pd.DataFrame(dataset_file_path_crop, columns=path_columns)
file_df_non_crop = pd.DataFrame(dataset_file_path_non_crop, columns=path_columns)

# Session ids were parsed out of directory names as strings; convert to integers.
for frame in (file_df_crop, file_df_non_crop):
    frame['Session'] = frame['Session'].astype(int)

# Report the size of each table (crop first, then non-crop).
for frame in (file_df_crop, file_df_non_crop):
    print(frame.shape)


(7107, 3)
(7107, 3)


In [19]:
# Display the cropped-file table (columns: Subject, Session, Path).
file_df_crop

Unnamed: 0,Subject,Session,Path
0,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_20100111-AX_MPR_RECON-23_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
1,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_4_20100111-SAG_MPR_RECON-22_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
2,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_3_20100111-Head_SAG_3D_MPR_PRE_COG-21_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
3,26a0b76ad3804709969fc4ee26bb6a35,148803941489,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-148803941489/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-148803941489_20150305-T1_3D-501_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
4,23d448d44b6d405db15b2d7c39530bf1,620076860576,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-23d448d44b6d405db15b2d7c39530bf1/ses-620076860576/t1_linear/sub-23d448d44b6d405db15b2d7c39530bf1_ses-620076860576_20150804-AX_MPR_RECON-100_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
...,...,...,...
7102,92a60c6773744442bf8a5384d245a4a4,227760717173,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-92a60c6773744442bf8a5384d245a4a4/ses-227760717173/t1_linear/sub-92a60c6773744442bf8a5384d245a4a4_ses-227760717173_20140521-SAG_MPR_ISO-3_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7103,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_2_20080926-SAG_3D_MPR_CP_GW-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7104,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_20080926-AX_MPR-8_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7105,f66300b6f0154904b830d4354ed9b68a,279917155131,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-f66300b6f0154904b830d4354ed9b68a/ses-279917155131/t1_linear/sub-f66300b6f0154904b830d4354ed9b68a_ses-279917155131_2_20120216-SAG_3D_MPR-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz


In [7]:
# Get the age for each (subject, session).
# The age used is the *scan* age: the recorded Age minus the number of years
# elapsed between the scan and the visit.
# NOTE(review): this presumes Age was recorded at visit_date — confirm.

age_columns = ['random_pat_id', 'de-identified acc', 'Age', 'scan_date_time', 'visit_date']
orig_label_file = pd.read_csv('/gpfs/home/lc3424/capstone/de_id_MRIs_Specialists_diag.csv')
# Selecting the needed columns also discards the stray 'Unnamed: 0' index
# column; chaining (instead of inplace ops on a slice) avoids
# SettingWithCopyWarning.
orig_label_file = (
    orig_label_file[age_columns]
    .dropna(axis=0)
    .astype({'de-identified acc': int})
    .assign(
        scan_date_time=lambda d: pd.to_datetime(d['scan_date_time']),
        visit_date=lambda d: pd.to_datetime(d['visit_date']),
    )
)

# Vectorised equivalent of Age - (visit_date - scan_date_time) / 365 days,
# addressing columns by name rather than by position within the row.
years_scan_to_visit = (
    (orig_label_file['visit_date'] - orig_label_file['scan_date_time']) / pd.Timedelta(days=365)
)
orig_label_file['scan_age'] = orig_label_file['Age'] - years_scan_to_visit

# One row per (patient, accession): average the scan age over duplicate rows,
# then round to whole years.
orig_label_file = (
    orig_label_file
    .groupby(['random_pat_id', 'de-identified acc'], as_index=False)['scan_age']
    .mean()
)
orig_label_file['scan_age'] = orig_label_file['scan_age'].round().astype(int)
orig_label_file = orig_label_file.rename(
    columns={'random_pat_id': 'Subject', 'de-identified acc': 'Session', 'scan_age': 'Age'}
)
print(orig_label_file.shape)

label_df = pd.read_csv('/gpfs/home/lc3424/capstone/2021_dementia/lc3424_workspace/experiments/20211102/label_20211102.tsv', sep='\t')
label_df.columns = ['Session', 'Subject', 'Label']
print(label_df.shape)
# Attach Age by Session only, keeping label_df's own Subject column. Merging
# just the needed columns removes the Subject_x / Subject_y clean-up step.
# NOTE(review): if a Session maps to several subjects in orig_label_file this
# left merge duplicates label rows — verify Session is unique there.
label_df = label_df.merge(orig_label_file[['Session', 'Age']], on='Session', how='left')
label_df.head()

(10316, 3)
(3260, 3)


Unnamed: 0,Session,Subject,Label,Age
0,100027089657,f37b1d2e3cec40ba88ec39be79577f65,1,84
1,100039817943,210909b9725245c5a09e052b931447f4,0,52
2,100086429574,ad32654d20f345b0bab70ffba08df770,1,63
3,100166373876,bcc51523e7014bdda6b5867de8e7a6a5,2,69
4,100197260038,d8200ab1eb464d72998f497732d921a3,1,70


In [18]:
# Display file_df_crop again (this cell only renders the table; it changes nothing).
# NOTE(review): repeats an earlier display cell — candidate for removal.
file_df_crop

Unnamed: 0,Subject,Session,Path
0,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_20100111-AX_MPR_RECON-23_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
1,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_4_20100111-SAG_MPR_RECON-22_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
2,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_3_20100111-Head_SAG_3D_MPR_PRE_COG-21_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
3,26a0b76ad3804709969fc4ee26bb6a35,148803941489,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-148803941489/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-148803941489_20150305-T1_3D-501_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
4,23d448d44b6d405db15b2d7c39530bf1,620076860576,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-23d448d44b6d405db15b2d7c39530bf1/ses-620076860576/t1_linear/sub-23d448d44b6d405db15b2d7c39530bf1_ses-620076860576_20150804-AX_MPR_RECON-100_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
...,...,...,...
7102,92a60c6773744442bf8a5384d245a4a4,227760717173,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-92a60c6773744442bf8a5384d245a4a4/ses-227760717173/t1_linear/sub-92a60c6773744442bf8a5384d245a4a4_ses-227760717173_20140521-SAG_MPR_ISO-3_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7103,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_2_20080926-SAG_3D_MPR_CP_GW-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7104,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_20080926-AX_MPR-8_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7105,f66300b6f0154904b830d4354ed9b68a,279917155131,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-f66300b6f0154904b830d4354ed9b68a/ses-279917155131/t1_linear/sub-f66300b6f0154904b830d4354ed9b68a_ses-279917155131_2_20120216-SAG_3D_MPR-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz


In [20]:
# Display file_df_crop once more (render only, no modification).
# NOTE(review): repeats an earlier display cell — candidate for removal.
file_df_crop

Unnamed: 0,Subject,Session,Path
0,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_20100111-AX_MPR_RECON-23_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
1,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_4_20100111-SAG_MPR_RECON-22_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
2,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_3_20100111-Head_SAG_3D_MPR_PRE_COG-21_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
3,26a0b76ad3804709969fc4ee26bb6a35,148803941489,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-148803941489/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-148803941489_20150305-T1_3D-501_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
4,23d448d44b6d405db15b2d7c39530bf1,620076860576,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-23d448d44b6d405db15b2d7c39530bf1/ses-620076860576/t1_linear/sub-23d448d44b6d405db15b2d7c39530bf1_ses-620076860576_20150804-AX_MPR_RECON-100_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
...,...,...,...
7102,92a60c6773744442bf8a5384d245a4a4,227760717173,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-92a60c6773744442bf8a5384d245a4a4/ses-227760717173/t1_linear/sub-92a60c6773744442bf8a5384d245a4a4_ses-227760717173_20140521-SAG_MPR_ISO-3_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7103,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_2_20080926-SAG_3D_MPR_CP_GW-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7104,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_20080926-AX_MPR-8_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz
7105,f66300b6f0154904b830d4354ed9b68a,279917155131,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-f66300b6f0154904b830d4354ed9b68a/ses-279917155131/t1_linear/sub-f66300b6f0154904b830d4354ed9b68a_ses-279917155131_2_20120216-SAG_3D_MPR-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz


In [15]:
# Attach Label and Age to every file row, then keep only rows where no field
# is missing (i.e. files whose (Subject, Session) has a label and an age).
merge_keys = ['Subject', 'Session']
df_crop = file_df_crop.merge(label_df, how='left', on=merge_keys).dropna(axis=0, how='any')
df_non_crop = file_df_non_crop.merge(label_df, how='left', on=merge_keys).dropna(axis=0, how='any')


In [25]:
# Rebuild df_crop: left-merge the labels onto the cropped-file table and keep
# only the rows in which every column is populated.
merged_crop = file_df_crop.merge(label_df, how='left', on=['Subject', 'Session'])
df_crop = merged_crop[merged_crop.notna().all(axis=1)]

In [26]:
# Print the (rows, columns) of the labelled crop and non-crop tables side by side.
labelled_shapes = [frame.shape for frame in (df_crop, df_non_crop)]
print(*labelled_shapes)

(3306, 5) (3306, 5)


In [28]:
# Renumber the rows of each labelled table from 0 and write it out as a
# tab-separated file (crop first, then non-crop).
out_dir = '/gpfs/home/lc3424/capstone/2021_dementia/lc3424_workspace/experiments/20211102/'
for frame, variant in ((df_crop, 'crop'), (df_non_crop, 'non_crop')):
    frame.reset_index(inplace=True, drop=True)
    frame.to_csv(out_dir + 'linear_' + variant + '_label_with_file_path_with_age_20211102.tsv', sep='\t')

In [27]:
# Display the labelled cropped-file table (file_df_crop merged with label_df,
# rows with any missing value removed).
df_crop

Unnamed: 0,Subject,Session,Path,Label,Age
0,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_20100111-AX_MPR_RECON-23_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,1.0,67.0
1,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_4_20100111-SAG_MPR_RECON-22_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,1.0,67.0
2,26a0b76ad3804709969fc4ee26bb6a35,146241246534,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-26a0b76ad3804709969fc4ee26bb6a35/ses-146241246534/t1_linear/sub-26a0b76ad3804709969fc4ee26bb6a35_ses-146241246534_3_20100111-Head_SAG_3D_MPR_PRE_COG-21_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,1.0,67.0
5,e11038a612204528b0797519337fd5a8,151767910304,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-e11038a612204528b0797519337fd5a8/ses-151767910304/t1_linear/sub-e11038a612204528b0797519337fd5a8_ses-151767910304_20080714-AX_3D_MPR_1MMiso_gw-7_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,2.0,74.0
6,e11038a612204528b0797519337fd5a8,151767910304,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-e11038a612204528b0797519337fd5a8/ses-151767910304/t1_linear/sub-e11038a612204528b0797519337fd5a8_ses-151767910304_3_20080714-SAG_MPR_3MM-10_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,2.0,74.0
...,...,...,...,...,...
7099,63ab908c9047411998a221cc438bcf3d,168336444002,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-63ab908c9047411998a221cc438bcf3d/ses-168336444002/t1_linear/sub-63ab908c9047411998a221cc438bcf3d_ses-168336444002_3_20071228-SAG_MPR_RECON-103_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,0.0,54.0
7100,63ab908c9047411998a221cc438bcf3d,168336444002,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-63ab908c9047411998a221cc438bcf3d/ses-168336444002/t1_linear/sub-63ab908c9047411998a221cc438bcf3d_ses-168336444002_2_20071228-AX_MPR_RECON-104_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,0.0,54.0
7102,92a60c6773744442bf8a5384d245a4a4,227760717173,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-92a60c6773744442bf8a5384d245a4a4/ses-227760717173/t1_linear/sub-92a60c6773744442bf8a5384d245a4a4_ses-227760717173_20140521-SAG_MPR_ISO-3_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,1.0,63.0
7103,8f4775b55c434d6d933942eb0cc36108,864946661458,/gpfs/data/razavianlab/data/mri/nyu/barlow_bids_t1_preprocess_lin/subjects/sub-8f4775b55c434d6d933942eb0cc36108/ses-864946661458/t1_linear/sub-8f4775b55c434d6d933942eb0cc36108_ses-864946661458_2_20080926-SAG_3D_MPR_CP_GW-6_T1w_space-MNI152NLin2009cSym_desc-Crop_res-1x1x1_T1w.nii.gz,1.0,78.0
