## Extract total brain volume and data collection site

In [2]:
from os.path import join
import nibabel as nib
import pandas as pd
import numpy as np
from glob import glob
import json
from datetime import date

# ISO-formatted (YYYY-MM-DD) run date, used to stamp the output file name.
today = date.today().isoformat()

In [55]:
# Locations of the BIDS data, the QSIPrep outputs and the analysis folder.
bidspath = '/gpfs/milgram/pi/gee_dylan/candlab/data/mri/bids_recon/shapes'
datapath = '/gpfs/milgram/pi/gee_dylan/candlab/analyses/shapes/dwi/QSIPrep/output_data/qsiprep'
analysis = '/gpfs/milgram/pi/gee_dylan/candlab/analyses/shapes/Shapes_Phenotyping/Analysis'

# Subject list: the first column of the CSV is taken as the subject IDs.
subjects = pd.read_csv(
    '/gpfs/milgram/pi/gee_dylan/candlab/analyses/shapes/Shapes_Phenotyping/Analysis/subjectlist_2022-10-26.csv'
)
first_column = subjects.iloc[:, 0]
sublist = first_column.tolist()

In [50]:
# Pull eTIV (estimated total intracranial volume) from the Freesurfer aseg.stats files
def read_etiv(stats_path, tag='# Measure EstimatedTotalIntraCranialVol'):
    """Return the eTIV value stored in a Freesurfer ``aseg.stats`` file.

    The file is scanned for the header line that starts with ``tag`` and the
    fourth comma-separated field of that line is returned as a float. Looking
    the line up by its tag avoids depending on a fixed row number.

    NOTE(review): assumes the measure line has the form
    ``# Measure EstimatedTotalIntraCranialVol, eTIV, <description>, <value>, <unit>``
    -- confirm against the Freesurfer version used for this dataset.

    Parameters
    ----------
    stats_path : str
        Path to the subject's ``aseg.stats`` file.
    tag : str, optional
        Prefix identifying the eTIV measure line.

    Returns
    -------
    float
        The eTIV value read from the file.

    Raises
    ------
    OSError
        If the file cannot be opened.
    ValueError
        If no line starts with ``tag`` or the value is not numeric.
    IndexError
        If the measure line has fewer than four comma-separated fields.
    """
    with open(stats_path) as stats_file:
        for line in stats_file:
            if line.startswith(tag):
                return float(line.split(',')[3])
    raise ValueError('No eTIV measure line found in {}'.format(stats_path))


volumes = []

for sub in sublist:
    stats_path = '/gpfs/milgram/project/gee_dylan/candlab/data/mri/shapes_freesurfer/{}/stats/aseg.stats'.format(sub)
    try:
        etiv = read_etiv(stats_path)
    except (OSError, ValueError, IndexError):
        # Only missing or malformed stats files are skipped; any other error
        # (e.g. a typo in this cell) is allowed to surface instead of being hidden.
        print('Error accessing stats for {}'.format(sub))
        continue
    volumes.append([sub, etiv])

Error accessing stats for sub-A200
Error accessing stats for sub-A680


In [52]:
# # Calculate ICV from the QSIPrep brain mask (disabled alternative; its output was compared to FSLstats and is the same)

# volumes = []
# for i in range(0, len(sublist)):
#     sub = sublist[i]
#     # Calculate ICV
#     bm_file = datapath + '/{}/anat/{}_desc-brain_mask.nii.gz'.format(sub, sub) # File format for brain mask files
#     image_file = nib.load(bm_file) # Load brain mask
#     image_values = image_file.get_fdata().flatten() # Load nifti values and reshape to single dim array
#     icv = len(image_values[image_values > 0]) # ICV is number of non-zero voxels in brain mask

#     #Append to data frame 
#     volumes.append([sub, icv])

In [53]:
# Pull the scan site for each subject from the T1w JSON sidecar in the BIDS folder
sites = []

for sub in sublist:
    sidecar_path = bidspath + '/{}/ses-shapesV1/anat/{}_ses-shapesV1_T1w.json'.format(sub, sub)
    # The context manager closes the file handle as soon as the JSON is parsed,
    # rather than leaving one open handle per subject.
    with open(sidecar_path) as sidecar_file:
        metadata = json.load(sidecar_file)
    # A missing sidecar or a missing 'InstitutionName' key still raises, as before.
    sites.append([sub, metadata['InstitutionName']])

In [57]:
# Volume table, ordered by subject ID.
# NOTE(review): the column is called 'icv_voxels', but `volumes` is filled by the
# Freesurfer eTIV cell -- confirm the intended name/units with downstream users.
voxdf = pd.DataFrame(volumes, columns=['subjectid', 'icv_voxels'])
voxdf = voxdf.sort_values(by=['subjectid'])

# Scan-site table.
sitedf = pd.DataFrame(sites, columns=['subjectid', 'site'])

# Inner merge: only subjects present in both tables are kept.
finaldf = pd.merge(voxdf, sitedf, on='subjectid', how='inner')

# Swap the raw institution strings for short scanner-site labels.
site_labels = [
    ('Yale_University_-_Dunham_Lab._Bldg.', 'BIC'),
    ('Cedar_300_New_Haven_CT_US_06510', 'MRRC'),
]
for raw_name, short_label in site_labels:
    finaldf = finaldf.replace(raw_name, short_label)

# Write the date-stamped CSV into the analysis folder.
outfile = analysis + '/IntracranialVolumes_ScanSites_{}.csv'.format(today)
finaldf.to_csv(outfile, index=False)