In [2]:
import os

import nibabel as nib
import numpy as np
import pandas as pd


In [43]:
class Patient:
    """Demographics for one metadata row plus the path to that subject's image.

    The image path is built from the row's dataset, subject id and (optional)
    session label. ROI-wise mean grey values are produced in two steps:
    `get_patient_rois` groups voxel values by atlas label, and
    `make_patient_df` turns those groups into a long-format DataFrame.
    """

    def __init__(self, i, metadata):
        """Read one row of `metadata` and derive the image path.

        Parameters
        ----------
        i : index label
            Row label passed to ``metadata.loc``.
        metadata : pandas.DataFrame
            Must contain the columns ``subj_id``, ``dataset``, ``site_string``,
            ``age``, ``sex_string``, ``diagnosis`` and ``ses``.
        """
        self.id = metadata.loc[i, 'subj_id']
        self.dataset = metadata.loc[i, 'dataset']
        self.site = metadata.loc[i, 'site_string']
        self.age = metadata.loc[i, 'age']
        self.sex = metadata.loc[i, 'sex_string']
        self.diagnosis = metadata.loc[i, 'diagnosis']
        self.ses = metadata.loc[i, 'ses']

        # A missing session means the data set has no session sub-directory.
        # pd.isna is used rather than `type(...) == float` because a NaN read
        # from a float64 column is an np.float64, whose type is not `float`.
        # NOTE(review): the 's6mwp1' prefix presumably marks a smoothed,
        # modulated, warped grey-matter map -- confirm against the pipeline.
        if pd.isna(self.ses):
            patient_dir = f"/fs04/kg98/trangc/VBM/data/{self.dataset}/{self.id}/anat/"
            self.img = f"{patient_dir}s6mwp1{self.id}_T1w.nii"
        else:
            patient_dir = f"/fs04/kg98/trangc/VBM/data/{self.dataset}/{self.id}/{self.ses}/anat/"
            self.img = f"{patient_dir}s6mwp1{self.id}_{self.ses}_T1w.nii"

    @staticmethod
    def _group_voxels_by_roi(gmv, atlas, n_parcs):
        """Group the values of `gmv` by the atlas label of their voxel.

        Only voxels whose atlas label lies in ``(0, n_parcs]`` are kept.

        Returns
        -------
        dict
            Maps each atlas label present (as found in `atlas`) to the list of
            `gmv` values at that label's voxels, in row-major voxel order.

        Raises
        ------
        ValueError
            If the leading dimensions of `gmv` do not match `atlas.shape`.
        """
        atlas = np.asarray(atlas)
        gmv = np.asarray(gmv)

        # Indexing a differently shaped image with atlas coordinates would
        # either fail obscurely or silently read the wrong voxels.
        if gmv.shape[:atlas.ndim] != atlas.shape:
            raise ValueError(
                f"image shape {gmv.shape} does not match atlas shape {atlas.shape}"
            )

        # Voxels that belong to one of the requested parcels
        mask = (atlas > 0) & (atlas <= n_parcs)
        roi_labels = atlas[mask]
        gray_values = gmv[mask]

        # A stable sort keeps the original voxel order inside every parcel,
        # so each group matches what a voxel-by-voxel loop would collect.
        order = np.argsort(roi_labels, kind='stable')
        sorted_values = gray_values[order]
        labels, starts = np.unique(roi_labels[order], return_index=True)
        chunks = np.split(sorted_values, starts[1:])

        return {label: list(chunk) for label, chunk in zip(labels, chunks)}

    def get_patient_rois(self, atlas, n_parcs=66):
        """Load this patient's image and group its voxel values by atlas parcel.

        Parameters
        ----------
        atlas : array-like
            Label volume on the same voxel grid as the patient image.
        n_parcs : int, default 66
            Highest atlas label to include; labels 1..n_parcs are kept.
            NOTE(review): the default of 66 is not explained in this file --
            confirm it is the intended subset of the atlas.

        Returns
        -------
        dict
            Atlas label -> list of grey values for that parcel.

        Raises
        ------
        ValueError
            If the image and atlas grids do not match.
        """
        img = nib.load(self.img)
        gmv = img.get_fdata()
        return self._group_voxels_by_roi(gmv, atlas, n_parcs)

    def make_patient_df(self, rois):
        """Build the long-format table of per-ROI mean grey values.

        One row is produced per ROI (in ascending label order) with the
        columns ``MGV``, ``subj_id``, ``roi``, ``diagnosis``, ``age``, ``sex``
        and ``site``. The frame is stored on ``self.patient_data`` and also
        returned for convenience.

        Parameters
        ----------
        rois : dict
            Atlas label -> collection of grey values, as returned by
            `get_patient_rois`.

        Returns
        -------
        pandas.DataFrame
        """
        data = {'MGV': [], 'subj_id': [], 'roi': [], 'diagnosis': [], 'age': [], 'sex': [], 'site': []}

        for roi, values in sorted(rois.items()):
            data['MGV'].append(np.mean(values))
            data['subj_id'].append(self.id)
            data['roi'].append(int(roi))
            data['diagnosis'].append(self.diagnosis)
            data['age'].append(self.age)
            data['sex'].append(self.sex)
            data['site'].append(self.site)

        self.patient_data = pd.DataFrame(data)
        return self.patient_data

In [46]:
# Load patient metadata
# SCA patients are excluded for now; they are available in metadataVBM.csv
# Diagnosis codes in the raw file: 1 = HC, 3 = SCA, 4 = SCZ
metadata = pd.read_csv("/fs04/kg98/trangc/VBM/data/metaVBM_SCZ.csv")

# For the LME we can read beta values directly if healthy controls are coded 0
metadata['diagnosis'] = metadata['diagnosis'].replace({1: 0, 4: 1})

# Load atlas
s132_img = nib.load('/fs03/kg98/gchan/Atlases/Tian/Schaefer_Tian/reordered/Schaefer2018_100Parcels_' +
    '7Networks_order_Tian_Subcortex_S2_MNI152NLin6Asym_1.5mm_reordered.nii.gz')
atlas = s132_img.get_fdata()

# Collect one long-format frame per patient and concatenate once at the end:
# concatenating inside the loop re-copies every accumulated row on each pass.
patient_frames = []

# Iterate over the actual row labels, since Patient looks rows up with .loc
for i in metadata.index:
    patient = Patient(i, metadata)

    rois = patient.get_patient_rois(atlas)
    patient.make_patient_df(rois)

    patient_frames.append(patient.patient_data)

# Output dataframe for the LME (pd.concat rejects an empty list, so guard it)
gmv = pd.concat(patient_frames) if patient_frames else pd.DataFrame()

display(gmv.head())

# Make sure the output directory exists before writing the results
os.makedirs('./results', exist_ok=True)
gmv.to_csv('./results/patient_gmv_demographics.csv', index=False)

Unnamed: 0,MGV,subj_id,roi,diagnosis,age,sex,site
0,0.436102,sub-10002,1,1,30.0,M,Advan_inno
1,0.519296,sub-10002,2,1,30.0,M,Advan_inno
2,0.476011,sub-10002,3,1,30.0,M,Advan_inno
3,0.328923,sub-10002,4,1,30.0,M,Advan_inno
4,0.388468,sub-10002,5,1,30.0,M,Advan_inno
