# ADNI_COMB_LT
##### Xiao Gao, Department of Radiology and Biomedical Imaging, UCSF; Myriam Chaumeil Lab (xiao.gao@ucsf.edu; xiao.gao@berkeley.edu) 


### Purpose: Reorganizing the bloated (very wide) `adnicomb` dataset into a compact form for specific neuroimaging research

### Prerequisite: `adnicomb` Pandas DataFrame

### Notice: This document is presented by the author(s) as a service to ADNI data users. However, users should be aware that no formal review process has vetted this document and that ADNI cannot guarantee the accuracy or utility of this document.

## 0. Loading Libraries

In [1]:
import os 
import numpy as np 
import pandas as pd
import re

In [2]:
# Work from the parent directory of this notebook (.ipynb) file.
working_dir = os.path.dirname(os.getcwd())

# Input files are located by file name anywhere below working_dir.
# If a name occurs more than once, the last one visited by os.walk is used.
input_paths = {
    'adnicomb_v1_5.pkl': None,
    'adnicomb_list.csv': None,
    'adnicomb_naming_convention.csv': None,
}
for root, dirs, files in os.walk(working_dir):
    for name in files:
        if name in input_paths:
            input_paths[name] = os.path.join(root, name)

# Fail early with a readable message instead of a NameError further down.
missing_inputs = sorted(name for name, path in input_paths.items() if path is None)
if missing_inputs:
    raise FileNotFoundError(
        'Could not find ' + ', '.join(missing_inputs) + ' under ' + working_dir)

adnicomb_dir = input_paths['adnicomb_v1_5.pkl']
comb_list = input_paths['adnicomb_list.csv']
nm_conv = input_paths['adnicomb_naming_convention.csv']

comb_df = pd.read_csv(comb_list, usecols=['csv', 'alias', 'date_entry', 'subject_entry', 'subject_type','recruit'])
nm_pd = pd.read_csv(nm_conv, usecols=['conv_comb', 'conv_desikan'])
# NOTE: unpickling can execute arbitrary code; only load a pickle you created or trust.
adnicomb = pd.read_pickle(adnicomb_dir)

## 1. Finding the intersection of brain regions across different neuroimaging datasets

In [216]:
# Column-name prefixes (aliases) of the imaging tables whose region lists are collected.
alias_interest = [
    'xv15one',
    'xv30one',
    'xv30two',
    'xv3three',
    'asl',
    'taunpvc',
    'tauwpvc',
    'av45',
    'fbb',
]

In [217]:
# Metric codes that mark a column as a per-region measurement.
# (Kept under its original name; it is a list, not a dict.)
metrics_dict = ['SV', 'CV', 'SA', 'TA', 'TS', 'HS',
                'MIN', 'MAX', 'AVG', 'SD', 'CT', 'MD',
                'SUVR', 'VOLUME']

# A column counts as a brain-region entry when any metric code appears in its
# name without an upper-case letter directly before or after it. One compiled
# alternation replaces a separate search per metric.
metric_pattern = re.compile(
    '(?<![A-Z])(?:' + '|'.join(re.escape(metric) for metric in metrics_dict) + ')(?![A-Z])')

# One column per alias, listing the unique region names found for it.
region_frames = []
for alias in alias_interest:
    alias_columns = adnicomb.filter(regex='^'+alias, axis=1).columns
    # The region name is the second '_'-separated token of the column name.
    # Each column is tested once against its ORIGINAL name; the previous code
    # overwrote the name with the region and then kept testing the remaining
    # metrics against that region string.
    alias_regions = [column.split('_')[1]
                     for column in alias_columns
                     if metric_pattern.search(column)]
    # dict.fromkeys drops duplicates while keeping first-seen order.
    region_frames.append(
        pd.DataFrame(list(dict.fromkeys(alias_regions)), columns=[alias]))

# Columns have different lengths, so shorter ones are padded with NaN here.
neuro_img_reg = pd.concat(region_frames, axis=1) if region_frames else pd.DataFrame()
                       


In [218]:
# Aliases whose region lists are intersected.
# NOTE(review): 'tauwpvc' and 'fbb' are left out of the intersection, yet a
# later cell reads 'fbb_<region>_SUVR' columns for the intersected regions —
# confirm that every intersected region exists for fbb.
alias_interest = ['xv15one','xv30one','xv30two','xv3three','asl','taunpvc', 'av45']

# Fold an inner merge over the aliases to get the regions common to all of them.
inter_region = None
for alias in alias_interest:
    # neuro_img_reg columns are NaN-padded to a common length, and pandas
    # matches NaN merge keys against each other, so the padding is dropped first.
    alias_regions = neuro_img_reg[[alias]].rename(columns={alias: 'region'}).dropna()
    if inter_region is None:
        inter_region = alias_regions
    else:
        inter_region = pd.merge(inter_region, alias_regions, how='inner')

if inter_region is not None:
    # Only matters when a single alias is given (no merge has reset the index).
    inter_region = inter_region.reset_index(drop=True)

In [219]:
# Attach the Desikan-atlas name of every intersected region, using the naming
# convention table (conv_comb -> conv_desikan). When a conv_comb name occurs
# more than once, the first row wins.
desikan_by_region = {}
for comb_name, desikan_name in zip(nm_pd['conv_comb'], nm_pd['conv_desikan']):
    desikan_by_region.setdefault(comb_name, desikan_name)

unmapped_regions = [region for region in inter_region['region']
                    if region not in desikan_by_region]
if unmapped_regions:
    raise KeyError('Regions missing from the naming convention table: '
                   + ', '.join(str(region) for region in unmapped_regions))

# Assign the whole column at once so its dtype is decided from all values
# rather than from whichever row happened to be written first.
inter_region['desikan'] = [desikan_by_region[region] for region in inter_region['region']]

# Sort by Desikan name; rows without a Desikan name (NaN) end up last.
inter_region = inter_region.sort_values(by=['desikan']).reset_index(drop=True)

In [220]:
# Show the intersected regions that have no Desikan name. According to the
# author, these regions outside the Desikan atlas contain no cortical content.
no_desikan_name = inter_region['desikan'].isnull()
inter_region.loc[no_desikan_name]

Unnamed: 0,region,desikan
86,RightVessel,
87,ThirdVentricle,
88,WMHypoIntensities,
89,LeftCerebellumWM,
90,LeftChoroidPlexus,
91,CorpusCallosumAnterior,
92,LeftInferiorLateralVentricle,
93,LeftLateralVentricle,
94,CorpusCallosumCentral,
95,CorpusCallosumMidAnterior,


In [321]:
# Keep only the 'region' column of the rows that do have a Desikan name.
has_desikan_name = inter_region['desikan'].notnull()
desikan_region = inter_region.loc[has_desikan_name, ['region']]
desikan_region.shape

(86, 1)

## 2. Creating ADNICOMB_lt

In [844]:
# Columns of ADNICOMB_lt, grouped by where they come from in `adnicomb`.
# The trailing comment of each group names the source column(s).
adnicomb_lt_col = (
    # merge_RID, merge_COLPROT, merge_ORIGPROT
    ['rid', 'col_prot', 'orig_prot']
    # merge_EXAMDATE, round(merge_Month_bl/6) * 6, merge_AGE + merge_Years_bl
    + ['exam_date', 'mon_bl', 'age']
    # merge_DX, merge_DX_bl, merge_DX_bl, merge_DX, (merge_DX & merge_DX_bl)
    + ['dx', 'dx_bl_fine', 'dx_bl', 'dx_end', 'dx_conv']
    # merge_PTGENDER, merge_PTEDUCAT, merge_APOE4
    + ['gender', 'edu', 'apoe4']
    # merge_ABETA, merge_TAU, merge_PTAU
    + ['abeta', 'tau', 'ptau']
    # merge_CDRSB, merge_ADAS11, merge_ADAS13, merge_ADASQ4
    + ['cdrsb', 'adas11', 'adas13', 'adasq4']
    # merge_MMSE, merge_RAVLT_immediate, merge_RAVLT_learning
    + ['mmse', 'ravlt_immed', 'ravlt_learn']
    # merge_RAVLT_forgetting, merge_RAVLT_perc_forgetting
    + ['ravlt_forget', 'ravlt_perc_forget']
    # merge_LDELTOTAL, merge_DIGITSCOR, merge_TRABSCOR, merge_FAQ
    + ['ldel', 'digit_score', 'trailb', 'faq']
    # merge_MOCA, merge_EcogPtMem, merge_EcogPtLang
    + ['moca', 'ecog_pt_mem', 'ecog_pt_lang']
    # merge_EcogPtVisspat, merge_EcogPtPlan
    + ['ecog_pt_visspat', 'ecog_pt_plan']
    # merge_EcogPtOrgan, merge_EcogPtDivatt, merge_EcogPtTotal
    + ['ecog_pt_organ', 'ecog_pt_divatt', 'ecog_pt_total']
    # merge_EcogSPMem, merge_EcogSPLang, merge_EcogSPVisspat
    + ['ecog_sp_mem', 'ecog_sp_lang', 'ecog_sp_visspat']
    # merge_EcogSPPlan, merge_EcogSPOrgan
    + ['ecog_sp_plan', 'ecog_sp_organ']
    # merge_EcogSPDivatt, merge_EcogSPTotal
    + ['ecog_sp_divatt', 'ecog_sp_total']
    # (xv15one or xv30one or xv30two or xv3three)
    + ['mr_fs', 'mr_3t', 'fs_version', 'fs_icv', 'fs_cv', 'fs_atr']
    # taunpvc
    + ['tau_pet', 'tau_subcort_wm', 'tau_suvr']
    # av45
    + ['av45_pet', 'av45_suvr', 'av45_subcort_wm']
    # fbb
    + ['fbb_pet', 'fbb_suvr', 'fbb_subcort_wm']
    # asl
    + ['asl', 'asl_min', 'asl_max', 'asl_md', 'asl_avg', 'asl_sd', 'asl_ct']
    # (msms1_ or msms2_ + ABETA42, ABETA40, ABETA38)
    + ['msms', 'msms_version', 'msms_abeta42', 'msms_abeta40', 'msms_abeta38']
    # (nfl1_ or nfl2_ + PLASMA_NFL)
    + ['nfl_version', 'nfl']
)

In [845]:
# Build ADNICOMB_lt: one row per participant (RID).
# Subject-level fields are stored as scalars. Visit-level fields are stored as
# ONE array/list per cell, ordered like that participant's rows in `adnicomb`.
# Uses `adnicomb`, `adnicomb_lt_col` and `desikan_region` from earlier cells.
# NOTE(review): 'fs_atr' is declared in adnicomb_lt_col but never filled here,
# so it stays NaN.


def _set_cell(frame, row_mask, column, value):
    """Store `value` as a single object in `frame[column]` for the rows in `row_mask`.

    Going through Series.apply keeps an array/list intact inside one cell; a
    plain `.loc[...] = array` assignment would treat the array as per-row values.
    """
    frame.loc[row_mask, column] = frame.loc[row_mask, column].apply(lambda _: value)


# ADNICOMB_lt column -> `adnicomb` column copied unchanged, one value per visit.
VISIT_LEVEL_SOURCES = {
    'exam_date': 'merge_EXAMDATE',
    'abeta': 'merge_ABETA',
    'tau': 'merge_TAU',
    'ptau': 'merge_PTAU',
    'cdrsb': 'merge_CDRSB',
    'adas11': 'merge_ADAS11',
    'adas13': 'merge_ADAS13',
    'adasq4': 'merge_ADASQ4',
    'mmse': 'merge_MMSE',
    'ldel': 'merge_LDELTOTAL',
    'digit_score': 'merge_DIGITSCOR',
    'trailb': 'merge_TRABSCOR',
    'faq': 'merge_FAQ',
    'ravlt_immed': 'merge_RAVLT_immediate',
    'ravlt_learn': 'merge_RAVLT_learning',
    'ravlt_forget': 'merge_RAVLT_forgetting',
    'ravlt_perc_forget': 'merge_RAVLT_perc_forgetting',
    'moca': 'merge_MOCA',
    'ecog_pt_mem': 'merge_EcogPtMem',
    'ecog_pt_lang': 'merge_EcogPtLang',
    'ecog_pt_visspat': 'merge_EcogPtVisspat',
    'ecog_pt_plan': 'merge_EcogPtPlan',
    'ecog_pt_organ': 'merge_EcogPtOrgan',
    'ecog_pt_divatt': 'merge_EcogPtDivatt',
    'ecog_pt_total': 'merge_EcogPtTotal',
    'ecog_sp_mem': 'merge_EcogSPMem',
    'ecog_sp_lang': 'merge_EcogSPLang',
    'ecog_sp_visspat': 'merge_EcogSPVisspat',
    'ecog_sp_plan': 'merge_EcogSPPlan',
    'ecog_sp_organ': 'merge_EcogSPOrgan',
    'ecog_sp_divatt': 'merge_EcogSPDivatt',
    'ecog_sp_total': 'merge_EcogSPTotal',
}

# merge_DX_bl label -> (dx_bl_fine, dx_bl). Unknown labels are kept unchanged.
BASELINE_DX_CODES = {
    'CN': (1, 1),
    'SMC': (1.5, 1),
    'EMCI': (2, 2),
    'LMCI': (2.5, 2),
    'AD': (3, 3),
}

# merge_PTGENDER label -> numeric code. Unknown labels are kept unchanged.
GENDER_CODES = {'Male': 1, 'Female': 2}

# (column prefix, value written to fs_version, value written to mr_3t);
# the position in this tuple is the column order used when scanning a visit.
FS_PIPELINES = (
    ('xv15one', 43, 0),
    ('xv30one', 51, 1),
    ('xv30two', 51, 1),
    ('xv3three', 60, 1),
)

# (ADNICOMB_lt column prefix, `adnicomb` column prefix) for the PET tables.
PET_TRACERS = (('tau', 'taunpvc'), ('av45', 'av45'), ('fbb', 'fbb'))

ASL_METRICS = ('MIN', 'MAX', 'MD', 'AVG', 'SD', 'CT')
MSMS_ANALYTES = ('ABETA42', 'ABETA40', 'ABETA38')

# A modality is flagged as present for a visit when this region has a value
# (the original code describes it as "the last region").
SENTINEL_REGION = 'LeftVentralDC'

adni_rid_uniq = adnicomb['merge_RID'].unique()

adnicomb_lt = pd.DataFrame(columns = adnicomb_lt_col)
adnicomb_lt.rid = adni_rid_uniq

region_names = desikan_region.region.values
n_regions = desikan_region.shape[0]

for rid in adni_rid_uniq:
    temp_df = adnicomb.loc[adnicomb.merge_RID==rid]
    row_mask = adnicomb_lt.rid == rid
    n_visits = temp_df.shape[0]

    # ---- subject-level protocol fields (taken from the first row) ----
    adnicomb_lt.loc[row_mask, 'col_prot'] = temp_df.merge_COLPROT.values[0]
    adnicomb_lt.loc[row_mask, 'orig_prot'] = temp_df.merge_ORIGPROT.values[0]

    # ---- visit-level fields copied unchanged ----
    # Table-driven so that the target and the source column cannot drift apart
    # (the hand-written version read the 'ptau' cell while filling 'cdrsb').
    for lt_col, source_col in VISIT_LEVEL_SOURCES.items():
        _set_cell(adnicomb_lt, row_mask, lt_col, temp_df[source_col].values)

    # Months from baseline rounded to the nearest multiple of 6, except that
    # visits coded 'm03' are set to 3.
    temp_mon_bl = np.around(temp_df.merge_Month_bl.values/6).astype(int)*6
    temp_mon_bl[(temp_df.merge_VISCODE=='m03').values] = 3
    _set_cell(adnicomb_lt, row_mask, 'mon_bl', temp_mon_bl)

    # Age at each visit = baseline age + years since baseline, 1 decimal.
    _set_cell(adnicomb_lt, row_mask, 'age',
              np.around(temp_df.merge_AGE.values + temp_df.merge_Years_bl.values, decimals=1))

    # ---- diagnosis ----
    # Work on a copy: .values can be a read-only view (pandas Copy-on-Write),
    # and the recoding below writes into the array.
    temp_dx = temp_df.merge_DX.values.copy()
    temp_dx[temp_dx=='CN']=1
    temp_dx[temp_dx=='MCI']=2
    temp_dx[temp_dx=='Dementia']=3
    _set_cell(adnicomb_lt, row_mask, 'dx', temp_dx)

    raw_dx_bl = temp_df.merge_DX_bl.values[0]
    temp_dx_bl_fine, temp_dx_bl = BASELINE_DX_CODES.get(raw_dx_bl, (raw_dx_bl, raw_dx_bl))
    adnicomb_lt.loc[row_mask, 'dx_bl_fine'] = temp_dx_bl_fine
    adnicomb_lt.loc[row_mask, 'dx_bl'] = temp_dx_bl

    # NOTE(review): this is the diagnosis of the LAST row, which is NaN when
    # that visit has no diagnosis (dx_conv is then 0) — confirm whether the
    # last non-missing diagnosis was intended.
    temp_dx_end = temp_dx[-1]
    adnicomb_lt.loc[row_mask, 'dx_end'] = temp_dx_end
    # Scalar comparison; int() on a 1-element array is deprecated in NumPy.
    adnicomb_lt.loc[row_mask, 'dx_conv'] = int(temp_dx_end > temp_dx_bl)

    # ---- demographics (taken from the first row) ----
    raw_gender = temp_df.merge_PTGENDER.values[0]
    adnicomb_lt.loc[row_mask, 'gender'] = GENDER_CODES.get(raw_gender, raw_gender)
    adnicomb_lt.loc[row_mask, 'edu'] = temp_df.merge_PTEDUCAT.values[0]
    adnicomb_lt.loc[row_mask, 'apoe4'] = temp_df.merge_APOE4.values[0]

    # ---- FreeSurfer '<prefix>_<region>_CV' values ----
    temp_mr_fs = np.zeros(n_visits)
    temp_mr_3t = np.full(n_visits, np.nan)
    temp_fs_version = np.full(n_visits, np.nan)
    temp_fs_icv = np.full(n_visits, np.nan)
    temp_fs_cv = np.full((n_visits, n_regions), np.nan)
    for region in desikan_region.region:
        region_mask = region_names == region
        pipeline_values = pd.concat(
            [temp_df[[prefix + '_' + region + '_CV']] for prefix, _, _ in FS_PIPELINES],
            axis=1).values
        for i in range(pipeline_values.shape[0]):
            for j in range(pipeline_values.shape[1]):
                cell = pipeline_values[i][j]
                if np.any(np.isnan(cell)):
                    continue
                # A later pipeline with a value overrides an earlier one.
                temp_fs_cv[i][region_mask] = np.mean(cell)
                if region == SENTINEL_REGION:
                    temp_mr_fs[i] = 1
                    if j < len(FS_PIPELINES):
                        prefix, fs_version, is_3t = FS_PIPELINES[j]
                        temp_fs_version[i] = fs_version
                        temp_mr_3t[i] = is_3t
                        temp_fs_icv[i] = np.mean(temp_df[prefix + '_ICV_CV'].values[i])

    _set_cell(adnicomb_lt, row_mask, 'mr_fs', temp_mr_fs.tolist())
    _set_cell(adnicomb_lt, row_mask, 'mr_3t', temp_mr_3t.tolist())
    _set_cell(adnicomb_lt, row_mask, 'fs_version', temp_fs_version.tolist())
    _set_cell(adnicomb_lt, row_mask, 'fs_icv', temp_fs_icv.tolist())
    _set_cell(adnicomb_lt, row_mask, 'fs_cv', temp_fs_cv.tolist())

    # ---- PET '<prefix>_<region>_SUVR' values (tau, av45, fbb) ----
    for tracer, prefix in PET_TRACERS:
        temp_pet = np.zeros(n_visits)
        temp_subcort_wm = np.full(n_visits, np.nan)
        temp_suvr = np.full((n_visits, n_regions), np.nan)
        for region in desikan_region.region:
            region_mask = region_names == region
            region_values = temp_df[[prefix + '_' + region + '_SUVR']].values
            for i in range(region_values.shape[0]):
                if np.any(np.isnan(region_values[i][0])):
                    continue
                temp_suvr[i][region_mask] = np.mean(region_values[i][0])
                if region == SENTINEL_REGION:
                    temp_pet[i] = 1
                    # NOTE(review): unlike the ICV lookup above, this indexes
                    # [i][0]; that only works if each cell is itself
                    # indexable — kept exactly as written, please confirm.
                    temp_subcort_wm[i] = np.mean(
                        temp_df[prefix + '_ErodedSubcorticalWM_SUVR'].values[i][0])

        _set_cell(adnicomb_lt, row_mask, tracer + '_pet', temp_pet.tolist())
        _set_cell(adnicomb_lt, row_mask, tracer + '_subcort_wm', temp_subcort_wm.tolist())
        _set_cell(adnicomb_lt, row_mask, tracer + '_suvr', temp_suvr.tolist())

    # ---- ASL 'asl_<region>_<metric>' values ----
    temp_asl = np.zeros(n_visits)
    asl_by_metric = {metric: np.full((n_visits, n_regions), np.nan)
                     for metric in ASL_METRICS}
    for region in desikan_region.region:
        region_mask = region_names == region
        metric_values = {metric: temp_df[['asl_' + region + '_' + metric]].values
                         for metric in ASL_METRICS}
        for i in range(n_visits):
            # All six metrics are copied whenever MIN is present for the visit.
            if np.any(np.isnan(metric_values['MIN'][i][0])):
                continue
            for metric in ASL_METRICS:
                asl_by_metric[metric][i][region_mask] = np.mean(metric_values[metric][i][0])
            if region == SENTINEL_REGION:
                temp_asl[i] = 1

    _set_cell(adnicomb_lt, row_mask, 'asl', temp_asl.tolist())
    for metric in ASL_METRICS:
        _set_cell(adnicomb_lt, row_mask, 'asl_' + metric.lower(),
                  asl_by_metric[metric].tolist())

    # ---- MSMS 'msms1_<analyte>' / 'msms2_<analyte>' values ----
    temp_msms = np.zeros(n_visits)
    temp_msms_version = np.full(n_visits, np.nan)
    msms_by_analyte = {analyte: np.full(n_visits, np.nan) for analyte in MSMS_ANALYTES}
    for analyte in MSMS_ANALYTES:
        batch_values = pd.concat(
            [temp_df[['msms1_' + analyte]], temp_df[['msms2_' + analyte]]],
            axis=1).values
        for i in range(batch_values.shape[0]):
            for j in range(batch_values.shape[1]):
                if np.any(np.isnan(batch_values[i][j])):
                    continue
                msms_by_analyte[analyte][i] = np.mean(batch_values[i][j])
                temp_msms[i] = 1
                # The version (1 or 2) is recorded only from the ABETA38 column.
                if analyte == 'ABETA38' and j in (0, 1):
                    temp_msms_version[i] = j + 1

    _set_cell(adnicomb_lt, row_mask, 'msms', temp_msms.tolist())
    _set_cell(adnicomb_lt, row_mask, 'msms_version', temp_msms_version.tolist())
    for analyte in MSMS_ANALYTES:
        _set_cell(adnicomb_lt, row_mask, 'msms_' + analyte.lower(),
                  msms_by_analyte[analyte].tolist())

    # ---- NFL 'nfl1_PLASMA_NFL' / 'nfl2_PLASMA_NFL' values ----
    temp_nfl_version = np.full(n_visits, np.nan)
    temp_nfl = np.full(n_visits, np.nan)
    nfl_values = pd.concat(
        [temp_df[['nfl1_PLASMA_NFL']], temp_df[['nfl2_PLASMA_NFL']]],
        axis=1).values
    for i in range(nfl_values.shape[0]):
        for j in range(nfl_values.shape[1]):
            if np.any(np.isnan(nfl_values[i][j])):
                continue
            temp_nfl[i] = np.mean(nfl_values[i][j])
            if j in (0, 1):
                temp_nfl_version[i] = j + 1

    _set_cell(adnicomb_lt, row_mask, 'nfl', temp_nfl.tolist())
    _set_cell(adnicomb_lt, row_mask, 'nfl_version', temp_nfl_version.tolist())
    

In [848]:
# Save the per-participant table next to the other working files.
lt_pickle_path = working_dir + '/adnicomb_lt.pkl'
adnicomb_lt.to_pickle(lt_pickle_path)

In [4]:
# Reload the saved per-participant table (lets later cells skip the rebuild).
lt_pickle_path = working_dir + '/adnicomb_lt.pkl'
adnicomb_lt = pd.read_pickle(lt_pickle_path)

In [21]:
# Spot check for participant 112: exam dates joined with any columns whose
# name ends in 'RightMiddleTemporal_vol'.
rid_112_rows = adnicomb.loc[adnicomb.merge_RID==112]
rid_112_rows[['merge_RID','merge_EXAMDATE']].join(
    rid_112_rows.filter(regex='RightMiddleTemporal_vol$', axis=1))

Unnamed: 0,merge_RID,merge_EXAMDATE
688,112,2006-02-01
689,112,2006-07-27
690,112,2007-02-07
691,112,2007-07-30
692,112,2008-01-29
693,112,2008-07-25
694,112,2009-01-23
695,112,2009-08-07
696,112,2010-01-28
697,112,2010-08-27
