In [1]:
#getting and working with data
import pandas as pd
import numpy as np
import re
import os
import datetime as dt
import string

#visualizing results
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set_context("poster")
sns.set_style("ticks")
#import yellowbrick as yb

pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

import warnings; warnings.simplefilter('ignore')
np.set_printoptions(suppress=True)

C:\ProgramData\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.CSRRD7HKRKC3T3YXA7VY7TAZGLSWDKW6.gfortran-win_amd64.dll
C:\ProgramData\Anaconda3\lib\site-packages\numpy\.libs\libopenblas.PYQHXLVVQ7VESDPUVUADXEVJOBGHJPAY.gfortran-win_amd64.dll
  stacklevel=1)


In [56]:
path_data = 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Clinical projects/PeskindTBI/freeze_191003/TBIFreeze_20191003.xlsx'

#original file has multiple sheets (for different data set types (e.g. single visit, imaging etc)
#only interested in deploayed controls ('C') and mTBI ('T')
#each participant can have multiple visits - we are currently intersted in data only from the first visit

#we want to create one data frame with one row per participant corresponding to their first study visit and data across columns
first_visit_data = pd.DataFrame()

whole_file = pd.ExcelFile(path_data)
print(whole_file.sheet_names, '\n')

#each data set uses as different column name for visit date, use a dictionary to access corresponding date column name
sheet_dic = {'Single_Record_Measures': 'ScreenDate', 
             'Multi_Record_Measures': 'MeasureDate', 
             'TBI_Symptom_Q': 'NSIFormDate', 
             'DTI_Kleinhans_Lab': 'ScreenDate', 
             'FDG-PET': 'ScreenDate', 
             'Clinical_Labs': 'CLabDate'}

for sheet in whole_file.sheet_names:
    print('Sheet being processed:\n', sheet)
    
    #create intermediate dataframe
    data_int = pd.DataFrame(data = pd.read_excel(whole_file, sheet))

    #select only TBIID C and T (control and TBI)
    data_int = data_int[data_int['TBIID'].str.match(r'[CT]\d\d')]
    print('Data shape only deployed controls and mTBI groups:\n', data_int.shape)

    # create new data frame containing only the first visit record (based on 'ScreenDate' column)
    #add new column with group ID (C = deployed controls, T = blast mTBI)
    first_visit_data_int = pd.DataFrame()

    participants = data_int['TBIID'].unique()
    print('Number of participants:\n', len(participants))

    for part in participants:
        dates = data_int.loc[data_int['TBIID'] == part, sheet_dic[sheet]].values
        min_date = dates.min()
    
        first_date = data_int[(data_int['TBIID'] == part) & (data_int[sheet_dic[sheet]] == min_date)]
        first_visit_data_int = first_visit_data_int.append(first_date)
    
    #reset indexes and add to final dataframe
    first_visit_data_int = first_visit_data_int.reset_index(drop=True)
    #clean up missing values
    first_visit_data_int = first_visit_data_int.replace({-999.0: np.nan, 'None': np.nan})
    
    if first_visit_data.shape[0] < 1:
        first_visit_data = first_visit_data_int
    else:
        first_visit_data = pd.merge(first_visit_data, first_visit_data_int, how='left', on=['TBIID'])
    print('First visit data shape for current sheet:\n', first_visit_data.shape, '\n')

print('Final shape of first visit data:\n', first_visit_data.shape)
first_visit_data.head()

['Single_Record_Measures', 'Multi_Record_Measures', 'TBI_Symptom_Q', 'DTI_Kleinhans_Lab', 'FDG-PET', 'Clinical_Labs'] 

Sheet being processed:
 Single_Record_Measures
Data shape only deployed controls and mTBI groups:
 (215, 835)
Number of participants:
 144
First visit data shape for current sheet:
 (145, 835) 

Sheet being processed:
 Multi_Record_Measures
Data shape only deployed controls and mTBI groups:
 (271, 84)
Number of participants:
 144
First visit data shape for current sheet:
 (145, 918) 

Sheet being processed:
 TBI_Symptom_Q
Data shape only deployed controls and mTBI groups:
 (265, 42)
Number of participants:
 144
First visit data shape for current sheet:
 (145, 959) 

Sheet being processed:
 DTI_Kleinhans_Lab
Data shape only deployed controls and mTBI groups:
 (212, 196)
Number of participants:
 144
First visit data shape for current sheet:
 (145, 1154) 

Sheet being processed:
 FDG-PET
Data shape only deployed controls and mTBI groups:
 (212, 126)
Number of participant

Unnamed: 0,EntityID,DOB,GType,Race,Hispanic,Handedness,Status_x,StatusChangedDate,KFPSubject,TBIID,ScreenDate_x,VisitSeq_x,ScreenAge,Education,branchAF,branchArm,branchCG,branchMar,branchNav,branchNG,branchOth,MOS,MOS2,MOS3,MOS4,MOS5,Marital,servconn,cestotal,ExpPB,ExpMark1,ExpAntiM,ExpStim,ExpOthrS,ExpDEET,ExpTick,ExpPCollr,ExpPStrp,ExpPEnvi,ExpToxic,ExpPaint,ExpXsVib,ExpHStrk,ExpRadar,ExpIonRa,ExpYCake,ExpVhicl,ExpUrRnd,ExpDtOrd,ExpGasM,ExpMOPP,ExpRadBg,ExpAN,ExpNG,ExpTNT,ExpPETN,ExpRDX,ExpNC,ExpANFO,ExpCompB,ExpOctol,ExpPntlt,ExpDynmt,ExpOthrX,TBIMilitaryTBIID,StartDate1,EndDate1,StartDate2,EndDate2,StartDate3,EndDate3,StartDate4,EndDate4,StartDate5,EndDate5,CognitiveTBIID,PsyEduc,MMSEORDA,MMSEORLO,MMSE,DIGIF3,DIGIFLEN3,DIGIB3,DIGIBLEN3,DIGITOT3,DIGIPER3,ANIMALS,TRAILA,TRAILB,LETTERS,LETTERF,LETTERA,AnimalsT,TrailAT,TrailBT,LetFAST,wtarfsiq,wtarviq,tommtot1,tommtot2,tommret,CVTr1FR,CVTr1FRT,CVT15FR,CVT15FRT,CVShDFR,CVShDFRT,CVShDCR,CVShDCRT,CVLoDFR,CVLoDFRT,CVLoDCR,CVLoDCRT,CVYNRH,CVYNRHT,CVYNFP,CVYNFPT,CVFCRRaw,CVDeInt,BVTr1,BVTr1T,BVTotRe,BVTotReT,BVLearn,BVLearnT,BVDelRe,BVDelReT,BVRecHit,BVRecFA,BVDeInt,FTDomH,FTDomHT,FTNDomH,FTNDomHT,W3LNS,W3LNSSS,GPgD,GPgDT,GPgND,GPgNDT,CPTOm,CPTOmT,CPTCom,CPTComT,CPTHRT,CPTHRTT,CPTHSE,CPTHSET,CPTVar,CPTVarT,CPTd,CPTdT,CPTB,CPTBT,CPTRTBl,CPTRTBlT,CPTSEBl,CPTSEBlT,CPTRTIs,CPTRTIsT,CPTSEIs,CPTSEIsT,WCSTPrs,WCSTPrsT,WCSTCL,WCSTCLP,WCSTCLPT,WCSTCat,WCSTFail,TowerItemAd,TowerFrstMove,TowerMove,NumItemAdmin,TowerRuleVio,TowerItemCom,TowerAchieve,TowerAchieveS,TowerMeanFrstMoveR,TowerMeanFrstMoveRS,TowerTimePerMoveR,TowerTimePerMoveRS,TowerMoveAccR,TowerMoveAccRS,TowerRuleVioPerc,TEA6tptS,TEA7dtdS,TEA8LotS,SDW90Raw,SDW90TS,SDO90Raw,SDO90TS,MTRRRaw,MTRRRPer,MT2Raw,MT2Per,MT15Raw,MT15Per,MTTCRaw,MTTCPer,MTECRaw,MTECPer,MTVRRaw,MTVRPer,MTARRaw,MTARPer,MTPMTRaw,MTPMTPer,MTDPRaw,MTClck,MTPMT1EC,MTPMT2EC,MTPMT3EC,MTPMT4EC,MTPMT5EC,MTPMT6EC,MTPMT7EC,MTPMT8EC,CvmtHitR,CvmtHitT,CvmtFaR,CvmtFaT,CvmtDprR,CvmtDprT,CvmtTotR,CvmtTotT,CvmtDelR,CvmtDelT,CvmtVis,CvmtInt,DkefCnR,DkefCnSS,DkefWrR,DkefWrSS,DkefInR,DkefInSS,DkefSwR,DkefSwSS,DkefCnEr,DkefWrEr,DkefInEr,DkefSwEr,DkefCnUn,DkefWrUn,DkefInUn,DkefSwUn,NDCompSc,NDRdRate,ACTFrmDate,ACT0,ACT9,...,DTI_MD_sup_long_fascic_R,DTI_MD_sup_long_fascic_L,DTI_MD_sup_frontocc_fascic_R,DTI_MD_sup_frontocc_fascic_L,DTI_MD_unc_fascic_R,DTI_MD_unc_fascic_L,DTI_MD_tapetum_R,DTI_MD_tapetum_L,subjects_,DTI_AD_mid_cerebll_pedunc,DTI_AD_pontine_xing_tract,DTI_AD_genu_cc,DTI_AD_body_cc,DTI_AD_splenium_cc,DTI_AD_fornix,DTI_AD_corticospinal_R,DTI_AD_corticospinal_L,DTI_AD_med_lemniscus_R,DTI_AD_med_lemniscus_L,DTI_AD_inf_cerebll_pedunc_R,DTI_AD_inf_cerebll_pedunc_L,DTI_AD_sup_cerebll_pedunc_R,DTI_AD_sup_cerebll_pedunc_L,DTI_AD_cerebr_pedunc_R,DTI_AD_cerebr_pedunc_L,DTI_AD_ant_int_capsule_R,DTI_AD_ant_int_capsule_L,DTI_AD_post_int_capsule_R,DTI_AD_post_int_capsule_L,DTI_AD_retrolenticular_int_capsule_R,DTI_AD_retrolenticular_int_capsule_L,DTI_AD_ant_corona_radiata_R,DTI_AD_ant_corona_radiata_L,DTI_AD_sup_corona_radiata_R,DTI_AD_sup_corona_radiata_L,DTI_AD_post_corona_radiata_R,DTI_AD_post_corona_radiata_L,DTI_AD_post_thal_rad_R,DTI_AD_post_thal_rad_L,DTI_AD_sag_stratum_R,DTI_AD_sag_stratum_L,DTI_AD_ext_capsule_R,DTI_AD_ext_capsule_L,DTI_AD_cingulum_cg_R,DTI_AD_cingulum_cg_L,DTI_AD_cingulum_hipp_R,DTI_AD_cingulum_hipp_L,DTI_AD_fornix_striaterm_R,DTI_AD_fornix_striaterm_L,DTI_AD_sup_long_fascic_R,DTI_AD_sup_long_fascic_L,DTI_AD_sup_frontocc_fascic_R,DTI_AD_sup_frontocc_fascic_L,DTI_AD_unc_fascic_R,DTI_AD_unc_fascic_L,DTI_AD_tapetum_R,DTI_AD_tapetum_L,DTI_RD_mid_cerebll_pedunc,DTI_RD_pontine_xing_tract,DTI_RD_genu_cc,DTI_RD_body_cc,DTI_RD_splenium_cc,DTI_RD_fornix,DTI_RD_corticospinal_R,DTI_RD_corticospinal_L,DTI_RD_med_lemniscus_R,DTI_RD_med_lemniscus_L,DTI_RD_inf_cerebll_pedunc_R,DTI_RD_inf_cerebll_pedunc_L,DTI_RD_sup_cerebll_pedunc_R,DTI_RD_sup_cerebll_pedunc_L,DTI_RD_cerebr_pedunc_R,DTI_RD_cerebr_pedunc_L,DTI_RD_ant_int_capsule_R,DTI_RD_ant_int_capsule_L,DTI_RD_post_int_capsule_R,DTI_RD_post_int_capsule_L,DTI_RD_retrolenticular_int_capsule_R,DTI_RD_retrolenticular_int_capsule_L,DTI_RD_ant_corona_radiata_R,DTI_RD_ant_corona_radiata_L,DTI_RD_sup_corona_radiata_R,DTI_RD_sup_corona_radiata_L,DTI_RD_post_corona_radiata_R,DTI_RD_post_corona_radiata_L,DTI_RD_post_thal_rad_R,DTI_RD_post_thal_rad_L,DTI_RD_sag_stratum_R,DTI_RD_sag_stratum_L,DTI_RD_ext_capsule_R,DTI_RD_ext_capsule_L,DTI_RD_cingulum_cg_R,DTI_RD_cingulum_cg_L,DTI_RD_cingulum_hipp_R,DTI_RD_cingulum_hipp_L,DTI_RD_fornix_striaterm_R,DTI_RD_fornix_striaterm_L,DTI_RD_sup_long_fascic_R,DTI_RD_sup_long_fascic_L,DTI_RD_sup_frontocc_fascic_R,DTI_RD_sup_frontocc_fascic_L,DTI_RD_unc_fascic_R,DTI_RD_unc_fascic_L,DTI_RD_tapetum_R,DTI_RD_tapetum_L,ScreenDate_x.1,VisitSeq_x.1,PETType,location,Cameras,RadiaType,Precentral_l,Precentral_r,Frontal_Sup_l,Frontal_Sup_r,Frontal_Sup_Orb_l,Frontal_Sup_Orb_r,Frontal_Mid_l,Frontal_Mid_r,Frontal_Mid_Orb_l,Frontal_Mid_Orb_r,Frontal_Inf_Oper_l,Frontal_Inf_Oper_r,Frontal_Inf_Tri_l,Frontal_Inf_Tri_r,Frontal_Inf_Orb_l,Frontal_Inf_Orb_r,Rolandic_Oper_l,Rolandic_Oper_r,Supp_Motor_Area_l,Supp_Motor_Area_r,Olfactory_l,Olfactory_r,Frontal_Sup_Medial_l,Frontal_Sup_Medial_r,Frontal_Med_Orb_l,Frontal_Med_Orb_r,Rectus_l,Rectus_r,Insula_l,Insula_r,Cingulum_Ant_l,Cingulum_Ant_r,Cingulum_Mid_l,Cingulum_Mid_r,Cingulum_Post_l,Cingulum_Post_r,Hippocampus_l,Hippocampus_r,Parahippocampus_l,Parahippocampus_r,Amygdala_l,Amygdala_r,Calcarine_l,Calcarine_r,Cuneus_l,Cuneus_r,Lingual_l,Lingual_r,Occipital_Sup_l,Occipital_Sup_r,Occipital_Mid_l,Occipital_Mid_r,Occipital_Inf_l,Occipital_Inf_r,Fusiform_l,Fusiform_r,Postcentral_l,Postcentral_r,Parietal_Sup_l,Parietal_Sup_r,Parietal_Inf_l,Parietal_Inf_r,Supra_Marginal_l,Supra_Marginal_r,Angular_l,Angular_r,Precuneus_l,Precuneus_r,Paracentral_Lobule_l,Paracentral_Lobule_r,CaudateNucl_l,CaudateNucl_r,Putamen_l,Putamen_r,Pallidum_l,Pallidum_r,Thalamus_l,Thalamus_r,Heschl_l,Heschl_r,Temporal_Sup_l,Temporal_Sup_r,Temporal_Pole_Sup_l,Temporal_Pole_Sup_r,Temporal_Mid_l,Temporal_Mid_r,Temporal_Pole_Mid_l,Temporal_Pole_Mid_r,Temporal_Inf_l,Temporal_Inf_r,Cerebellum_Crus1_l,Cerebellum_Crus1_r,Cerebellum_Crus2_l,Cerebellum_Crus2_r,Cerebellum3_l,Cerebellum3_r,Cerebellum45_l,Cerebellum45_r,Cerebellum6_l,Cerebellum6_r,Cerebellum7_l,Cerebellum7_r,Cerebellum8_l,Cerebellum8_r,Cerebellum9_l,Cerebellum9_r,Cerebellum10_l,Cerebellum10_r,Vermis12,Vermis3,Vermis45,Vermis6,Vermis7,Vermis8,Vermis9,Vermis10,Medulla,Midbrain,Pons,ScreenDate_y,VisitSeq_y,PETDate,MRIDate,LPDate_y,CLabDate,DiffDay,BGlucose,BNa,BUN,BCreat,BOsmo,UOsmo,USG,UNa,BK,TotalChol,LDL,HDL,Trig
0,2904,1968-12-06,M12,White,0.0,2.0,Lost to F/U,1998-03-03,,C002,1995-03-07,1,26,12.0,1.0,1.0,,,,,,,,,,,Married,0.0,7.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,C002,1992-08-28,1993-11-14,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,C002,,5.0,5.0,30.0,,,,,,,25.0,24.0,53.0,13.0,9.0,14.0,55.0,48.0,53.0,43.0,107.0,108.0,50.0,50.0,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1995-03-07,1,PET GE,,,FDG,1.067264,1.024925,0.999485,0.995533,0.915995,0.932315,1.074125,1.107485,0.870637,0.940206,1.086084,1.133551,1.068026,1.090121,0.956046,1.028727,1.008464,1.036488,1.05159,1.077502,0.848907,0.853268,1.0115,1.004927,1.009303,1.062037,1.016962,1.030693,0.975914,0.980018,0.981088,1.027511,1.040961,1.102279,1.097756,0.959721,0.661169,0.67677,0.692472,0.740362,0.676899,0.669192,1.097681,1.076332,1.109972,1.122513,0.988007,0.961065,0.976932,1.049444,0.968884,0.997221,1.003791,1.027867,0.908473,0.903717,0.988904,0.952827,1.011167,1.056395,1.076204,1.123787,1.020114,0.973047,1.066799,1.074832,1.129694,1.116656,0.916046,0.910626,0.759268,0.658437,1.017896,1.042611,0.835282,0.837992,0.825463,0.848968,1.075684,1.075098,1.01498,1.017725,0.745198,0.743663,0.982394,0.961644,0.720095,0.738997,0.880831,0.923937,0.786514,0.826993,0.779399,0.759134,0.668251,0.601853,0.747181,0.717031,0.85013,0.838781,0.741037,0.77626,0.697926,0.723422,0.713332,0.733021,0.462881,0.492539,0.641519,0.646462,0.697395,0.741214,0.733316,0.770235,0.770194,0.646034,0.652642,0.754134,0.553952,1995-03-07,1.0,1995-03-21,1995-04-08,1995-04-05,2008-04-16,75.0,104.0,143.0,11.0,0.96,,,1.015,,4.4,,,,
1,2892,1965-04-20,F14,Black/African American,1.0,2.0,Inactive,NaT,,C003,1995-03-28,1,29,14.0,1.0,1.0,,,,,,,,,,,Married,20.0,2.0,9.0,9.0,1.0,0.0,,1.0,0.0,1.0,0.0,1.0,0.0,9.0,1.0,9.0,1.0,9.0,9.0,5.0,0.0,0.0,0.0,0.0,0.0,9.0,9.0,9.0,9.0,9.0,9.0,1.0,9.0,9.0,9.0,9.0,,C003,1989-10-06,1990-05-30,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,C003,,5.0,5.0,29.0,,,,,,,23.0,27.0,55.0,15.0,12.0,10.0,57.0,48.0,56.0,46.0,93.0,92.0,49.0,50.0,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1995-03-28,1,PET GE,,,FDG,1.040714,1.026187,0.962313,0.965398,0.860081,0.904812,1.014722,1.088792,0.878706,0.942198,1.015315,1.077424,1.022545,1.07201,0.910408,0.977191,0.983137,0.980235,1.020384,1.018343,0.843068,0.872565,0.963885,0.931519,0.927464,0.973689,0.895649,0.958905,0.89361,0.939169,0.881872,0.918325,1.021922,1.074308,1.009462,0.919229,0.6567,0.700351,0.696541,0.730154,0.675509,0.70931,1.192609,1.180114,1.151432,1.142876,1.036742,1.034602,1.107385,1.082528,0.980606,1.03484,0.971199,1.09436,0.852236,0.886624,0.971337,0.979982,1.014377,1.05085,1.033142,1.058974,0.962877,0.930941,1.035257,1.030569,1.096745,1.103125,0.948769,1.010743,0.800211,0.75712,1.046565,1.046644,0.910253,0.873938,0.952819,0.979255,1.015257,1.045889,0.937063,0.95847,0.671388,0.724568,0.932757,0.953618,0.668705,0.708132,0.830476,0.882811,0.886687,0.963994,0.906289,0.906474,0.696859,0.642256,0.794473,0.79813,0.92481,0.943716,0.863547,0.891201,0.820796,0.827063,0.836816,0.86284,0.548094,0.593916,0.732724,0.732797,0.782502,0.900442,0.919003,0.931222,0.955611,0.81758,0.717451,0.769019,0.613842,1995-03-28,1.0,1995-10-03,1995-04-19,1995-04-11,2008-05-07,225.0,104.0,140.0,7.0,0.69,,,1.012,,3.9,,,,
2,2987,1948-07-06,M13,White,0.0,1.0,Lost to F/U,NaT,,C004,1995-04-30,1,46,13.0,1.0,1.0,,,,1.0,1.0,,,,,,Married,40.0,3.0,9.0,9.0,9.0,0.0,,9.0,9.0,9.0,9.0,9.0,9.0,9.0,0.0,0.0,0.0,9.0,9.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,C004,1994-01-22,1995-02-01,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,C004,,5.0,5.0,29.0,,,,,,,18.0,18.0,27.0,17.0,6.0,9.0,43.0,63.0,83.0,41.0,102.0,102.0,49.0,50.0,49.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1995-04-30,1,PET GE,,,FDG,1.041652,1.050654,0.95895,1.011525,0.838749,0.856308,1.034269,1.084596,0.824991,0.908059,1.018421,1.072219,1.013686,1.078163,0.892659,0.975225,1.005714,1.052377,1.055798,1.077215,0.76616,0.799483,0.948961,0.92688,0.954713,0.999913,0.897078,0.928615,0.929358,0.940807,0.894634,0.913217,1.046513,1.077996,1.122275,1.031164,0.655752,0.695753,0.679778,0.72395,0.643917,0.72666,1.077364,1.103518,1.139,1.144061,1.031287,1.042392,1.062189,1.126386,0.985129,1.053278,0.983193,1.059517,0.891465,0.913173,1.013879,1.028834,1.10068,1.121185,1.056544,1.11046,1.009504,1.001034,1.091594,1.175986,1.137513,1.156358,0.962605,0.971093,0.720634,0.661129,0.989777,1.007563,0.802491,0.817569,0.882095,0.874663,1.061666,1.097568,1.028554,1.057342,0.692474,0.797083,0.981305,0.981403,0.716184,0.75236,0.84685,0.918211,0.85815,0.890125,0.843529,0.782545,0.61834,0.550208,0.777356,0.75353,0.922044,0.91662,0.78843,0.812973,0.779647,0.799079,0.742169,0.753239,0.458964,0.511044,0.649182,0.647208,0.733297,0.772802,0.783774,0.844218,0.807439,0.656465,0.633002,0.755601,0.60209,1995-04-30,1.0,1995-10-09,1995-06-29,1995-11-20,2008-06-09,426.0,,,,,,,1.009,,,,,,
3,2988,1952-05-07,M14,White,1.0,2.0,Withdrawn,1996-12-10,,C005,1995-05-24,1,43,14.0,,1.0,,,,,,,,,,,Married,0.0,4.0,1.0,9.0,1.0,0.0,,9.0,1.0,0.0,0.0,0.0,0.0,9.0,1.0,0.0,9.0,9.0,9.0,1.0,1.0,0.0,10.0,1.0,9.0,9.0,9.0,9.0,9.0,9.0,9.0,1.0,9.0,9.0,9.0,9.0,,C005,1975-06-01,1975-10-06,1994-01-06,1995-01-20,NaT,NaT,NaT,NaT,NaT,NaT,C005,14.0,4.0,5.0,29.0,,,,,,,21.0,26.0,65.0,20.0,19.0,12.0,49.0,47.0,45.0,57.0,112.0,113.0,49.0,50.0,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1995-05-24,1,PET GE,,,FDG,1.034821,1.039842,0.95711,0.973998,0.874699,0.914346,1.043725,1.085284,0.902223,0.969151,0.976354,1.034632,1.021732,1.082787,0.94163,0.993866,1.0176,0.998749,1.019306,1.010427,0.87945,0.886314,0.990875,1.015158,0.98375,1.053904,0.978208,1.049032,0.969928,1.0179,0.965277,1.050021,1.039972,1.111283,1.048396,1.004995,0.704422,0.735949,0.728172,0.761715,0.730345,0.76034,1.162562,1.163751,1.141726,1.114489,1.036505,1.022552,1.076677,1.01374,0.98857,1.023802,1.018296,0.997294,0.893619,0.90184,1.001614,0.981603,1.034862,1.067575,1.061684,1.116328,1.022846,0.992552,1.036447,1.046177,1.075347,1.102723,0.931028,0.963797,0.789377,0.712679,1.072107,1.04625,0.8594,0.825648,0.854152,0.903498,1.142711,1.159114,1.020235,1.017692,0.767752,0.765315,0.968654,0.967685,0.750552,0.75811,0.858491,0.919917,0.826543,0.867972,0.810505,0.821699,0.688729,0.621275,0.779462,0.745774,0.907434,0.871766,0.771219,0.789817,0.735487,0.735867,0.709871,0.758054,0.474656,0.549618,0.708485,0.684529,0.670574,0.760833,0.75923,0.790948,0.799504,0.670152,0.70261,0.780022,0.579497,1995-05-24,1.0,1995-05-28,1995-05-29,NaT,2008-07-03,9.0,113.0,143.0,17.0,0.95,,,1.019,,4.1,,,,
4,2989,1956-10-21,M12,White,1.0,2.0,Active,2003-08-03,1.0,C006,1995-09-07,1,38,12.0,,1.0,,,,,,,,,,,Divorced,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,,C006,1994-02-06,1995-01-06,NaT,NaT,NaT,NaT,NaT,NaT,NaT,NaT,C006,12.0,4.0,5.0,29.0,,,,,,,22.0,24.0,66.0,15.0,13.0,10.0,50.0,52.0,46.0,48.0,101.0,101.0,50.0,50.0,50.0,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,NaT,,,...,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,1995-09-07,1,PET GE,,,FDG,1.081993,1.053682,0.993991,0.988885,0.882198,0.864892,1.072757,1.099724,0.836857,0.907481,1.083169,1.021175,1.026775,1.093111,0.908342,0.979409,1.01502,0.996002,1.012347,1.046865,0.830422,0.873265,0.959571,0.917169,0.968804,1.001755,0.913268,0.955992,0.948435,0.948203,0.917825,0.935522,1.053676,1.082309,1.027107,0.947164,0.676139,0.670107,0.699366,0.724685,0.684752,0.67776,1.110042,1.138898,1.151756,1.171133,1.020774,1.021672,1.040613,1.15645,1.016694,1.058935,1.03702,1.082083,0.906905,0.908471,1.007168,0.992547,1.122327,1.15993,1.06767,1.081106,1.04647,0.985204,1.095488,1.123302,1.15777,1.164854,0.974307,1.02719,0.767488,0.675154,1.05483,1.032991,0.840412,0.843504,0.812726,0.810041,1.05412,1.004989,1.003332,1.019788,0.677974,0.721115,0.998107,0.966638,0.681782,0.711982,0.866867,0.906848,0.789466,0.815259,0.793439,0.749445,0.642316,0.616483,0.724737,0.729856,0.87264,0.861931,0.754835,0.765121,0.728997,0.766448,0.740655,0.752368,0.45442,0.513582,0.671006,0.658535,0.689331,0.754244,0.775065,0.81185,0.793631,0.709093,0.69046,0.718021,0.607689,1995-09-07,1.0,1995-12-04,1995-09-12,1996-07-07,2008-08-01,628.0,89.0,142.0,19.0,0.86,,,1.026,,4.2,,,,


In [59]:
first_visit_data.drop_duplicates(inplace=True)

(144,)

In [60]:
#create new column for sum of distance from blast (worst 5)
first_visit_data['QEDist_sum'] = first_visit_data.loc[:, 'QEDist1':'QEDist5'].sum(axis=1)
#create new column for mean of distance from blast (worst 5)
first_visit_data['QEDist_mean'] = first_visit_data.loc[:, 'QEDist1':'QEDist5'].mean(axis=1)
#create new column for min of distance from blast (worst 5)
first_visit_data['QEDist_min'] = first_visit_data.loc[:, 'QEDist1':'QEDist5'].min(axis=1)

In [None]:
#now deal with path_RH
data_RH = pd.read_csv(path_RH)
data_RH = pd.DataFrame(data = data_RH)
print('Data shape all groups:\n', data_RH.shape, '\n')

#select only TBIID C and T (control and TBI)
data_RH = data_RH[data_RH['TBIID'].str.match(r'[CT]\d\d')]
print('Data shape only deployed controls and mTBI groups:\n', data_RH.shape, '\n')
print('Data types:\n', data_RH.info(), '\n')
data_RH.head(1)

In [None]:
#change DA = 0 to np.nan, add ratio cals
data_RH['DA'] = data_RH['DA'].replace({0: np.nan})

data_RH['da_dopa_ratio'] = data_RH['DA'] / data_RH['DOPA']
data_RH['dopac_da_ratio'] = data_RH['DOPAC'] / data_RH['DA']
data_RH['ne_dopa_ratio'] = data_RH['NE'] / data_RH['DOPA']
data_RH['dhpg_ne_ratio'] = data_RH['DHPG'] / data_RH['NE']

In [None]:
#further clean dataframe: only keep columns of interest and required metadata
first_visit_data_mult = first_visit_data_mult[['TBIID', 'Group', 'VisitSeq', 'MeasureDate', 'hrslp', 'PSQI1hr',
       'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min', 'PSQI5a', 'PSQI5b',
       'PSQI5c', 'PSQI5d', 'PSQI5e', 'PSQI5f', 'PSQI5g', 'PSQI5h',
       'PSQI5i', 'PSQI5j', 'PSQI6', 'PSQI7', 'PSQI8', 'PSQI9', 'PSQIc1',
       'PSQIc2', 'PSQIc3', 'PSQIc4', 'PSQIc5', 'PSQIc6', 'PSQIc7',
       'PSQItot', 'PCL1', 'PCL2', 'PCL3', 'PCL4', 'PCL5', 'PCL6', 'PCL7',
       'PCL8', 'PCL9', 'PCL10', 'PCL11', 'PCL12', 'PCL13', 'PCL14',
       'PCL15', 'PCL16', 'PCL17', 'PCLTot', 'PHQ1', 'PHQ2', 'PHQ3',
       'PHQ4', 'PHQ5', 'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9', 'PHQTot', 'auditc',
       'BNI1Im', 'BNI2Im', 'BNI3Im', 'BNI4Im', 'BNI5Im', 'BNI6Im',
       'BNI7Im', 'BNI8Im', 'BNI9Im', 'BNI10Im', 'BNI11', 'BNITotIm']]

first_visit_data_mult.columns = ['TBIID', 'Group', 'VisitSeq_mult', 'MeasureDate_mult', 'hrslp', 'PSQI1hr',
       'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min', 'PSQI5a', 'PSQI5b',
       'PSQI5c', 'PSQI5d', 'PSQI5e', 'PSQI5f', 'PSQI5g', 'PSQI5h',
       'PSQI5i', 'PSQI5j', 'PSQI6', 'PSQI7', 'PSQI8', 'PSQI9', 'PSQIc1',
       'PSQIc2', 'PSQIc3', 'PSQIc4', 'PSQIc5', 'PSQIc6', 'PSQIc7',
       'PSQItot', 'PCL1', 'PCL2', 'PCL3', 'PCL4', 'PCL5', 'PCL6', 'PCL7',
       'PCL8', 'PCL9', 'PCL10', 'PCL11', 'PCL12', 'PCL13', 'PCL14',
       'PCL15', 'PCL16', 'PCL17', 'PCLTot', 'PHQ1', 'PHQ2', 'PHQ3',
       'PHQ4', 'PHQ5', 'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9', 'PHQTot', 'AUDITtot',
       'BNI1Im', 'BNI2Im', 'BNI3Im', 'BNI4Im', 'BNI5Im', 'BNI6Im',
       'BNI7Im', 'BNI8Im', 'BNI9Im', 'BNI10Im', 'BNI11', 'BNITotIm']

In [None]:
#further clean dataframe: only keep columns of interest and required metadata
first_visit_data_single = first_visit_data_single[['TBIID', 'VisitSeq', 
        'ScreenDate', 'GType', 'Race', 'Hispanic', 'Handedness',
       'ScreenAge', 'Education', 'cestotal', 'PsyEduc', 'QKOIorA', 'QKOExpMil', 'QKOAllMil',
       'QKOLife', 'QBlstExp', 'QBEACRM', 'MnthSncBlst', 'QEDist_sum', 'QEDist_mean', 'QEDist_min',
       'BISAtt', 'BISMtr', 'BISNonpl', 'BISTot', 'DvpVers', 'DvpHA',
       'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr', 'DvpBP', 'DvpBPAct',
       'DvpBPslp', 'DvpBPMd', 'DvpBPStr', 'CAPSTotal']]

first_visit_data_single.columns = ['TBIID', 'VisitSeq_single', 
        'MeasureDate_single', 'GType', 'Race', 'Hispanic', 'Handedness',
       'ScreenAge', 'Education', 'cestotal', 'PsyEduc', 'QKOIorA', 'QKOExpMil', 'QKOAllMil',
       'QKOLife', 'QBlstExp', 'QBEACRM', 'MnthSncBlst', 'QEDist_sum', 'QEDist_mean', 'QEDist_min',
       'BISAtt', 'BISMtr', 'BISNonpl', 'BISTot', 'DvpVers', 'DvpHA',
       'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr', 'DvpBP', 'DvpBPAct',
       'DvpBPslp', 'DvpBPMd', 'DvpBPStr', 'CAPSTotal']

In [None]:
#further clean dataframe: only keep columns of interest and required metadata
first_visit_data_TBI = first_visit_data_TBI[['TBIID', 'VisitSeq', 'NSIFormDate',
                                             'tbiDizzy', 'tbiBalan', 'tbiCoord',
       'tbiHeada', 'tbiNaus', 'tbiVision', 'tbiLight', 'tbiHear',
       'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet', 'tbiConc',
       'tbiForget', 'tbiDecis', 'tbiSlow', 'tbiEnergy', 'tbiSleep',
       'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw', 'tbiDisin', 'tbiWithd',
       'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch', 'NSITot', 'TBITot']]

first_visit_data_TBI.columns = ['TBIID', 'VisitSeq_TBI', 'MeasureDate_TBI',
                                             'tbiDizzy', 'tbiBalan', 'tbiCoord',
       'tbiHeada', 'tbiNaus', 'tbiVision', 'tbiLight', 'tbiHear',
       'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet', 'tbiConc',
       'tbiForget', 'tbiDecis', 'tbiSlow', 'tbiEnergy', 'tbiSleep',
       'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw', 'tbiDisin', 'tbiWithd',
       'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch', 'NSITot', 'TBITot']

In [None]:
#further clean dataframe: only keep columns of interest and required metadata
first_visit_data_PET = first_visit_data_PET[['TBIID', 'VisitSeq', 'ScreenDate', 'PETType', 'Frontal_Mid_l',
       'Frontal_Mid_r', 'Insula_l', 'Insula_r', 'Cingulum_Ant_l',
       'Cingulum_Ant_r', 'Amygdala_l', 'Amygdala_r', 'CaudateNucl_l',
       'CaudateNucl_r', 'Putamen_l', 'Putamen_r', 'Pallidum_l',
       'Pallidum_r', 'Medulla', 'Midbrain', 'Pons']]

first_visit_data_PET.columns = ['TBIID', 'VisitSeq_PET', 'MeasureDate_PET', 'PETType', 'Frontal_Mid_l',
       'Frontal_Mid_r', 'Insula_l', 'Insula_r', 'Cingulum_Ant_l',
       'Cingulum_Ant_r', 'Amygdala_l', 'Amygdala_r', 'CaudateNucl_l',
       'CaudateNucl_r', 'Putamen_l', 'Putamen_r', 'Pallidum_l',
       'Pallidum_r', 'Medulla', 'Midbrain', 'Pons']

In [None]:
#further clean dataframe: only keep columns of interest and required metadata
first_visit_data_drug = first_visit_data_drug[['TBIID', 'VisitSeq', 'ScreenDate', 'PreCaff', 'PreETOH', 'PreNic', 'PreSleep', 
                                            'PreTHC']]

first_visit_data_drug.columns = ['TBIID', 'VisitSeq_drug', 'MeasureDate_drug', 'PreCaff', 'PreETOH', 'PreNic', 'PreSleep', 
                                            'PreTHC']

In [None]:
#further clean dataframe: only keep columns of interest and required metadata
data_RH = data_RH[['TBIID', 'AUDIT1', 'AUDIT2', 'AUDIT3', 'DOPA', 'DA',
       'DOPAC', 'da_dopa_ratio', 'dopac_da_ratio', 'NE', 'DHPG', 'ne_dopa_ratio',
       'dhpg_ne_ratio', 'CSFPROTEIN', 'LPHRate', 'LPBPSys',
       'LPBPDias', 'PTSD_YN']]

In [None]:
#check length of data tables (AUDITC has more entries)
print('The length of the multi data table is: ', first_visit_data_mult.shape)
print('The length of the single data table is: ', first_visit_data_single.shape)
print('The length of the TBI data table is: ', first_visit_data_TBI.shape)
print('The length of the RH data table is: ', data_RH.shape)
print('The length of the TBI data table is: ', first_visit_data_PET.shape)
print('The length of the drug_history data table is: ', first_visit_data_drug.shape)

In [None]:
#joing the data sets on the common key 'TBIID' - this should only keep entries that are shared across all data sets
merge_data_first = pd.merge(first_visit_data_mult, first_visit_data_single, how='inner', on='TBIID', suffixes=('_mult', '_single'), validate='one_to_one')
print(merge_data_first.shape)
merge_data_first = pd.merge(merge_data_first, first_visit_data_TBI, how='inner', on='TBIID', suffixes=('_mult', '_TBI'), validate='one_to_one')
print(merge_data_first.shape)
merge_data_first = pd.merge(merge_data_first, first_visit_data_PET, how='inner', on='TBIID', suffixes=('_mult', '_PET'), validate='one_to_one')
#print(merge_data_first.shape)
merge_data_first = pd.merge(merge_data_first, first_visit_data_drug, how='inner', on='TBIID', suffixes=('_mult', '_drug'), validate='one_to_one')
#print(merge_data_first.shape)
merge_data_first = pd.merge(merge_data_first, data_RH, how='inner', on='TBIID', suffixes=('_mult', '_AUDITC'), validate='one_to_one')
print(merge_data_first.shape)
merge_data_first.head(1)

In [None]:
#further clean dataframe: rearange columns and only keep columns of interest and required metadata    
merge_data_first = merge_data_first[['TBIID', 'Group', 
                                     'VisitSeq_mult', 'VisitSeq_single', 'VisitSeq_TBI', 'VisitSeq_PET',
                                     'MeasureDate_mult', 'MeasureDate_single', 'MeasureDate_TBI', 'MeasureDate_PET',
                                     'GType', 'Race', 'Hispanic', 'Handedness', 'ScreenAge', 'Education', 'PsyEduc', 
                                     'cestotal', 'NSITot', 'TBITot', 'CAPSTotal', 'PCLTot', 'PTSD_YN', 'PSQItot', 'PHQTot', 'BNITotIm',
                                     'BISTot', 'AUDITtot', 'AUDIT1', 'AUDIT2', 'AUDIT3', 
                                     'PreCaff', 'PreETOH', 'PreNic', 'PreSleep', 'PreTHC',
                                     'DOPA', 'DA', 'DOPAC', 'da_dopa_ratio', 'dopac_da_ratio', 
                                     'NE', 'DHPG', 'ne_dopa_ratio', 'dhpg_ne_ratio',
                                     'CSFPROTEIN', 'LPHRate', 'LPBPSys', 'LPBPDias',
                                     'QKOIorA', 'QKOExpMil', 'QKOAllMil', 'QKOLife', 'QBlstExp', 'QBEACRM', 
                                     'MnthSncBlst', 'QEDist_sum', 'QEDist_mean', 'QEDist_min',
                                     'DvpHA', 'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr', 'DvpBP', 'DvpBPAct', 'DvpBPslp', 
                                     'DvpBPMd', 'DvpBPStr',
                                     'tbiDizzy', 'tbiBalan', 'tbiCoord', 'tbiHeada', 'tbiNaus', 'tbiVision', 'tbiLight', 
                                     'tbiHear', 'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet', 'tbiConc', 'tbiForget', 
                                     'tbiDecis', 'tbiSlow', 'tbiEnergy', 'tbiSleep', 'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw',
                                     'tbiDisin', 'tbiWithd', 'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch', 
                                     'hrslp', 'PSQI1hr', 'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min', 'PSQI5a', 
                                     'PSQI5b', 'PSQI5c', 'PSQI5d', 'PSQI5e', 'PSQI5f', 'PSQI5g', 'PSQI5h', 'PSQI5i', 'PSQI5j', 
                                     'PSQI6', 'PSQI7', 'PSQI8', 'PSQI9', 'PSQIc1', 'PSQIc2', 'PSQIc3', 'PSQIc4', 'PSQIc5', 
                                     'PSQIc6', 'PSQIc7', 
                                     'PCL1', 'PCL2', 'PCL3', 'PCL4', 'PCL5', 'PCL6', 'PCL7', 'PCL8', 'PCL9',
                                     'PCL10', 'PCL11', 'PCL12', 'PCL13', 'PCL14', 'PCL15', 'PCL16', 'PCL17',  
                                     'PHQ1', 'PHQ2', 'PHQ3', 'PHQ4', 'PHQ5', 'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9', 
                                     'BNI1Im', 'BNI2Im', 'BNI3Im', 'BNI4Im', 'BNI5Im', 'BNI6Im', 'BNI7Im', 'BNI8Im', 'BNI9Im', 
                                     'BNI10Im', 'BNI11',
                                     'BISAtt', 'BISMtr', 'BISNonpl',  
                                     'PETType', 'Frontal_Mid_l', 'Frontal_Mid_r', 'Insula_l', 'Insula_r', 'Cingulum_Ant_l', 
                                     'Cingulum_Ant_r', 'Amygdala_l', 'Amygdala_r', 'CaudateNucl_l', 'CaudateNucl_r', 
                                     'Putamen_l', 'Putamen_r', 'Pallidum_l', 'Pallidum_r', 'Medulla', 'Midbrain', 'Pons']]

print(merge_data_first.shape)
merge_data_first.head(5)

In [None]:
merge_data_first.to_csv('merge_data_first.csv')

In [None]:
#-999 and 'None' are missing values so replace with 'Nan'
merge_data_first = merge_data_first.replace({-999.0: np.nan, 'None': np.nan})

In [None]:
#explore missing data
print(merge_data_first.shape)
print(merge_data_first[merge_data_first['Group'] == 'T'].shape)
print(merge_data_first[merge_data_first['Group'] == 'T'].isna().sum().sort_values(ascending=False))
merge_data_first[merge_data_first['Group'] == 'T'].isna().sum().sort_values(ascending=False).plot(kind = 'hist')
plt.show()

In [None]:
dep_vars = ['GType', 'Race', 'Hispanic', 'Handedness', 'ScreenAge',
       'Education', 'PsyEduc', 'cestotal', 'NSITot', 'TBITot',
       'CAPSTotal', 'PCLTot', 'PTSD_YN', 'PSQItot', 'PHQTot', 'BNITotIm',
       'BISTot', 'AUDITtot', 'AUDIT1', 'AUDIT2', 'AUDIT3', 'PreCaff', 'PreETOH',
       'PreNic', 'PreSleep', 'PreTHC','DOPA', 'DA', 'DOPAC', 'da_dopa_ratio', 'dopac_da_ratio', 
       'NE', 'DHPG', 'ne_dopa_ratio', 'dhpg_ne_ratio',
    'CSFPROTEIN', 'LPHRate', 'LPBPSys', 'LPBPDias', 'QKOIorA', 'QKOExpMil', 'QKOAllMil', 'QKOLife', 'QBlstExp',
       'QBEACRM', 'MnthSncBlst', 'QEDist_sum', 'QEDist_mean', 'DvpHA',
       'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr', 'DvpBP', 'DvpBPAct',
       'DvpBPslp', 'DvpBPMd', 'DvpBPStr', 'tbiDizzy', 'tbiBalan',
       'tbiCoord', 'tbiHeada', 'tbiNaus', 'tbiVision', 'tbiLight',
       'tbiHear', 'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet',
       'tbiConc', 'tbiForget', 'tbiDecis', 'tbiSlow', 'tbiEnergy',
       'tbiSleep', 'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw', 'tbiDisin',
       'tbiWithd', 'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch', 'hrslp',
       'PSQI1hr', 'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min', 'PSQI5a',
       'PSQI5b', 'PSQI5c', 'PSQI5d', 'PSQI5e', 'PSQI5f', 'PSQI5g',
       'PSQI5h', 'PSQI5i', 'PSQI5j', 'PSQI6', 'PSQI7', 'PSQI8', 'PSQI9',
       'PSQIc1', 'PSQIc2', 'PSQIc3', 'PSQIc4', 'PSQIc5', 'PSQIc6',
       'PSQIc7', 'PCL1', 'PCL2', 'PCL3', 'PCL4', 'PCL5', 'PCL6', 'PCL7',
       'PCL8', 'PCL9', 'PCL10', 'PCL11', 'PCL12', 'PCL13', 'PCL14',
       'PCL15', 'PCL16', 'PCL17', 'PHQ1', 'PHQ2', 'PHQ3', 'PHQ4', 'PHQ5',
       'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9', 'BNI1Im', 'BNI2Im', 'BNI3Im',
       'BNI4Im', 'BNI5Im', 'BNI6Im', 'BNI7Im', 'BNI8Im', 'BNI9Im',
       'BNI10Im', 'BNI11', 'BISAtt', 'BISMtr', 'BISNonpl', 'PETType',
       'Frontal_Mid_l', 'Frontal_Mid_r', 'Insula_l', 'Insula_r',
       'Cingulum_Ant_l', 'Cingulum_Ant_r', 'Amygdala_l', 'Amygdala_r',
       'CaudateNucl_l', 'CaudateNucl_r', 'Putamen_l', 'Putamen_r',
       'Pallidum_l', 'Pallidum_r', 'Medulla', 'Midbrain', 'Pons']

In [None]:
for param in dep_vars:
    print(param)
    plt.figure(figsize=(10,5))
    try:
        merge_data_first.groupby(['Group'])[param].mean().plot(kind='bar', yerr=merge_data_first.groupby(['Group'])[param].sem())
        plt.ylabel([param])
    #plt.savefig(str(param + '.png'))
        plt.show()
    except:
        pass

In [None]:
corr = merge_data_first.groupby('Group').corr()
#fig, ax = plt.subplots(figsize=(40, 40))
#sns.heatmap(corr, center=0)

In [None]:
AUDITC_cols = ['AUDITtot',
       'AUDIT1', 'AUDIT2', 'AUDIT3']

In [None]:
merge_data_first = merge_data_first[merge_data_first["TBIID"] != 'C010']
merge_data_first = merge_data_first[merge_data_first["TBIID"] != 'T080']

In [None]:
#corr.to_csv('corr.csv')
merge_data_first.to_csv('merge_data_first.csv')

In [None]:
AUDIT_tot_counts = merge_data_first.groupby('Group')['AUDITtot'].value_counts()
AUDIT_1_counts = merge_data_first.groupby('Group')['AUDIT1'].value_counts()
AUDIT_2_counts = merge_data_first.groupby('Group')['AUDIT2'].value_counts()
AUDIT_3_counts = merge_data_first.groupby('Group')['AUDIT3'].value_counts()
AUDIT_tot_counts.to_csv('AUDIT_tot_counts.csv')
AUDIT_1_counts.to_csv('AUDIT_1_counts.csv')
AUDIT_2_counts.to_csv('AUDIT_2_counts.csv')
AUDIT_3_counts.to_csv('AUDIT_3_counts.csv')

In [None]:
AUDIT_tot_perc = merge_data_first.groupby('Group')['AUDITtot'].value_counts() / merge_data_first.groupby('Group')['AUDITtot'].count()
AUDIT_1_perc = merge_data_first.groupby('Group')['AUDIT1'].value_counts() / merge_data_first.groupby('Group')['AUDIT1'].count()
AUDIT_2_perc = merge_data_first.groupby('Group')['AUDIT2'].value_counts() / merge_data_first.groupby('Group')['AUDIT2'].count()
AUDIT_3_perc = merge_data_first.groupby('Group')['AUDIT3'].value_counts() / merge_data_first.groupby('Group')['AUDIT3'].count()
AUDIT_tot_perc.to_csv('AUDIT_tot_perc.csv')
AUDIT_1_perc.to_csv('AUDIT_1_perc.csv')
AUDIT_2_perc.to_csv('AUDIT_2_perc.csv')
AUDIT_3_perc.to_csv('AUDIT_3_perc.csv')