In [None]:
#getting and working with data
import pandas as pd
import numpy as np
import re
import os
import datetime as dt

#visualizing results
%matplotlib inline
import seaborn as sns
import matplotlib.pyplot as plt
#import yellowbrick as yb

# Raise pandas' display limits so long/wide frames are shown without truncation.
for option_name, option_value in (('display.max_rows', 500),
                                  ('display.max_columns', 500),
                                  ('display.width', 1000)):
    pd.set_option(option_name, option_value)

# Silence every warning for the rest of the session.
# NOTE(review): this also hides pandas deprecation / SettingWithCopy warnings.
import warnings
warnings.simplefilter('ignore')

# Print numpy floats in plain decimal notation instead of scientific notation.
np.set_printoptions(suppress=True)

In [None]:
# File paths for the required data: four Excel sheets from the 190614 data
# freeze plus one CSV of individually queried items pulled by RCH.
project_dir = 'C:/Users/Schindler/Documents/Schindler_Lab/Data/Clinical projects/PeskindTBI'
freeze_dir = project_dir + '/freeze_190614'

# Excel sheet of multi measurements
path_multi = freeze_dir + '/multi.xlsx'

# Excel sheet of single measurements
path_single = freeze_dir + '/single.xlsx'

# Excel sheet of TBI measurements
path_TBI = freeze_dir + '/TBI.xlsx'

# Excel sheet of PET measurements
path_PET = freeze_dir + '/PET.xlsx'

# CSV of data pulled by RCH (AUDITC individual questions, CSF monoamines)
path_RH = project_dir + '/RHdatapull/RHPull.csv'

In [None]:
# We are going to deal with path_multi first.
# read_excel already returns a DataFrame, so no extra wrapping is needed.
data_mult = pd.read_excel(path_multi)
print('Data shape all groups:\n', data_mult.shape, '\n')

# Select only TBIID C and T (control and TBI).
# na=False treats a missing TBIID as "no match"; without it a blank ID puts NaN
# into the mask and the boolean indexing raises.
data_mult = data_mult[data_mult['TBIID'].str.match(r'[CT]\d\d', na=False)]
print('Data shape only deployed controls and mTBI groups:\n', data_mult.shape, '\n')

# DataFrame.info() prints its own report and returns None, so it is called on
# its own rather than inside print() (which would show a stray "None").
print('Data types:')
data_mult.info()
print()
data_mult.head(1)

In [None]:
# Create a new data frame containing only the first visit record of every
# participant (earliest 'MeasureDate'), and add a 'Group' column holding the
# first character of the TBIID (C = deployed controls, T = blast mTBI).

participants = data_mult['TBIID'].unique()

# One frame per participant is collected and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole result on
# every iteration.
first_visit_frames = []
for part in participants:
    part_rows = data_mult[data_mult['TBIID'] == part]
    # Series.min() skips missing dates, so a participant with one blank date is
    # no longer dropped entirely (a numpy min over the values would return NaT,
    # which matches no row).
    min_date = part_rows['MeasureDate'].min()
    # assign() returns a new frame, so nothing is written to a slice of data_mult.
    first_visit_frames.append(part_rows[part_rows['MeasureDate'] == min_date].assign(Group=part[0]))

# ignore_index=True gives the same 0..n-1 index the old reset_index(drop=True) did.
if first_visit_frames:
    first_visit_data_mult = pd.concat(first_visit_frames, ignore_index=True)
else:
    first_visit_data_mult = pd.DataFrame()

print(len(participants))
print(first_visit_data_mult.shape)
first_visit_data_mult.head(1)

In [None]:
# Now deal with path_single.
# read_excel already returns a DataFrame, so no extra wrapping is needed.
data_single = pd.read_excel(path_single)
print('Data shape:\n', data_single.shape, '\n')

# Select only TBIID C and T (control and TBI).
# na=False treats a missing TBIID as "no match"; without it a blank ID puts NaN
# into the mask and the boolean indexing raises.
data_single = data_single[data_single['TBIID'].str.match(r'[CT]\d\d', na=False)]
print('Data shape:\n', data_single.shape, '\n')

# DataFrame.info() prints its own report and returns None, so it is called on
# its own rather than inside print() (which would show a stray "None").
print('Data types:')
data_single.info()
print()
data_single.head(1)

In [None]:
# Create a new data frame containing only the first visit record of every
# participant (earliest 'ScreenDate'), and add a 'Group' column holding the
# first character of the TBIID (C = deployed controls, T = blast mTBI).

participants = data_single['TBIID'].unique()

# One frame per participant is collected and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole result on
# every iteration.
first_visit_frames = []
for part in participants:
    part_rows = data_single[data_single['TBIID'] == part]
    # Series.min() skips missing dates, so a participant with one blank date is
    # no longer dropped entirely (a numpy min over the values would return NaT,
    # which matches no row).
    min_date = part_rows['ScreenDate'].min()
    # assign() returns a new frame, so nothing is written to a slice of data_single.
    first_visit_frames.append(part_rows[part_rows['ScreenDate'] == min_date].assign(Group=part[0]))

# ignore_index=True gives the same 0..n-1 index the old reset_index(drop=True) did.
if first_visit_frames:
    first_visit_data_single = pd.concat(first_visit_frames, ignore_index=True)
else:
    first_visit_data_single = pd.DataFrame()

print(len(participants))
print(first_visit_data_single.shape)
first_visit_data_single.head(1)

In [None]:
# Summaries of distance from blast (worst 5): columns QEDist1 .. QEDist5.
# The notebook only blanks the missing-value codes (-999 and 'None') after the
# tables are merged, which is too late for derived columns: a -999 that has
# already been summed or averaged in can no longer be recognised. They are
# therefore removed here, before aggregating.
# NOTE(review): assumes the same missing-value codes apply to the QEDist columns - confirm.
qedist = first_visit_data_single.loc[:, 'QEDist1':'QEDist5'].replace({-999.0: np.nan, 'None': np.nan})

# Sum of the available distances; min_count=1 keeps a row with no valid
# distance as NaN instead of reporting a misleading total of 0.
first_visit_data_single['QEDist_sum'] = qedist.sum(axis=1, min_count=1)
# Mean of the available distances (NaN when none is available).
first_visit_data_single['QEDist_mean'] = qedist.mean(axis=1)

In [None]:
# Now deal with path_TBI.
# read_excel already returns a DataFrame, so no extra wrapping is needed.
data_TBI = pd.read_excel(path_TBI)
print('Data shape all groups:\n', data_TBI.shape, '\n')

# Select only TBIID C and T (control and TBI).
# na=False treats a missing TBIID as "no match"; without it a blank ID puts NaN
# into the mask and the boolean indexing raises.
data_TBI = data_TBI[data_TBI['TBIID'].str.match(r'[CT]\d\d', na=False)]
print('Data shape only deployed controls and mTBI groups:\n', data_TBI.shape, '\n')

# DataFrame.info() prints its own report and returns None, so it is called on
# its own rather than inside print() (which would show a stray "None").
print('Data types:')
data_TBI.info()
print()
data_TBI.head(1)

In [None]:
# Create a new data frame containing only the first visit record of every
# participant (earliest 'NSIFormDate'), and add a 'Group' column holding the
# first character of the TBIID (C = deployed controls, T = blast mTBI).

participants = data_TBI['TBIID'].unique()

# One frame per participant is collected and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole result on
# every iteration.
first_visit_frames = []
for part in participants:
    part_rows = data_TBI[data_TBI['TBIID'] == part]
    # Series.min() skips missing dates, so a participant with one blank date is
    # no longer dropped entirely (a numpy min over the values would return NaT,
    # which matches no row).
    min_date = part_rows['NSIFormDate'].min()
    # assign() returns a new frame, so nothing is written to a slice of data_TBI.
    first_visit_frames.append(part_rows[part_rows['NSIFormDate'] == min_date].assign(Group=part[0]))

# ignore_index=True gives the same 0..n-1 index the old reset_index(drop=True) did.
if first_visit_frames:
    first_visit_data_TBI = pd.concat(first_visit_frames, ignore_index=True)
else:
    first_visit_data_TBI = pd.DataFrame()

print(len(participants))
print(first_visit_data_TBI.shape)
first_visit_data_TBI.head(1)

In [None]:
# Now deal with path_PET.
# read_excel already returns a DataFrame, so no extra wrapping is needed.
data_PET = pd.read_excel(path_PET)
print('Data shape all groups:\n', data_PET.shape, '\n')

# Select only TBIID C and T (control and TBI).
# na=False treats a missing TBIID as "no match"; without it a blank ID puts NaN
# into the mask and the boolean indexing raises.
data_PET = data_PET[data_PET['TBIID'].str.match(r'[CT]\d\d', na=False)]
print('Data shape only deployed controls and mTBI groups:\n', data_PET.shape, '\n')

# DataFrame.info() prints its own report and returns None, so it is called on
# its own rather than inside print() (which would show a stray "None").
print('Data types:')
data_PET.info()
print()
data_PET.head(1)

In [None]:
# Create a new data frame containing only the first visit record of every
# participant (earliest 'ScreenDate'), and add a 'Group' column holding the
# first character of the TBIID (C = deployed controls, T = blast mTBI).

participants = data_PET['TBIID'].unique()

# One frame per participant is collected and concatenated once at the end:
# DataFrame.append was removed in pandas 2.0 and re-copies the whole result on
# every iteration.
first_visit_frames = []
for part in participants:
    part_rows = data_PET[data_PET['TBIID'] == part]
    # Series.min() skips missing dates, so a participant with one blank date is
    # no longer dropped entirely (a numpy min over the values would return NaT,
    # which matches no row).
    min_date = part_rows['ScreenDate'].min()
    # assign() returns a new frame, so nothing is written to a slice of data_PET.
    first_visit_frames.append(part_rows[part_rows['ScreenDate'] == min_date].assign(Group=part[0]))

# ignore_index=True gives the same 0..n-1 index the old reset_index(drop=True) did.
if first_visit_frames:
    first_visit_data_PET = pd.concat(first_visit_frames, ignore_index=True)
else:
    first_visit_data_PET = pd.DataFrame()

print(len(participants))
print(first_visit_data_PET.shape)
first_visit_data_PET.head(1)

In [None]:
# Now deal with path_RH.
# read_csv already returns a DataFrame, so no extra wrapping is needed.
data_RH = pd.read_csv(path_RH)
print('Data shape all groups:\n', data_RH.shape, '\n')

# Select only TBIID C and T (control and TBI).
# na=False treats a missing TBIID as "no match"; without it a blank ID puts NaN
# into the mask and the boolean indexing raises.
data_RH = data_RH[data_RH['TBIID'].str.match(r'[CT]\d\d', na=False)]
print('Data shape only deployed controls and mTBI groups:\n', data_RH.shape, '\n')

# DataFrame.info() prints its own report and returns None, so it is called on
# its own rather than inside print() (which would show a stray "None").
print('Data types:')
data_RH.info()
print()
data_RH.head(1)

In [None]:
# Change DA = 0 to NaN (avoids dividing by zero) and add the DOPA / DA ratio.
# The missing-value codes (-999 and 'None') are only blanked after the merge,
# which is too late for a derived column: e.g. -999 / -999 would give a
# plausible-looking ratio of 1.0. They are therefore removed from both inputs
# before dividing.
# NOTE(review): assumes the same missing-value codes apply to DA and DOPA - confirm.
# NOTE(review): the ratio is named "dopac" but is built from the 'DOPA' column - confirm.
missing_codes = {-999.0: np.nan, 'None': np.nan}
da_clean = data_RH['DA'].replace({0: np.nan, **missing_codes})
dopa_clean = data_RH['DOPA'].replace(missing_codes)

data_RH['DA'] = da_clean
data_RH['dopac_da_ratio'] = dopa_clean / da_clean

In [None]:
# Further clean the data frame: keep only the columns of interest plus the
# required metadata, then tag the visit number and date columns with '_mult'
# so they stay distinguishable after the tables are merged.
first_visit_data_mult = first_visit_data_mult[[
    'TBIID', 'Group', 'VisitSeq', 'MeasureDate', 'hrslp',
    'PSQI1hr', 'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min',
    'PSQI5a', 'PSQI5b', 'PSQI5c', 'PSQI5d', 'PSQI5e',
    'PSQI5f', 'PSQI5g', 'PSQI5h', 'PSQI5i', 'PSQI5j',
    'PSQI6', 'PSQI7', 'PSQI8', 'PSQI9',
    'PSQIc1', 'PSQIc2', 'PSQIc3', 'PSQIc4', 'PSQIc5', 'PSQIc6', 'PSQIc7',
    'PSQItot',
    'PCL1', 'PCL2', 'PCL3', 'PCL4', 'PCL5', 'PCL6', 'PCL7', 'PCL8', 'PCL9',
    'PCL10', 'PCL11', 'PCL12', 'PCL13', 'PCL14', 'PCL15', 'PCL16', 'PCL17',
    'PCLTot',
    'PHQ1', 'PHQ2', 'PHQ3', 'PHQ4', 'PHQ5', 'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9',
    'PHQTot', 'auditc',
    'BNI1Im', 'BNI2Im', 'BNI3Im', 'BNI4Im', 'BNI5Im', 'BNI6Im',
    'BNI7Im', 'BNI8Im', 'BNI9Im', 'BNI10Im', 'BNI11', 'BNITotIm',
]].rename(columns={'VisitSeq': 'VisitSeq_mult', 'MeasureDate': 'MeasureDate_mult'})

In [None]:
# Further clean the data frame: keep only the columns of interest plus the
# required metadata, then rename the visit number and screening date to
# 'VisitSeq_single' / 'MeasureDate_single' so they stay distinguishable after
# the tables are merged.
first_visit_data_single = first_visit_data_single[[
    'TBIID', 'VisitSeq', 'ScreenDate',
    'GType', 'Race', 'Hispanic', 'Handedness', 'ScreenAge', 'Education',
    'cestotal', 'PsyEduc',
    'QKOIorA', 'QKOExpMil', 'QKOAllMil', 'QKOLife', 'QBlstExp', 'QBEACRM',
    'MnthSncBlst', 'QEDist_sum', 'QEDist_mean',
    'BISAtt', 'BISMtr', 'BISNonpl', 'BISTot',
    'DvpVers', 'DvpHA', 'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr',
    'DvpBP', 'DvpBPAct', 'DvpBPslp', 'DvpBPMd', 'DvpBPStr',
    'CAPSTotal',
]].rename(columns={'VisitSeq': 'VisitSeq_single', 'ScreenDate': 'MeasureDate_single'})

In [None]:
# Further clean the data frame: keep only the columns of interest plus the
# required metadata, then rename the visit number and form date to
# 'VisitSeq_TBI' / 'MeasureDate_TBI' so they stay distinguishable after the
# tables are merged.
first_visit_data_TBI = first_visit_data_TBI[[
    'TBIID', 'VisitSeq', 'NSIFormDate',
    'tbiDizzy', 'tbiBalan', 'tbiCoord', 'tbiHeada', 'tbiNaus', 'tbiVision',
    'tbiLight', 'tbiHear', 'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet',
    'tbiConc', 'tbiForget', 'tbiDecis', 'tbiSlow', 'tbiEnergy', 'tbiSleep',
    'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw', 'tbiDisin', 'tbiWithd',
    'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch',
    'NSITot', 'TBITot',
]].rename(columns={'VisitSeq': 'VisitSeq_TBI', 'NSIFormDate': 'MeasureDate_TBI'})

In [None]:
# Further clean the data frame: keep only the columns of interest plus the
# required metadata, then rename the visit number and screening date to
# 'VisitSeq_PET' / 'MeasureDate_PET' so they stay distinguishable after the
# tables are merged.
first_visit_data_PET = first_visit_data_PET[[
    'TBIID', 'VisitSeq', 'ScreenDate', 'PETType',
    'Frontal_Mid_l', 'Frontal_Mid_r',
    'Insula_l', 'Insula_r',
    'Cingulum_Ant_l', 'Cingulum_Ant_r',
    'Amygdala_l', 'Amygdala_r',
    'CaudateNucl_l', 'CaudateNucl_r',
    'Putamen_l', 'Putamen_r',
    'Pallidum_l', 'Pallidum_r',
    'Medulla', 'Midbrain', 'Pons',
]].rename(columns={'VisitSeq': 'VisitSeq_PET', 'ScreenDate': 'MeasureDate_PET'})

In [None]:
# Further clean the data frame: keep the participant ID, the AUDIT items and
# total, the PTSD flag and the monoamine columns (including the derived ratio).
rh_columns = ['TBIID',
              'AUDIT1', 'AUDIT2', 'AUDIT3', 'AUDITtot',
              'PTSD_YN',
              'DA', 'DOPA', 'dopac_da_ratio', 'NE']
data_RH = data_RH.loc[:, rh_columns]

In [None]:
# Check the size of the data tables before merging (AUDITC has more entries).
# Each line prints the (rows, columns) shape of one table.
print('The length of the multi data table is: ', first_visit_data_mult.shape)
print('The length of the single data table is: ', first_visit_data_single.shape)
print('The length of the TBI data table is: ', first_visit_data_TBI.shape)
print('The length of the RH data table is: ', data_RH.shape)
# This line reports the PET table; it was previously labelled 'TBI' (copy-paste slip).
print('The length of the PET data table is: ', first_visit_data_PET.shape)

In [None]:
# Join the data sets on the common key 'TBIID'. Inner joins keep only the
# participants present in every table; validate='one_to_one' makes pandas raise
# if any table holds more than one row for a TBIID.
merge_data_first = first_visit_data_mult.merge(
    first_visit_data_single,
    on='TBIID', how='inner', suffixes=('_mult', '_single'), validate='one_to_one')
print(merge_data_first.shape)

merge_data_first = merge_data_first.merge(
    first_visit_data_TBI,
    on='TBIID', how='inner', suffixes=('_mult', '_TBI'), validate='one_to_one')
print(merge_data_first.shape)

# The shape after the PET join is deliberately not printed (it was commented out).
merge_data_first = merge_data_first.merge(
    first_visit_data_PET,
    on='TBIID', how='inner', suffixes=('_mult', '_PET'), validate='one_to_one')

merge_data_first = merge_data_first.merge(
    data_RH,
    on='TBIID', how='inner', suffixes=('_mult', '_AUDITC'), validate='one_to_one')
print(merge_data_first.shape)
merge_data_first.head(1)

In [None]:
# Further clean the merged data frame: rearrange the columns and keep only the
# columns of interest plus the required metadata. Columns not listed here are dropped.
ordered_columns = [
    # identifiers and visit metadata
    'TBIID', 'Group',
    'VisitSeq_mult', 'VisitSeq_single', 'VisitSeq_TBI', 'VisitSeq_PET',
    'MeasureDate_mult', 'MeasureDate_single', 'MeasureDate_TBI', 'MeasureDate_PET',
    # demographics
    'GType', 'Race', 'Hispanic', 'Handedness', 'ScreenAge', 'Education', 'PsyEduc',
    # total scores
    'cestotal', 'NSITot', 'TBITot', 'CAPSTotal', 'PCLTot', 'PTSD_YN', 'PSQItot', 'PHQTot', 'BNITotIm',
    'BISTot', 'AUDITtot', 'AUDIT1', 'AUDIT2', 'AUDIT3',
    # monoamines
    'DA', 'DOPA', 'dopac_da_ratio', 'NE',
    # QKO / blast items
    'QKOIorA', 'QKOExpMil', 'QKOAllMil', 'QKOLife', 'QBlstExp', 'QBEACRM',
    'MnthSncBlst', 'QEDist_sum', 'QEDist_mean',
    # Dvp items
    'DvpHA', 'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr', 'DvpBP', 'DvpBPAct', 'DvpBPslp',
    'DvpBPMd', 'DvpBPStr',
    # tbi* items
    'tbiDizzy', 'tbiBalan', 'tbiCoord', 'tbiHeada', 'tbiNaus', 'tbiVision', 'tbiLight',
    'tbiHear', 'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet', 'tbiConc', 'tbiForget',
    'tbiDecis', 'tbiSlow', 'tbiEnergy', 'tbiSleep', 'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw',
    'tbiDisin', 'tbiWithd', 'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch',
    # sleep items
    'hrslp', 'PSQI1hr', 'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min', 'PSQI5a',
    'PSQI5b', 'PSQI5c', 'PSQI5d', 'PSQI5e', 'PSQI5f', 'PSQI5g', 'PSQI5h', 'PSQI5i', 'PSQI5j',
    'PSQI6', 'PSQI7', 'PSQI8', 'PSQI9', 'PSQIc1', 'PSQIc2', 'PSQIc3', 'PSQIc4', 'PSQIc5',
    'PSQIc6', 'PSQIc7',
    # PCL items
    'PCL1', 'PCL2', 'PCL3', 'PCL4', 'PCL5', 'PCL6', 'PCL7', 'PCL8', 'PCL9',
    'PCL10', 'PCL11', 'PCL12', 'PCL13', 'PCL14', 'PCL15', 'PCL16', 'PCL17',
    # PHQ items
    'PHQ1', 'PHQ2', 'PHQ3', 'PHQ4', 'PHQ5', 'PHQ6', 'PHQ7', 'PHQ8', 'PHQ9',
    # BNI items
    'BNI1Im', 'BNI2Im', 'BNI3Im', 'BNI4Im', 'BNI5Im', 'BNI6Im', 'BNI7Im', 'BNI8Im', 'BNI9Im',
    'BNI10Im', 'BNI11',
    # BIS subscales
    'BISAtt', 'BISMtr', 'BISNonpl',
    # PET regions
    'PETType', 'Frontal_Mid_l', 'Frontal_Mid_r', 'Insula_l', 'Insula_r', 'Cingulum_Ant_l',
    'Cingulum_Ant_r', 'Amygdala_l', 'Amygdala_r', 'CaudateNucl_l', 'CaudateNucl_r',
    'Putamen_l', 'Putamen_r', 'Pallidum_l', 'Pallidum_r', 'Medulla', 'Midbrain', 'Pons',
]
merge_data_first = merge_data_first.loc[:, ordered_columns]

print(merge_data_first.shape)
merge_data_first.head(5)

In [None]:
# -999 and 'None' are the codes used for missing values, so turn both into NaN.
missing_value_codes = [-999.0, 'None']
merge_data_first = merge_data_first.replace(missing_value_codes, np.nan)

In [None]:
# Explore missing data in the 'T' group: count the NaNs per column, list the
# counts from most to least missing, and plot their distribution as a histogram.
tbi_group_rows = merge_data_first[merge_data_first['Group'] == 'T']
missing_per_column = tbi_group_rows.isna().sum().sort_values(ascending=False)

print(merge_data_first.shape)
print(tbi_group_rows.shape)
print(missing_per_column)
missing_per_column.plot(kind = 'hist')
plt.show()

In [None]:
# Columns to summarise per group. Numbered item families are generated rather
# than typed out; the resulting order matches the column order of the merged table.
# NOTE(review): 'dopac_da_ratio' is not part of this list - confirm that is intended.
dep_vars = (
    # demographics
    ['GType', 'Race', 'Hispanic', 'Handedness', 'ScreenAge', 'Education', 'PsyEduc']
    # total scores
    + ['cestotal', 'NSITot', 'TBITot', 'CAPSTotal', 'PCLTot', 'PTSD_YN',
       'PSQItot', 'PHQTot', 'BNITotIm', 'BISTot']
    # AUDIT total and items
    + ['AUDITtot', 'AUDIT1', 'AUDIT2', 'AUDIT3']
    # monoamines
    + ['DA', 'DOPA', 'NE']
    # QKO / blast items
    + ['QKOIorA', 'QKOExpMil', 'QKOAllMil', 'QKOLife', 'QBlstExp', 'QBEACRM',
       'MnthSncBlst', 'QEDist_sum', 'QEDist_mean']
    # Dvp items
    + ['DvpHA', 'DvpHAAct', 'DvpHASlp', 'DvpHAMd', 'DvpHAStr',
       'DvpBP', 'DvpBPAct', 'DvpBPslp', 'DvpBPMd', 'DvpBPStr']
    # tbi* items
    + ['tbiDizzy', 'tbiBalan', 'tbiCoord', 'tbiHeada', 'tbiNaus', 'tbiVision',
       'tbiLight', 'tbiHear', 'tbiNoise', 'tbiTingl', 'tbiTstsml', 'tbiAppet',
       'tbiConc', 'tbiForget', 'tbiDecis', 'tbiSlow', 'tbiEnergy', 'tbiSleep',
       'tbiAnx', 'tbiSad', 'tbiIrrit', 'tbiOverw', 'tbiDisin', 'tbiWithd',
       'tbiRing', 'tbiMoods', 'tbiFight', 'tbiSpch']
    # sleep items
    + ['hrslp', 'PSQI1hr', 'PSQI1min', 'PSQI2', 'PSQI3hr', 'PSQI3min']
    + ['PSQI5' + letter for letter in 'abcdefghij']
    + ['PSQI6', 'PSQI7', 'PSQI8', 'PSQI9']
    + ['PSQIc%d' % number for number in range(1, 8)]
    # PCL1 .. PCL17
    + ['PCL%d' % number for number in range(1, 18)]
    # PHQ1 .. PHQ9
    + ['PHQ%d' % number for number in range(1, 10)]
    # BNI1Im .. BNI10Im, then BNI11 (no 'Im' suffix)
    + ['BNI%dIm' % number for number in range(1, 11)]
    + ['BNI11']
    # BIS subscales
    + ['BISAtt', 'BISMtr', 'BISNonpl']
    # PET regions
    + ['PETType',
       'Frontal_Mid_l', 'Frontal_Mid_r', 'Insula_l', 'Insula_r',
       'Cingulum_Ant_l', 'Cingulum_Ant_r', 'Amygdala_l', 'Amygdala_r',
       'CaudateNucl_l', 'CaudateNucl_r', 'Putamen_l', 'Putamen_r',
       'Pallidum_l', 'Pallidum_r', 'Medulla', 'Midbrain', 'Pons']
)

In [None]:
# For every variable in dep_vars, draw a bar chart of the per-group mean with
# the standard error of the mean as error bars.
by_group = merge_data_first.groupby(['Group'])

for param in dep_vars:
    print(param)
    fig, ax = plt.subplots(figsize=(10, 5))
    try:
        by_group[param].mean().plot(kind='bar', yerr=by_group[param].sem(), ax=ax)
    except Exception as err:
        # Best effort: a column that cannot be averaged (e.g. text) is skipped,
        # but the reason is reported instead of being swallowed silently, and
        # the empty figure is closed so it is neither shown nor leaked.
        print('  skipped', param, '-', type(err).__name__ + ':', err)
        plt.close(fig)
        continue
    # Pass the name itself; wrapping it in a list renders the label as "['name']".
    ax.set_ylabel(param)
    #plt.savefig(str(param + '.png'))
    plt.show()
    plt.close(fig)

In [None]:
# Pairwise correlations between the numeric columns, computed separately for
# each group. The frame is restricted to numeric/boolean columns first: since
# pandas 2.0, corr() no longer drops text or date columns by itself and raises
# on them instead. Grouping by the 'Group' series keeps the result indexed by
# (Group, column).
numeric_columns = merge_data_first.select_dtypes(include=['number', 'bool'])
corr = numeric_columns.groupby(merge_data_first['Group']).corr()
#fig, ax = plt.subplots(figsize=(40, 40))
#sns.heatmap(corr, center=0)

In [None]:
# AUDIT columns: the total score followed by the three individual items.
AUDITC_cols = ['AUDITtot'] + ['AUDIT%d' % item for item in (1, 2, 3)]

In [None]:
# Drop two participants from the merged table.
# NOTE(review): the reason for excluding these IDs is not recorded here - please document it.
excluded_ids = ['C010', 'T080']
merge_data_first = merge_data_first[~merge_data_first["TBIID"].isin(excluded_ids)]

In [None]:
# Write the merged first-visit table to a CSV file in the working directory.
# (Exporting the correlation table is currently disabled.)
#corr.to_csv('corr.csv')
merge_data_first.to_csv(path_or_buf='merge_data_first.csv')

In [None]:
# Per group, count how often each value of the AUDIT total and of the three
# AUDIT items occurs, then write every table to its own CSV file.
audit_by_group = merge_data_first.groupby('Group')

AUDIT_tot_counts = audit_by_group['AUDITtot'].value_counts()
AUDIT_1_counts = audit_by_group['AUDIT1'].value_counts()
AUDIT_2_counts = audit_by_group['AUDIT2'].value_counts()
AUDIT_3_counts = audit_by_group['AUDIT3'].value_counts()

for counts_table, csv_name in ((AUDIT_tot_counts, 'AUDIT_tot_counts.csv'),
                               (AUDIT_1_counts, 'AUDIT_1_counts.csv'),
                               (AUDIT_2_counts, 'AUDIT_2_counts.csv'),
                               (AUDIT_3_counts, 'AUDIT_3_counts.csv')):
    counts_table.to_csv(csv_name)

In [None]:
# Per group, express each AUDIT value count as a share of that group's
# non-missing answers (a fraction between 0 and 1, not multiplied by 100),
# then write every table to its own CSV file.
audit_by_group = merge_data_first.groupby('Group')

AUDIT_tot_perc = audit_by_group['AUDITtot'].value_counts() / audit_by_group['AUDITtot'].count()
AUDIT_1_perc = audit_by_group['AUDIT1'].value_counts() / audit_by_group['AUDIT1'].count()
AUDIT_2_perc = audit_by_group['AUDIT2'].value_counts() / audit_by_group['AUDIT2'].count()
AUDIT_3_perc = audit_by_group['AUDIT3'].value_counts() / audit_by_group['AUDIT3'].count()

for share_table, csv_name in ((AUDIT_tot_perc, 'AUDIT_tot_perc.csv'),
                              (AUDIT_1_perc, 'AUDIT_1_perc.csv'),
                              (AUDIT_2_perc, 'AUDIT_2_perc.csv'),
                              (AUDIT_3_perc, 'AUDIT_3_perc.csv')):
    share_table.to_csv(csv_name)