In [67]:
import glob
from os.path import join as oj

import numpy as np
import os
import pandas as pd
from tqdm import tqdm
from typing import Dict
import functools

import rulevetting
from rulevetting.api import validation, util as api_util
from rulevetting.projects.csi_pecarn.dataset1 import Dataset
from rulevetting import DATA_PATH


In [99]:
raw_data_path = oj(rulevetting.DATA_PATH, Dataset().get_dataset_id(), 'raw')
fnames = sorted(glob.glob(f'{raw_data_path}/*'))
dfs = [pd.read_csv(fname) for fname in fnames]
dfs[0].loc[:, 'ControlType'] = (dfs[0]['ControlType'] == 'case').astype(int)
# fname=glob.glob(f'{raw_data_path}/clinicalpresentationsite.csv')[0]
# dfs = pd.read_csv(fname)
# dfs = dfs.replace(['Y', 'YES', 'A'], 1)       
# dfs = dfs.replace(['N', 'NO'], 0)
# dfs = dfs.replace(['ND', 'NA'], float("NaN"))
# dfs=dfs.fillna(dfs.median()) 

# Correlation of features and in groups and association with outcome


## Group1: Consciousness

In [63]:
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [101]:
dfs[3].columns

Index(['SITE', 'CaseID', 'ControlType', 'StudySubjectID', 'ArrivalDate',
       'ArrivalTime', 'ArrivalTimeND', 'ModeArrival', 'ReceivedInTransfer',
       'DxCspineInjury',
       ...
       'IntervForCervicalStabOthertxt', 'LongTermRehab', 'TrfToLongTermRehab',
       'OutcomeStudySite', 'OutcomeStudySiteNeuro', 'OutcomeStudySiteMobility',
       'OutcomeStudySiteMobility1', 'OutcomeStudySiteMobility2',
       'OutcomeStudySiteBowel', 'OutcomeStudySiteUrine'],
      dtype='object', length=136)

In [106]:
feat_conscious = ['HxLOC', 'TotalGCSManual', 'TotalGCS', 'AVPUDetails','AlteredMentalStatus', 'LOC','ControlType_x']
dfs_conscious=dfs[0].merge(dfs[3],how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_conscious=dfs_conscious[feat_conscious]
# dfs_conscious.loc[:, 'ControlType_x'] = (dfs_conscious['ControlType_x'] == 'case').astype(int)

dfs_conscious = dfs_conscious.replace(['Y', 'YES', 'A'], 1)       
dfs_conscious = dfs_conscious.replace(['N', 'NO'], 0)
dfs_conscious = dfs_conscious.replace(['15'], 15)
dfs_conscious = dfs_conscious.replace(['10'], 10)
dfs_conscious = dfs_conscious.replace(['14'], 14)
dfs_conscious = dfs_conscious.replace(['6'], 6)
dfs_conscious = dfs_conscious.replace(['8'], 8)
dfs_conscious = dfs_conscious.replace(['12'], 12)
dfs_conscious = dfs_conscious.replace(['5'], 5)
dfs_conscious = dfs_conscious.replace(['13'], 13)
dfs_conscious = dfs_conscious.replace(['11'],11)
dfs_conscious = dfs_conscious.replace(['9'], 9)
dfs_conscious = dfs_conscious.replace(['7','7T'], 7)
dfs_conscious = dfs_conscious.replace(['4'], 4)
dfs_conscious = dfs_conscious.replace(['ND', 'NA', '3'], float("NaN"))
dfs_conscious = dfs_conscious.replace(['U','V','N', 'P'], 0)
# dfs_conscious=dfs_conscious.fillna(dfs_conscious.median()) 
# print(dfs_conscious['TotalGCS'])
# print(pd.unique(dfs_conscious['TotalGCS']))
dfs_conscious.corr(method='pearson').style.background_gradient(cmap="Blues")


Unnamed: 0,HxLOC,TotalGCSManual,TotalGCS,AVPUDetails,AlteredMentalStatus,LOC,ControlType_x
HxLOC,1.0,-0.409745,-0.300756,-0.322035,0.368531,0.945069,0.04262
TotalGCSManual,-0.409745,1.0,,0.697871,-0.710236,-0.35828,-0.131194
TotalGCS,-0.300756,,1.0,0.678785,-0.726675,-0.280861,-0.105658
AVPUDetails,-0.322035,0.697871,0.678785,1.0,-0.797965,-0.293623,-0.089832
AlteredMentalStatus,0.368531,-0.710236,-0.726675,-0.797965,1.0,0.352847,0.117775
LOC,0.945069,-0.35828,-0.280861,-0.293623,0.352847,1.0,0.027341
ControlType_x,0.04262,-0.131194,-0.105658,-0.089832,0.117775,0.027341,1.0


## Group2: Complaint of pain in neck and age


In [108]:
# feat_pain = ['PtCompPainHead', 'PtCompPainFace', 'PtCompPainNeck', 'PtCompPainNeckMove', 'PtCompPainChest', 'PtCompPainBack', 'PtCompPainFlank', 'PtCompPainAbd', 'PtCompPainPelvis', 'PtCompPainExt']
# demog_df = dfs[4]
clean_key_col_names = lambda df: df.rename(columns={'site': 'SITE',
                                                            'caseid': 'CaseID',
                                                            'studysubjectid': 'StudySubjectID'})
demog_df = clean_key_col_names(dfs[4])

agegroup_df = pd.get_dummies(pd.cut(demog_df['AgeInYears'], bins=[0, 2, 6, 12, 16],
                                            labels=['infant', 'preschool', 'school_age', 'adolescents'],
                                            include_lowest=True), prefix='age')
agegroup_df=pd.concat([demog_df[['SITE', 'CaseID', 'StudySubjectID']], agegroup_df], axis=1)

feat_pain = ['PtCompPainNeck', 'PtCompPainNeckMove', 'age_infant', 'age_preschool',  'age_school_age',  'age_adolescents','PainNeck','ControlType_x']
dfs_pain=dfs[0].merge(dfs[3],how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_pain=dfs_pain.merge(agegroup_df,how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_pain=dfs_pain[feat_pain]
dfs_pain = dfs_pain.replace(['Y', 'YES', 'A'], 1)       
dfs_pain = dfs_pain.replace(['N', 'NO'], 0)
dfs_pain = dfs_pain.replace(['ND', 'NA'], float("NaN"))
# dfs_pain=dfs_pain.fillna(dfs_pain.median()) 
# print(pd.unique(dfs_pain['PtCompPainNeckMove']))
dfs_pain.corr(method='pearson').style.background_gradient(cmap="Blues")

Unnamed: 0,PtCompPainNeck,PtCompPainNeckMove,age_infant,age_preschool,age_school_age,age_adolescents,PainNeck,ControlType_x
PtCompPainNeck,1.0,,-0.189416,-0.107573,0.044036,0.140669,1.0,0.123322
PtCompPainNeckMove,,1.0,,0.111897,-0.102825,0.006222,,0.114576
age_infant,-0.189416,,1.0,-0.131898,-0.16726,-0.242185,-0.202944,-0.028564
age_preschool,-0.107573,0.111897,-0.131898,1.0,-0.311781,-0.451444,-0.086399,-0.005268
age_school_age,0.044036,-0.102825,-0.16726,-0.311781,1.0,-0.572475,0.043904,-0.025394
age_adolescents,0.140669,0.006222,-0.242185,-0.451444,-0.572475,1.0,0.130484,0.041454
PainNeck,1.0,,-0.202944,-0.086399,0.043904,0.130484,1.0,0.12375
ControlType_x,0.123322,0.114576,-0.028564,-0.005268,-0.025394,0.041454,0.12375,1.0


## Group3: Tenderness in neck


In [109]:
# feat_tender = ['PtTenderHead', 'PtTenderFace', 'PtTenderNeck', 'PtTenderNeckLevel', 'PtTenderNeckLevelC1', 'PtTenderNeckLevelC2', 'PtTenderNeckLevelC3', 'PtTenderNeckLevelC4', 'PtTenderNeckLevelC5', 'PtTenderNeckLevelC6', 'PtTenderNeckLevelC7', 'PtTenderNeckAnt', 'PtTenderNeckPos', 'PtTenderNeckLat', 'PtTenderNeckMid', 'PtTenderNeckOther', 'PtTenderChest', 'PtTenderBack', 'PtTenderFlank', 'PtTenderAbd', 'PtTenderPelvis', 'PtTenderExt']
feat_tender =['PtTenderNeck', 'PtTenderNeckLevel', 'PtTenderNeckLevelC1', 'PtTenderNeckLevelC2', 'PtTenderNeckLevelC3', 'PtTenderNeckLevelC4', 'PtTenderNeckLevelC5', 'PtTenderNeckLevelC6', 'PtTenderNeckLevelC7', 'PtTenderNeckAnt', 'PtTenderNeckPos', 'PtTenderNeckLat', 'PtTenderNeckMid', 'PtTenderNeckOther','PosMidNeckTenderness', 'TenderNeck','ControlType_x'] 

dfs_tender=dfs[0].merge(dfs[3],how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_tender=dfs_tender[feat_tender]
dfs_tender = dfs_tender.replace(['Y', 'YES', 'A'], 1)       
dfs_tender = dfs_tender.replace(['N', 'NO'], 0)
dfs_tender = dfs_tender.replace(['ND', 'NA'], float("NaN"))
# dfs_tender=dfs_tender.fillna(dfs_tender.median()) 
# print(dfs_tender)
dfs_tender.corr(method='pearson').style.background_gradient(cmap="Blues")

Unnamed: 0,PtTenderNeck,PtTenderNeckLevel,PtTenderNeckLevelC1,PtTenderNeckLevelC2,PtTenderNeckLevelC3,PtTenderNeckLevelC4,PtTenderNeckLevelC5,PtTenderNeckLevelC6,PtTenderNeckLevelC7,PtTenderNeckAnt,PtTenderNeckPos,PtTenderNeckLat,PtTenderNeckMid,PtTenderNeckOther,PosMidNeckTenderness,TenderNeck,ControlType_x
PtTenderNeck,1.0,0.591681,0.221431,0.322259,0.379668,0.385305,0.371512,0.329501,0.322259,0.091787,0.37069,0.31951,0.416516,0.337507,0.814658,1.0,0.040167
PtTenderNeckLevel,0.591681,1.0,0.37424,0.544649,0.641676,0.648205,0.624801,0.55689,0.544649,-0.007973,0.177299,0.060407,0.20655,0.009321,0.722381,0.588493,0.029306
PtTenderNeckLevelC1,0.221431,0.37424,1.0,0.615101,0.347228,0.279918,0.249581,0.263999,0.271,-0.011704,0.122742,0.039894,0.075404,-0.004623,0.269673,0.219691,0.030859
PtTenderNeckLevelC2,0.322259,0.544649,0.615101,1.0,0.632498,0.360589,0.256533,0.244491,0.251905,0.001833,0.156966,0.072054,0.154796,0.00266,0.392645,0.319871,0.030202
PtTenderNeckLevelC3,0.379668,0.641676,0.347228,0.632498,1.0,0.677963,0.374814,0.308744,0.263816,-0.003703,0.136528,0.068838,0.165507,-0.002999,0.462747,0.37698,0.008022
PtTenderNeckLevelC4,0.385305,0.648205,0.279918,0.360589,0.677963,1.0,0.616756,0.402932,0.287764,-0.004204,0.11974,0.046517,0.149286,-0.000309,0.469634,0.382591,0.013563
PtTenderNeckLevelC5,0.371512,0.624801,0.249581,0.256533,0.374814,0.616756,1.0,0.690527,0.426781,-0.002968,0.146753,0.088058,0.128013,0.004711,0.452784,0.368864,0.022655
PtTenderNeckLevelC6,0.329501,0.55689,0.263999,0.244491,0.308744,0.402932,0.690527,1.0,0.655711,0.001083,0.160862,0.040166,0.121767,0.005316,0.401485,0.327073,0.045919
PtTenderNeckLevelC7,0.322259,0.544649,0.271,0.251905,0.263816,0.287764,0.426781,0.655711,1.0,0.001833,0.126865,0.020658,0.100187,0.040746,0.392645,0.319871,0.065611
PtTenderNeckAnt,0.091787,-0.007973,-0.011704,0.001833,-0.003703,-0.004204,-0.002968,0.001083,0.001833,1.0,0.063909,0.097173,0.008283,0.018385,0.013993,0.091036,-0.018944


## Group4: Focal neurological deficits

In [110]:
feat_focal= ['PtParesthesias', 'PtSensoryLoss', 'PtExtremityWeakness', 'OtherNeuroDeficitDescCat','FocalNeuroFindings','ControlType_x']
dfs_focal=dfs[0].merge(dfs[3],how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_focal=dfs_focal[feat_focal]
dfs_focal = dfs_focal.replace(['Y', 'YES', 'A'], 1)       
dfs_focal = dfs_focal.replace(['N', 'NO'], 0)
dfs_focal = dfs_focal.replace(['3'], 3)
dfs_focal = dfs_focal.replace(['ND', 'NA'], float("NaN"))
# dfs_focal=dfs_focal.fillna(dfs_focal.median()) 
# print(pd.unique(dfs_focal['PtExtremityWeakness']))
dfs_focal.corr(method='pearson').style.background_gradient(cmap="Blues")

Unnamed: 0,PtParesthesias,PtSensoryLoss,PtExtremityWeakness,OtherNeuroDeficitDescCat,FocalNeuroFindings,ControlType_x
PtParesthesias,1.0,0.935326,0.906438,0.258781,0.256178,0.149634
PtSensoryLoss,0.935326,1.0,0.92903,0.284431,0.224048,0.163905
PtExtremityWeakness,0.906438,0.92903,1.0,0.314892,0.251376,0.188315
OtherNeuroDeficitDescCat,0.258781,0.284431,0.314892,1.0,0.844679,0.477683
FocalNeuroFindings,0.256178,0.224048,0.251376,0.844679,1.0,0.303406
ControlType_x,0.149634,0.163905,0.188315,0.477683,0.303406,1.0


## Group 5: Other parts of the body

In [111]:
feat_otherpain = ['PtCompPainHead', 'PtCompPainFace', 'PtCompPainExt', 'PtTenderHead', 'PtTenderFace', 'PtTenderExt','SubInj_Head', 'SubInj_Face', 'SubInj_Ext', 'SubInj_TorsoTrunk','ControlType_x']
dfs_otherpain=dfs[0].merge(dfs[3],how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_otherpain=dfs_otherpain[feat_otherpain]
dfs_otherpain = dfs_otherpain.replace(['Y', 'YES', 'A'], 1)       
dfs_otherpain = dfs_otherpain.replace(['N', 'NO'], 0)
dfs_otherpain = dfs_otherpain.replace(['ND', 'NA'], float("NaN"))
# dfs_focal=dfs_focal.fillna(dfs_focal.median()) 
# print(dfs_focal)
dfs_otherpain.corr(method='pearson').style.background_gradient(cmap="Blues")

Unnamed: 0,PtCompPainHead,PtCompPainFace,PtCompPainExt,PtTenderHead,PtTenderFace,PtTenderExt,SubInj_Head,SubInj_Face,SubInj_Ext,SubInj_TorsoTrunk,ControlType_x
PtCompPainHead,1.0,0.023547,0.037793,0.262369,0.016954,0.010375,-0.04497,-0.07568,-0.106077,-0.117747,-0.093533
PtCompPainFace,0.023547,1.0,0.075118,0.011725,0.374723,0.056732,-0.048357,0.201582,-0.014918,-0.023898,-0.075342
PtCompPainExt,0.037793,0.075118,1.0,0.01072,0.05305,0.56155,-0.133816,-0.059482,0.249719,-0.017379,-0.088681
PtTenderHead,0.262369,0.011725,0.01072,1.0,0.099348,0.022536,0.019453,-0.025788,-0.055554,-0.044621,-0.026963
PtTenderFace,0.016954,0.374723,0.05305,0.099348,1.0,0.081796,-0.024345,0.198127,-0.018845,-0.012344,-0.050347
PtTenderExt,0.010375,0.056732,0.56155,0.022536,0.081796,1.0,-0.084606,-0.018728,0.199783,-0.000556,-0.091866
SubInj_Head,-0.04497,-0.048357,-0.133816,0.019453,-0.024345,-0.084606,1.0,0.213585,0.11702,0.138892,0.061941
SubInj_Face,-0.07568,0.201582,-0.059482,-0.025788,0.198127,-0.018728,0.213585,1.0,0.061393,0.093213,-0.013333
SubInj_Ext,-0.106077,-0.014918,0.249719,-0.055554,-0.018845,0.199783,0.11702,0.061393,1.0,0.164603,-0.000852
SubInj_TorsoTrunk,-0.117747,-0.023898,-0.017379,-0.044621,-0.012344,-0.000556,0.138892,0.093213,0.164603,1.0,0.090464


## Group6: Injury mechanism


In [112]:
feat_injury= ['InjuryPrimaryMechanism', 'HeadFirst', 'HeadFirstRegion','HighriskDiving', 'HighriskFall', 'HighriskHanging', 'HighriskHitByCar', 'HighriskMVC', 'HighriskOtherMV', 'AxialLoadAnyDoc', 'axialloadtop', 'Clotheslining','ControlType_x']
dfs_injury=dfs[0].merge(dfs[6],how='left', on=['SITE', 'CaseID', 'StudySubjectID'])
dfs_injury=dfs_injury[feat_injury]
dfs_injury = dfs_injury.replace(['Y', 'YES', 'A'], 1)       
dfs_injury = dfs_injury.replace(['N', 'NO'], 0)
dfs_injury = dfs_injury.replace(['ND', 'NA'], float("NaN"))
# dfs_focal=dfs_focal.fillna(dfs_focal.median()) 
print(pd.unique(dfs_injury['HeadFirst']))
dfs_injury.corr(method='pearson').style.background_gradient(cmap="Blues")

[nan  0.  1.]


Unnamed: 0,HeadFirst,HighriskDiving,HighriskFall,HighriskHanging,HighriskHitByCar,HighriskMVC,HighriskOtherMV,AxialLoadAnyDoc,axialloadtop,Clotheslining,ControlType_x
HeadFirst,1.0,0.162025,-0.031451,-0.046832,-0.147697,-0.116292,-0.048737,1.0,0.239442,-0.080529,0.101228
HighriskDiving,0.162025,1.0,-0.023477,-0.004458,-0.048346,-0.05204,-0.020541,0.179104,0.246021,-0.011536,0.202099
HighriskFall,-0.031451,-0.023477,1.0,-0.007958,-0.08649,-0.093436,-0.036682,-0.03496,-0.020865,-0.020622,-0.040855
HighriskHanging,-0.046832,-0.004458,-0.007958,1.0,-0.016391,-0.017635,-0.006964,-0.021845,-0.006037,-0.00391,-0.017156
HighriskHitByCar,-0.147697,-0.048346,-0.08649,-0.016391,1.0,-0.192654,-0.075521,-0.163841,-0.065465,-0.041723,-0.062772
HighriskMVC,-0.116292,-0.05204,-0.093436,-0.017635,-0.192654,1.0,-0.081355,-0.153035,-0.053508,-0.045932,0.066794
HighriskOtherMV,-0.048737,-0.020541,-0.036682,-0.006964,-0.075521,-0.081355,1.0,-0.035495,-0.027815,0.002087,0.005705
AxialLoadAnyDoc,1.0,0.179104,-0.03496,-0.021845,-0.163841,-0.153035,-0.035495,1.0,0.276359,-0.02657,0.071673
axialloadtop,0.239442,0.246021,-0.020865,-0.006037,-0.065465,-0.053508,-0.027815,0.276359,1.0,-0.015417,0.087797
Clotheslining,-0.080529,-0.011536,-0.020622,-0.00391,-0.041723,-0.045932,0.002087,-0.02657,-0.015417,1.0,0.060762
