In [127]:
import pandas as pd

In [128]:
# Load the feature-importance table; later cells match on its `feature` column.
feat_import = pd.read_csv(
    filepath_or_buffer='data/output/feature_importance.csv',
)
feat_import

Unnamed: 0,imp,feature
0,0.011333,220045
1,0.008279,220046
2,0.006968,220047
3,0.010390,220179
4,0.010059,220180
...,...,...
1442,0.000786,ethnicity_6
1443,0.001591,ethnicity_7
1444,0.000652,insurance_0
1445,0.001757,insurance_1


In [113]:
# Item dictionary from the MIMIC-IV ICU module (d_items.csv.gz). Despite the
# variable name, this is not a lab-only table: its `linksto` column points at
# chartevents, inputevents, datetimeevents, etc. Later cells look rows up by
# `itemid` and read the human-readable `label`.
d_labitems = pd.read_csv(
    filepath_or_buffer="mimiciv/1.0/icu/d_items.csv.gz",
)
d_labitems

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
1,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
2,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
3,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,
4,220048,Heart Rhythm,Heart Rhythm,chartevents,Routine Vital Signs,,Text,,
...,...,...,...,...,...,...,...,...,...
3856,229355,Absolute Neutrophil Count,Absolute Neutrophil Count,chartevents,Labs,,Numeric,,
3857,229453,Exam-GI/GU,Exam-GI/GU,chartevents,MD Progress Note,,Text,,
3858,229604,Therapeutic Bed,Therapeutic Bed,chartevents,Treatments,,Text,,
3859,229709,Angiotensin II (Giapreza),Angiotensin II (Giapreza),inputevents,Medications,mg,Solution,,


In [126]:
# Case-insensitive search of the item labels for SOFA-related entries.
# na=False counts a missing label as "no match"; without it a NaN label leaves
# NaN in the mask and the boolean indexing below raises ValueError.
is_sofa_label = d_labitems.label.str.lower().str.contains('sofa', na=False)
d_labitems[is_sofa_label]

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
1747,227428,SOFA Score,SOFA Score,chartevents,General,,Numeric,,


In [49]:
# Pick out the features whose name is purely numeric; these are the ones that
# get matched against d_items `itemid` values in the next cell.
# The whole name must be digits (not just its first character), so empty or
# missing names and names such as "9abc" are skipped instead of raising
# (IndexError/TypeError while building the mask, ValueError in the int cast).
# astype(str) makes the check safe for any non-string values in the column.
feat_mask = feat_import.feature.astype(str).str.fullmatch(r"[0-9]+")
feat_import_itemid = feat_import.feature[feat_mask].astype(int)
feat_import_itemid

0      220045
1      220046
2      220047
3      220179
4      220180
        ...  
365    227933
366    227602
367    227614
368    228628
369    228731
Name: feature, Length: 370, dtype: int64

In [89]:
# Label lookup for the numeric (item-id) features: one row per matching itemid,
# with columns `feature` (itemid as a string), `meta` (the item label) and a
# constant `feature_type`.
# Written as a single chain so every step returns a new frame. Assigning
# columns on a filtered slice of d_labitems is the chained-assignment pattern
# that can emit pandas' SettingWithCopyWarning.
flowlab_metadata = (
    d_labitems.loc[d_labitems.itemid.isin(feat_import_itemid), ['itemid', 'label']]
    # cast to str so the key has the same dtype as feat_import.feature when merged
    .astype({'itemid': 'str'})
    .rename(columns={"label": "meta", "itemid": "feature"})
    .assign(feature_type='lab/flowsheet')
)
flowlab_metadata

Unnamed: 0,feature,meta,feature_type
1,220045,Heart Rate,lab/flowsheet
2,220046,Heart rate Alarm - High,lab/flowsheet
3,220047,Heart Rate Alarm - Low,lab/flowsheet
5,220050,Arterial Blood Pressure systolic,lab/flowsheet
6,220051,Arterial Blood Pressure diastolic,lab/flowsheet
...,...,...,...
3796,228184,SVV (PiCCO),lab/flowsheet
3797,228185,SVRI (PiCCO),lab/flowsheet
3841,228872,HM II- Mean BP,lab/flowsheet
3842,228873,HM II- Flow,lab/flowsheet


In [108]:
# ICD-10 lookup: parent code -> parent diagnosis name, reshaped to the same
# (`feature`, `meta`, `feature_type`) layout as the item-id lookup.
# Column names are supplied explicitly via `names=`. The source CSV is known
# to be imperfect but is good enough for now.
icd10_mapping = (
    pd.read_csv(
        'utils/mappings/icd10_codes.csv',
        names=[
            'ICD10_parent',
            'ICD10_sub',
            'ICD10_full',
            'diagnosis_name',
            'diagnosis_name_more',
            'parent_diagnosis_name',
        ],
    )
    .loc[:, ['ICD10_parent', 'parent_diagnosis_name']]
    # many sub-codes share a parent; keep each (code, name) pair once
    .drop_duplicates()
    .reset_index(drop=True)
    .rename(columns={"parent_diagnosis_name": "meta", "ICD10_parent": "feature"})
    .assign(feature_type='diagnosis')
)
icd10_mapping

Unnamed: 0,feature,meta,feature_type
0,A00,Cholera,diagnosis
1,A010,Typhoid fever,diagnosis
2,A011,Paratyphoid fever A,diagnosis
3,A012,Paratyphoid fever B,diagnosis
4,A013,Paratyphoid fever C,diagnosis
...,...,...,...
19922,Z99,"Dependence on enabling machines and devices, n...",diagnosis
19923,Z991,Dependence on respirator,diagnosis
19924,Z992,Dependence on renal dialysis,diagnosis
19925,Z993,Dependence on wheelchair,diagnosis


In [109]:
# Attach a description to each feature, one join per lookup table. Both merges
# use the default join on `feature`, so features absent from a lookup are
# dropped from that result.
flowlab_feat = pd.merge(feat_import, flowlab_metadata, on='feature')
dx_feat = pd.merge(feat_import, icd10_mapping, on='feature')

# Stack item-id features first, then diagnoses, keeping only the descriptive
# columns, and persist the result without the index.
metadata_columns = ['feature', 'meta', 'feature_type']
metadata = pd.concat([flowlab_feat, dx_feat]).loc[:, metadata_columns]
metadata.to_csv('data/output/metadata.csv', index=False)
metadata

Unnamed: 0,feature,meta,feature_type
0,220045,Heart Rate,lab/flowsheet
1,220046,Heart rate Alarm - High,lab/flowsheet
2,220047,Heart Rate Alarm - Low,lab/flowsheet
3,220179,Non Invasive Blood Pressure systolic,lab/flowsheet
4,220180,Non Invasive Blood Pressure diastolic,lab/flowsheet
...,...,...,...
451,M13,Other arthritis,diagnosis
452,Z47,Orthopedic aftercare,diagnosis
453,B48,"Other mycoses, not elsewhere classified",diagnosis
454,L88,Pyoderma gangrenosum,diagnosis


In [110]:
# Features that found no description in either lookup (neither the ICD-10
# names nor the item-id labels).
matched_features = [*dx_feat.feature, *flowlab_feat.feature]
feat_import.loc[~feat_import.feature.isin(matched_features)]

Unnamed: 0,imp,feature
371,0.000793,R09
380,0.001244,E78
383,0.001508,T81
384,0.000051,H47
385,0.000400,D61
...,...,...
1086,0.000758,ethnicity_6
1087,0.001348,ethnicity_7
1088,0.000268,insurance_0
1089,0.001640,insurance_1


In [130]:
# Preview: the first 30 described features, importance next to description.
pd.merge(feat_import, metadata, on="feature").head(30)

Unnamed: 0,imp,feature,meta,feature_type
0,0.011333,220045,Heart Rate,lab/flowsheet
1,0.008279,220046,Heart rate Alarm - High,lab/flowsheet
2,0.006968,220047,Heart Rate Alarm - Low,lab/flowsheet
3,0.01039,220179,Non Invasive Blood Pressure systolic,lab/flowsheet
4,0.010059,220180,Non Invasive Blood Pressure diastolic,lab/flowsheet
5,0.009567,220181,Non Invasive Blood Pressure mean,lab/flowsheet
6,0.011479,220210,Respiratory Rate,lab/flowsheet
7,0.013578,220277,O2 saturation pulseoxymetry,lab/flowsheet
8,0.006108,223751,Non-Invasive Blood Pressure Alarm - High,lab/flowsheet
9,0.005271,223752,Non-Invasive Blood Pressure Alarm - Low,lab/flowsheet
