In [13]:
import pandas as pd
import numpy as np
import pickle

In [2]:
# Restrict the ADMISSIONS table (MIMIC-III demo, v1.4) to the columns listed
# here; pandas skips every other column while parsing.
fields = [
    "subject_id",
    "hadm_id",
    "deathtime",
    "admission_type",
    "admission_location",
    "discharge_location",
    "insurance",
    "diagnosis",
    "hospital_expire_flag",
]
admitdf = pd.read_csv(
    "https://physionet.org/files/mimiciii-demo/1.4/ADMISSIONS.csv?download",
    usecols=fields,
)

In [3]:
# Restrict the ICUSTAYS table (MIMIC-III demo, v1.4) to the columns listed
# here. Note that this rebinds `fields`, replacing the ADMISSIONS column list.
fields = [
    "subject_id",
    "hadm_id",
    "icustay_id",
    "first_careunit",
    "last_careunit",
    "first_wardid",
    "last_wardid",
    "los",
]
icudf = pd.read_csv(
    "https://physionet.org/files/mimiciii-demo/1.4/ICUSTAYS.csv?download",
    usecols=fields,
)

In [4]:
# Load the procedure-event and prescription tables in full (no column
# selection). The same two files are read again at the top of the
# "Procedures" and "Drugs" cells further down.
procdf = pd.read_csv(
    "https://physionet.org/files/mimiciii-demo/1.4/PROCEDUREEVENTS_MV.csv?download"
)
drugdf = pd.read_csv(
    "https://physionet.org/files/mimiciii-demo/1.4/PRESCRIPTIONS.csv?download"
)

Identify ICU stays whose hospital admission ended in death (discharge location DEAD/EXPIRED)

In [5]:
# Admissions that ended in death, identified by their discharge location.
deaths = admitdf[admitdf['discharge_location'].eq('DEAD/EXPIRED')]

# Left join: every ICU stay is kept, and the admission columns are filled in
# only for stays whose admission appears in `deaths` (NaN otherwise).
# `validate` makes the merge raise if `deaths` ever holds more than one row
# per hadm_id, which would otherwise silently duplicate ICU stays.
icu_admin = pd.merge(
    icudf,
    deaths,
    how='left',
    on='hadm_id',
    validate='many_to_one',
)

# Both inputs carry `subject_id`, so the merge suffixes them with _x / _y.
# Keep the ICU-side copy (populated for every row of a left join) under the
# original column name and discard the admissions-side copy.
icu_full = (
    icu_admin
    .drop(columns=['subject_id_y'])
    .rename(columns={"subject_id_x": "subject_id"})
)
icu_full

Unnamed: 0,subject_id,hadm_id,icustay_id,first_careunit,last_careunit,first_wardid,last_wardid,los,deathtime,admission_type,admission_location,discharge_location,insurance,diagnosis,hospital_expire_flag
0,10006,142345,206504,MICU,MICU,52,52,1.6325,,,,,,,
1,10011,105331,232110,MICU,MICU,15,15,13.8507,2126-08-28 18:59:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Private,HEPATITIS B,1.0
2,10013,165520,264446,MICU,MICU,15,15,2.6499,2125-10-07 15:13:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,SEPSIS,1.0
3,10017,199207,204881,CCU,CCU,7,7,2.1436,,,,,,,
4,10019,177759,228977,MICU,MICU,15,15,1.2938,2163-05-15 12:00:00,EMERGENCY,TRANSFER FROM HOSP/EXTRAM,DEAD/EXPIRED,Medicare,ALCOHOLIC HEPATITIS,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,44083,198330,286428,CCU,CCU,7,7,3.6174,,,,,,,
132,44154,174245,217724,MICU,MICU,50,50,0.6259,2178-05-15 09:45:00,EMERGENCY,EMERGENCY ROOM ADMIT,DEAD/EXPIRED,Medicare,ALTERED MENTAL STATUS,1.0
133,44212,163189,239396,MICU,MICU,50,50,31.1235,,,,,,,
134,44222,192189,238186,CCU,CCU,7,7,1.3279,,,,,,,


Procedures: per-ICU-stay counts of the eight most frequent procedure order categories

In [15]:
# Re-read the raw table so this cell can be re-run on its own: `procdf` is
# overwritten with a filtered version a few lines below.
procdf = pd.read_csv("https://physionet.org/files/mimiciii-demo/1.4/PROCEDUREEVENTS_MV.csv?download")

# Keep only the eight order categories with the most rows.
topprocs = procdf['ordercategoryname'].value_counts().head(8).index.tolist()
procdf = procdf[procdf['ordercategoryname'].isin(topprocs)].reset_index()
# Rows without an ICU stay id cannot be joined to ICU stays on `icustay_id`.
procdf = procdf[procdf['icustay_id'].notna()]

# Long format: one row per (subject, ICU stay, category) with its row count.
myproc_counts = (
    procdf
    .groupby(['subject_id', 'icustay_id', 'ordercategoryname'])
    .size()
    .reset_index(name='counts')
)

# Wide format: one row per ICU stay, one column per category. Categories that
# never occur for a stay come out of the pivot as NaN and are set to 0.
# NOTE: despite its name, `myproc_counts_long` holds the wide table.
myproc_counts_long = (
    myproc_counts
    .pivot(index=['subject_id', 'icustay_id'],
           columns='ordercategoryname',
           values='counts')
    .reset_index()
    .fillna(0)
    .drop(columns=['subject_id'])
)

# Take the category names from the table itself instead of a hand-typed copy,
# so the saved list cannot drift from the columns actually produced above.
procedure_columns = myproc_counts_long.columns.drop('icustay_id').tolist()

# The file is a binary pickle even though its name ends in .txt.
with open("../../crosstables/procedure_list.txt", "wb") as fp:   #Pickling
    pickle.dump(procedure_columns, fp)
myproc_counts_long.head()

ordercategoryname,icustay_id,Dialysis,Imaging,Intubation/Extubation,Invasive Lines,Peripheral Lines,Procedures,Significant Events,Ventilation
0,269173,0.0,1.0,1.0,0.0,3.0,6.0,0.0,1.0
1,279554,0.0,0.0,2.0,0.0,3.0,2.0,0.0,1.0
2,236120,0.0,0.0,0.0,0.0,2.0,5.0,0.0,0.0
3,285750,0.0,0.0,1.0,2.0,2.0,0.0,1.0,0.0
4,285369,0.0,1.0,0.0,0.0,2.0,1.0,0.0,0.0


Drugs: per-ICU-stay counts of the twenty most frequent formulary drug codes

In [23]:
# Re-read the raw table so this cell can be re-run on its own: `drugdf` is
# overwritten with a filtered version a few lines below.
drugdf = pd.read_csv("https://physionet.org/files/mimiciii-demo/1.4/PRESCRIPTIONS.csv?download")

# Keep only the twenty formulary drug codes with the most prescription rows.
top_twenty = drugdf['formulary_drug_cd'].value_counts().head(20).index.tolist()
drugdf = drugdf[drugdf['formulary_drug_cd'].isin(top_twenty)].reset_index()
# Rows without an ICU stay id cannot be joined to ICU stays on `icustay_id`.
drugdf = drugdf[drugdf['icustay_id'].notna()]

# Long format: one row per (subject, ICU stay, drug code) with its row count.
mycounts = (
    drugdf
    .groupby(['subject_id', 'icustay_id', 'formulary_drug_cd'])
    .size()
    .reset_index(name='counts')
)

# Wide format: one row per ICU stay, one column per drug code. Codes that
# never occur for a stay come out of the pivot as NaN and are set to 0.
# NOTE: despite its name, `mycounts_long` holds the wide table.
mycounts_long = (
    mycounts
    .pivot(index=['subject_id', 'icustay_id'],
           columns='formulary_drug_cd',
           values='counts')
    .reset_index()
    .fillna(0)
    .drop(columns=['subject_id'])
)

# Take the drug codes from the table itself instead of a hand-typed copy,
# so the saved list cannot drift from the columns actually produced above.
prescription_list = mycounts_long.columns.drop('icustay_id').tolist()

# The file is a binary pickle even though its name ends in .txt.
with open("../../crosstables/prescription_list.txt", "wb") as dl:   #Pickling
    pickle.dump(prescription_list, dl)
mycounts_long.head()

formulary_drug_cd,icustay_id,ACET325,CALG1I,D5W1000,D5W250,FURO40I,HEPA5I,INSULIN,KCL20P,KCL20PM,...,LR1000,MAG2PM,METO25,MORP2I,NACLFLUSH,NS1000,NS250,NS500,VANC1F,VANCOBASE
0,206504.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,2.0
1,264446.0,1.0,0.0,0.0,4.0,1.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,204881.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,...,0.0,0.0,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
3,228977.0,0.0,3.0,5.0,6.0,0.0,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0
4,277021.0,1.0,0.0,0.0,0.0,0.0,1.0,1.0,0.0,0.0,...,0.0,0.0,2.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0


Merge the procedure and drug counts into the ICU data

In [48]:
# Attach the drug counts, then the procedure counts, to the ICU-stay table.
# Both joins are left joins on `icustay_id`, so every ICU stay is kept; a stay
# with no matching row in a count table gets NaN in that table's columns.
df1 = icu_full.merge(mycounts_long, how='left', on='icustay_id')
analyticdf = df1.merge(myproc_counts_long, how='left', on='icustay_id')
analyticdf

Unnamed: 0,subject_id,hadm_id,icustay_id,first_careunit,last_careunit,first_wardid,last_wardid,los,deathtime,admission_type,...,VANC1F,VANCOBASE,Dialysis,Imaging,Intubation/Extubation,Invasive Lines,Peripheral Lines,Procedures,Significant Events,Ventilation
0,10006,142345,206504,MICU,MICU,52,52,1.6325,,,...,2.0,2.0,,,,,,,,
1,10011,105331,232110,MICU,MICU,15,15,13.8507,2126-08-28 18:59:00,EMERGENCY,...,,,,,,,,,,
2,10013,165520,264446,MICU,MICU,15,15,2.6499,2125-10-07 15:13:00,EMERGENCY,...,0.0,0.0,,,,,,,,
3,10017,199207,204881,CCU,CCU,7,7,2.1436,,,...,0.0,0.0,,,,,,,,
4,10019,177759,228977,MICU,MICU,15,15,1.2938,2163-05-15 12:00:00,EMERGENCY,...,0.0,0.0,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
131,44083,198330,286428,CCU,CCU,7,7,3.6174,,,...,0.0,0.0,0.0,0.0,0.0,1.0,2.0,4.0,2.0,0.0
132,44154,174245,217724,MICU,MICU,50,50,0.6259,2178-05-15 09:45:00,EMERGENCY,...,0.0,0.0,0.0,1.0,1.0,1.0,2.0,0.0,0.0,1.0
133,44212,163189,239396,MICU,MICU,50,50,31.1235,,,...,7.0,7.0,16.0,14.0,3.0,10.0,3.0,9.0,0.0,1.0
134,44222,192189,238186,CCU,CCU,7,7,1.3279,,,...,0.0,0.0,0.0,0.0,0.0,0.0,2.0,3.0,0.0,0.0
