In [1]:
# Standard-library imports for this notebook.
import os
import pickle
# Move the working directory two levels up (presumably to the repository root) so that the
# relative './utils', './data' and './mimic-iv-1.0' paths used below resolve.
# NOTE(review): the path is relative, so re-running this cell in the same kernel moves up
# two more levels each time — restart the kernel before re-running.
os.chdir('../../')
# NOTE(review): `pd` is used by later cells but never imported explicitly; it presumably
# arrives through this star import — confirm.
from utils.icu_preprocess_util import *    # module of preprocessing functions

In [2]:
# Preprocess the ICD diagnosis table for the ICU mortality cohort.
# preproc_icd_module is given the diagnoses_icd table path, the gzipped cohort table path
# and the ICD-9 -> ICD-10 mapping table path; its output lists the codes that the mapping
# table was unable to map.
diag = preproc_icd_module(
    "./mimic-iv-1.0/hosp/diagnoses_icd.csv.gz",
    './data/cohort/cohort_icu_mortality.csv.gz',
    './utils/mappings/ICD9_to_ICD10_mapping.txt',
    map_code_colname='diagnosis_code',
)

# Only the identifier columns and the three ICD-code columns go into the feature file.
diag_columns = ['subject_id', 'hadm_id', 'stay_id', 'icd_code', 'root_icd10_convert', 'root']
diag[diag_columns].to_csv(
    "./data/features/preproc_diag_icu.csv.gz",
    compression='gzip',
    index=False,
)

100%|██████████| 6686/6686 [00:17<00:00, 371.53it/s]


# unique ICD-9 codes 6686
# unique ICD-10 codes 10120
# unique ICD-10 codes (After converting ICD-9 to ICD-10) 10414
# unique ICD-10 codes (After clinical gruping ICD-10 codes) 1522


In [2]:
# Preprocess the ICU output events for the mortality cohort; 'charttime' is passed as the
# third argument (presumably the name of the event-time column).
out = preproc_out(
    "./mimic-iv-1.0/icu/outputevents.csv.gz",
    './data/cohort/cohort_icu_mortality.csv.gz',
    'charttime',
    dtypes=None,
    usecols=None,
)

# Save the identifier, item and timing columns as a gzipped CSV feature file.
out_columns = ['subject_id', 'hadm_id', 'stay_id', 'itemid', 'charttime', 'intime', 'event_time_from_admit']
out[out_columns].to_csv(
    "./data/features/preproc_out_icu.csv.gz",
    compression='gzip',
    index=False,
)

   subject_id   hadm_id   stay_id           charttime            storetime  \
0    10003700  28623837  30600691 2165-04-24 05:40:00  2165-04-24 05:44:00   
1    10004235  24181354  34100191 2196-02-24 17:55:00  2196-02-24 17:55:00   
2    10004235  24181354  34100191 2196-02-24 19:00:00  2196-02-24 19:43:00   
3    10004235  24181354  34100191 2196-02-24 20:00:00  2196-02-24 20:02:00   
4    10004235  24181354  34100191 2196-02-24 21:00:00  2196-02-24 20:56:00   

   itemid  value valueuom  
0  226559  300.0       ml  
1  226559  100.0       ml  
2  226559   45.0       ml  
3  226559   45.0       ml  
4  226559   45.0       ml  
# Unique Events:   71
Total rows 4457381


In [None]:
# Preprocess the ICU chart events for the mortality cohort, reading only the stay id,
# chart time and item id columns from the raw file.
chart = preproc_chart(
    "./mimic-iv-1.0/icu/chartevents.csv.gz",
    './data/cohort/cohort_icu_mortality.csv.gz',
    'charttime',
    dtypes=None,
    usecols=['stay_id', 'charttime', 'itemid'],
)

# Save the identifier, item and timing columns as a gzipped CSV feature file.
chart_columns = ['subject_id', 'hadm_id', 'stay_id', 'itemid', 'charttime', 'intime', 'event_time_from_admit']
chart[chart_columns].to_csv(
    "./data/features/preproc_chart_icu.csv.gz",
    compression='gzip',
    index=False,
)

In [2]:
# Preprocess the ICU procedure events for the mortality cohort. This reuses preproc_chart,
# passing 'starttime' where the chart-events call passes 'charttime'.
proc = preproc_chart(
    "./mimic-iv-1.0/icu/procedureevents.csv.gz",
    './data/cohort/cohort_icu_mortality.csv.gz',
    'starttime',
    dtypes=None,
    usecols=['stay_id', 'starttime', 'itemid'],
)

# Save the identifier, item and timing columns as a gzipped CSV feature file.
proc_columns = ['subject_id', 'hadm_id', 'stay_id', 'itemid', 'starttime', 'intime', 'event_time_from_admit']
proc[proc_columns].to_csv(
    "./data/features/preproc_proc_icu.csv.gz",
    compression='gzip',
    index=False,
)

    stay_id           starttime  itemid
0  30500789 2147-06-05 11:25:00  225399
1  30500789 2147-06-05 22:00:00  224385
2  30863119 2141-01-23 12:00:00  224275
4  33484414 2153-07-29 18:13:00  224277
5  33484414 2153-07-29 18:14:00  224275
# Unique Events:   157
Total rows 713377


In [2]:
# Preprocess the ICU input events (medications) for the mortality cohort.
med = preproc_meds(
    "./mimic-iv-1.0/icu/inputevents.csv.gz",
    './data/cohort/cohort_icu_mortality.csv.gz',
)

# Save the identifier, item, timing, dosing and order columns as a gzipped CSV feature file.
med_columns = [
    'subject_id', 'hadm_id', 'stay_id', 'itemid',
    'starttime', 'endtime',
    'start_hours_from_admit', 'stop_hours_from_admit',
    'rate', 'amount', 'orderid',
]
med[med_columns].to_csv(
    './data/features/preproc_med_icu.csv.gz',
    compression='gzip',
    index=False,
)

# of unique type of drug:  325
# Total rows 9460658


In [None]:
# Collapse the preprocessed diagnosis table to a single 'new_icd_code' column and write
# it back to the same file.
#
# NOTE(review): icd_orig / icd_convert / icd_group are not defined anywhere in the visible
# notebook; they must be set (to truthy/falsy values) before this cell runs.
# NOTE(review): this reads './data/features/preproc_diag.csv.gz', whereas the cells above
# write 'preproc_diag_icu.csv.gz' — confirm which file is intended.
diag_path = "./data/features/preproc_diag.csv.gz"
diag = pd.read_csv(diag_path, compression='gzip', header=0)

# When several flags are set the most processed representation wins
# (group > convert > orig), matching the order in which the assignments used to be applied.
if icd_group:
    source_col = 'root'
elif icd_convert:
    source_col = 'root_icd10_convert'
elif icd_orig:
    source_col = 'icd_code'
else:
    raise ValueError("Set one of icd_orig, icd_convert or icd_group to choose the ICD code representation")

# The output below overwrites the input file and keeps only three columns, so a second run
# of this cell no longer finds the source column; fail with an explanation instead of a
# bare KeyError.
if source_col not in diag.columns:
    raise KeyError(
        "Column '" + source_col + "' not found in " + diag_path
        + "; the file may already have been reduced by an earlier run of this cell"
    )

diag['new_icd_code'] = diag[source_col]

diag[['subject_id', 'hadm_id', 'new_icd_code']].dropna().to_csv(diag_path, compression='gzip', index=False)

In [None]:
def feature_icu(cohort_output, diag_flag=True,out_flag=True,chart_flag=True,proc_flag=True,med_flag=True):
    """Run the selected ICU preprocessing steps and save each result as a gzipped CSV.

    Args:
        cohort_output: Base name of the cohort file; it is expanded to
            './data/cohort/<cohort_output>.csv.gz'.
        diag_flag: When truthy, preprocess diagnoses_icd and write preproc_diag_icu.csv.gz.
        out_flag: When truthy, preprocess outputevents and write preproc_out_icu.csv.gz.
        chart_flag: When truthy, preprocess chartevents and write preproc_chart_icu.csv.gz.
        proc_flag: When truthy, preprocess procedureevents and write preproc_proc_icu.csv.gz.
        med_flag: When truthy, preprocess inputevents and write preproc_med_icu.csv.gz.

    Returns:
        None. Every enabled step writes its file under './data/features/'.
    """
    def cohort_csv():
        # Built on demand so that nothing is evaluated when a step is switched off.
        return './data/cohort/'+cohort_output+'.csv.gz'

    def save(frame, columns, target):
        # Keep only the requested columns and write them without the index.
        frame[columns].to_csv(target, compression='gzip', index=False)

    event_columns = ['subject_id', 'hadm_id', 'stay_id', 'itemid']
    timing_columns = ['intime', 'event_time_from_admit']

    if diag_flag:
        diagnoses = preproc_icd_module(
            "./mimic-iv-1.0/hosp/diagnoses_icd.csv.gz",
            cohort_csv(),
            './utils/mappings/ICD9_to_ICD10_mapping.txt',
            map_code_colname='diagnosis_code',
        )
        save(
            diagnoses,
            ['subject_id', 'hadm_id', 'stay_id', 'icd_code', 'root_icd10_convert', 'root'],
            "./data/features/preproc_diag_icu.csv.gz",
        )

    if out_flag:
        outputs = preproc_out(
            "./mimic-iv-1.0/icu/outputevents.csv.gz",
            cohort_csv(),
            'charttime',
            dtypes=None,
            usecols=None,
        )
        save(
            outputs,
            event_columns + ['charttime'] + timing_columns,
            "./data/features/preproc_out_icu.csv.gz",
        )

    if chart_flag:
        charts = preproc_chart(
            "./mimic-iv-1.0/icu/chartevents.csv.gz",
            cohort_csv(),
            'charttime',
            dtypes=None,
            usecols=['stay_id', 'charttime', 'itemid'],
        )
        save(
            charts,
            event_columns + ['charttime'] + timing_columns,
            "./data/features/preproc_chart_icu.csv.gz",
        )

    if proc_flag:
        # Procedure events go through preproc_chart as well, keyed on 'starttime'.
        procedures = preproc_chart(
            "./mimic-iv-1.0/icu/procedureevents.csv.gz",
            cohort_csv(),
            'starttime',
            dtypes=None,
            usecols=['stay_id', 'starttime', 'itemid'],
        )
        save(
            procedures,
            event_columns + ['starttime'] + timing_columns,
            "./data/features/preproc_proc_icu.csv.gz",
        )

    if med_flag:
        medications = preproc_meds("./mimic-iv-1.0/icu/inputevents.csv.gz", cohort_csv())
        save(
            medications,
            event_columns + [
                'starttime', 'endtime',
                'start_hours_from_admit', 'stop_hours_from_admit',
                'rate', 'amount', 'orderid',
            ],
            './data/features/preproc_med_icu.csv.gz',
        )

In [9]:
# Peek at the raw hospital prescriptions table.
dat = pd.read_csv(
    "./mimic-iv-1.0/hosp/prescriptions.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

  interactivity=interactivity, compiler=compiler, result=result)


Unnamed: 0,subject_id,hadm_id,pharmacy_id,starttime,stoptime,drug_type,drug,gsn,ndc,prod_strength,form_rx,dose_val_rx,dose_unit_rx,form_val_disp,form_unit_disp,doses_per_24_hrs,route
0,17868682,22726960,73313910,2160-01-07 08:00:00,2160-01-07 21:00:00,MAIN,BuPROPion (Sustained Release),46238,591083960.0,150mg SR Tablet,,300.0,mg,2,TAB,1.0,PO
1,17868682,22726960,16239987,2160-01-07 08:00:00,2160-01-08 16:00:00,MAIN,BuPROPion (Sustained Release),46238,591083960.0,150mg SR Tablet,,150.0,mg,1,TAB,1.0,PO
2,17868682,22726960,16634804,2160-01-07 10:00:00,2160-01-07 16:00:00,MAIN,Aspirin,4380,904404073.0,81mg Tab,,81.0,mg,1,TAB,1.0,PO
3,17868682,22726960,2697460,2160-01-07 14:00:00,2160-01-07 14:00:00,BASE,1/2 NS,1209,338004304.0,1000mL Bag,,1000.0,mL,1000,mL,,IV
4,17868682,22726960,1383959,2160-01-07 15:00:00,2160-01-08 16:00:00,MAIN,Pneumococcal Vac Polyvalent,48548,6494300.0,25mcg/0.5mL Vial,,0.5,mL,1,VIAL,,IM


In [3]:
# Peek at the raw patients table.
dat = pd.read_csv(
    "./mimic-iv-1.0/core/patients.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10000048,F,23,2126,2008 - 2010,
1,10002723,F,0,2128,2017 - 2019,
2,10003939,M,0,2184,2008 - 2010,
3,10004222,M,0,2161,2014 - 2016,
4,10005325,F,0,2154,2011 - 2013,


In [2]:
# Peek at the raw ICU stays table.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/icustays.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
0,17867402,24528534,31793211,Trauma SICU (TSICU),Trauma SICU (TSICU),2154-03-03 04:11:00,2154-03-04 18:16:56,1.587454
1,14435996,28960964,31983544,Trauma SICU (TSICU),Trauma SICU (TSICU),2150-06-19 17:57:00,2150-06-22 18:33:54,3.025625
2,17609946,27385897,33183475,Trauma SICU (TSICU),Trauma SICU (TSICU),2138-02-05 18:54:00,2138-02-15 12:42:05,9.741725
3,18966770,23483021,34131444,Trauma SICU (TSICU),Trauma SICU (TSICU),2123-10-25 10:35:00,2123-10-25 18:59:47,0.350544
4,12776735,20817525,34547665,Neuro Stepdown,Neuro Stepdown,2200-07-12 00:33:00,2200-07-13 16:44:40,1.674769


In [6]:
# Peek at the ICU item dictionary (d_items), which describes each itemid.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/d_items.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
1,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
2,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
3,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,
4,220048,Heart Rhythm,Heart Rhythm,chartevents,Routine Vital Signs,,Text,,


In [5]:
# Peek at the raw ICU datetime events table.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/datetimeevents.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valueuom,warning
0,10003700,28623837,30600691,2165-04-24 05:42:00,2165-04-24 05:42:00,225755,2165-04-24 05:42:00,Date,0
1,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:26:00,225755,2165-04-24 00:00:00,Date,0
2,10004235,24181354,34100191,2196-02-24 18:06:00,2196-02-24 18:07:00,224261,2196-02-24 18:06:00,Date,0
3,10004235,24181354,34100191,2196-02-24 18:06:00,2196-02-24 18:07:00,224279,2196-02-24 18:06:00,Date and Time,0
4,10004235,24181354,34100191,2196-02-24 18:06:00,2196-02-24 18:07:00,224280,2196-02-24 18:06:00,Date,0


In [4]:
# Peek at the raw ICU input events table; the frame is kept in `dat` for the column
# inspection cells that follow.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/inputevents.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,subject_id,hadm_id,stay_id,starttime,endtime,storetime,itemid,amount,amountuom,rate,...,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,cancelreason,statusdescription,originalamount,originalrate
0,12481680,26876606,30863119,2141-01-23 17:37:00,2141-01-23 17:38:00,2141-01-23 17:37:00,226452,180.0,ml,,...,Bolus,123.0,180.0,ml,0,0,0,FinishedRunning,180.0,180.0
1,12481680,26876606,30863119,2141-01-23 13:00:00,2141-01-23 13:01:00,2141-01-23 13:23:00,226452,240.0,ml,,...,Bolus,123.0,240.0,ml,0,0,0,FinishedRunning,240.0,240.0
2,12481680,26876606,30863119,2141-01-23 11:00:00,2141-01-23 11:01:00,2141-01-23 12:24:00,226452,120.0,ml,,...,Bolus,123.0,120.0,ml,0,0,0,FinishedRunning,120.0,120.0
3,12481680,26876606,30863119,2141-01-23 14:16:00,2141-01-23 14:17:00,2141-01-23 14:17:00,226452,60.0,ml,,...,Bolus,123.0,60.0,ml,0,0,0,FinishedRunning,60.0,60.0
4,15614172,27424463,33484414,2153-07-30 23:16:00,2153-07-30 23:17:00,2153-07-30 23:16:00,226452,100.0,ml,,...,Bolus,71.6,100.0,ml,0,0,0,FinishedRunning,100.0,100.0


In [5]:
# List every column name of `dat`; the head() preview of a wide frame elides the middle
# columns. NOTE(review): `dat` is expected to be the inputevents frame here — confirm by
# running the cells in order.
dat.columns

Index(['subject_id', 'hadm_id', 'stay_id', 'starttime', 'endtime', 'storetime',
       'itemid', 'amount', 'amountuom', 'rate', 'rateuom', 'orderid',
       'linkorderid', 'ordercategoryname', 'secondaryordercategoryname',
       'ordercomponenttypedescription', 'ordercategorydescription',
       'patientweight', 'totalamount', 'totalamountuom', 'isopenbag',
       'continueinnextdept', 'cancelreason', 'statusdescription',
       'originalamount', 'originalrate'],
      dtype='object')

In [7]:
# Show the 'amount' and 'rate' columns side by side.
# NOTE(review): `dat` is expected to be the inputevents frame here — confirm.
dat.loc[:, ['amount', 'rate']]

Unnamed: 0,amount,rate
0,180.000000,
1,240.000000,
2,120.000000,
3,60.000000,
4,100.000000,
...,...,...
9460653,1.000000,
9460654,200.000000,
9460655,984.999977,100.0
9460656,1.000000,


In [7]:
# Peek at the raw ICU output events table.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/outputevents.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valueuom
0,10003700,28623837,30600691,2165-04-24 05:40:00,2165-04-24 05:44:00,226559,300.0,ml
1,10004235,24181354,34100191,2196-02-24 17:55:00,2196-02-24 17:55:00,226559,100.0,ml
2,10004235,24181354,34100191,2196-02-24 19:00:00,2196-02-24 19:43:00,226559,45.0,ml
3,10004235,24181354,34100191,2196-02-24 20:00:00,2196-02-24 20:02:00,226559,45.0,ml
4,10004235,24181354,34100191,2196-02-24 21:00:00,2196-02-24 20:56:00,226559,45.0,ml


In [10]:
# Peek at the raw ICU procedure events table.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/procedureevents.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

Unnamed: 0,subject_id,hadm_id,stay_id,starttime,endtime,storetime,itemid,value,valueuom,location,...,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,cancelreason,statusdescription,comments_date,originalamount,originalrate
0,15693895,21203492,30500789,2147-06-05 11:25:00,2147-06-05 11:26:00,2147-06-06 11:25:00,225399,1.0,,,...,80.0,,,0,0,0,FinishedRunning,,1.0,0
1,15693895,21203492,30500789,2147-06-05 22:00:00,2147-06-05 22:01:00,2147-06-06 11:25:00,224385,1.0,,,...,80.0,,,0,0,0,FinishedRunning,,1.0,0
2,12481680,26876606,30863119,2141-01-23 12:00:00,2141-01-23 18:33:00,2141-01-23 18:33:51.14,224275,393.0,min,,...,123.0,,,1,0,0,FinishedRunning,,393.0,1
3,12481680,26876606,30863119,2141-01-23 12:00:00,2141-01-23 18:33:00,2141-01-23 18:33:51.14,224275,393.0,min,,...,123.0,,,1,0,0,FinishedRunning,,393.0,1
4,15614172,27424463,33484414,2153-07-29 18:13:00,2153-07-31 01:25:00,2153-07-31 01:26:00,224277,1872.0,min,RL Post Forearm,...,71.6,,,1,1,0,FinishedRunning,,1872.0,1


In [None]:
# Peek at the raw ICU chart events table.
# NOTE(review): this cell has no recorded execution count, and the whole file is read just
# to preview its first rows — consider whether a full load is needed here.
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/chartevents.csv.gz",
    compression='gzip',
    header=0,
)
dat.head()

In [9]:
# Look up the item-dictionary entry for itemid 225755.
# d_items is reloaded here because, when the notebook runs top to bottom, `dat` would
# otherwise still hold the chartevents table from the previous cell, while this lookup
# (and the 'linksto' cells after it) queries the item dictionary.
dat = pd.read_csv("./mimic-iv-1.0/icu/d_items.csv.gz", compression='gzip', header=0)
dat[dat['itemid']==225755]

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
1152,225755,18 Gauge Insertion Date,18 Gauge Insertion Date,datetimeevents,Access Lines - Peripheral,,Date and time,,


In [7]:
# Distinct values of the 'linksto' column.
# NOTE(review): requires `dat` to hold a table with a 'linksto' column (d_items has one) —
# confirm which table `dat` refers to at this point.
dat.loc[:, 'linksto'].unique()

array(['datetimeevents', 'chartevents', 'inputevents', 'procedureevents',
       'outputevents'], dtype=object)

In [8]:
# Count the distinct itemids whose 'linksto' value is 'inputevents'.
# NOTE(review): requires `dat` to hold a table with 'linksto' and 'itemid' columns — confirm.
dat.loc[dat['linksto'] == 'inputevents', 'itemid'].nunique()

474