In [1]:
import os
import pickle

# Move to the repository root so the `utils` import below and the relative
# './data', './utils' and './mimic-iv-1.0' paths used by later cells resolve.
# The guard makes the cell idempotent: an unconditional os.chdir('../../')
# climbs two more directories every time the cell is re-run. If the module
# file is not found (e.g. first run from the notebook folder), the original
# chdir is performed unchanged.
if not os.path.exists('./utils/hosp_preprocess_util.py'):
    os.chdir('../../')
# NOTE(review): later cells use `pd` without importing it here; presumably it
# is re-exported by this star import - confirm before narrowing the import.
from utils.hosp_preprocess_util import *    # module of preprocessing functions

In [2]:
# Preprocess the ICD diagnoses table for the non-ICU 30-day-readmission cohort.
# Inputs: path to diagnoses_icd, path to the gzipped cohort table, and an
# optional ICD-9 -> ICD-10 mapping table (with the name of its code column).
# The call prints a progress bar and unique-code counts while it runs.
diag = preproc_icd_module(
    "./mimic-iv-1.0/hosp/diagnoses_icd.csv.gz",
    './data/cohort/cohort_non-icu_30_day_readmission.csv.gz',
    './utils/mappings/ICD9_to_ICD10_mapping.txt',
    map_code_colname='diagnosis_code',
)

# Persist the original code together with both derived code columns.
diag[['subject_id', 'hadm_id', 'icd_code', 'root_icd10_convert', 'root']].to_csv(
    "./data/features/preproc_diag.csv.gz",
    index=False,
    compression='gzip',
)

100%|██████████| 9134/9134 [00:41<00:00, 222.18it/s]


# unique ICD-9 codes 9134
# unique ICD-10 codes 16924
# unique ICD-10 codes (After converting ICD-9 to ICD-10) 17126
# unique ICD-10 codes (After clinical gruping ICD-10 codes) 1695


In [2]:
# Preprocess the ICD procedures table for the non-ICU 30-day-readmission cohort.
proc = preproc_proc(
    "./mimic-iv-1.0/hosp/procedures_icd.csv.gz",
    './data/cohort/cohort_non-icu_30_day_readmission.csv.gz',
    'chartdate',
    'base_anchor_year',
    dtypes=None,
    usecols=None,
)

# `icd_version` must be written out as well: the procedure-selection cell
# further down reloads this file and filters on `proc.icd_version == 10`,
# which fails if the column was dropped here.
proc[
    ['subject_id', 'hadm_id', 'icd_code', 'icd_version', 'chartdate', 'admittime', 'proc_time_from_admit']
].dropna().to_csv(
    "./data/features/preproc_proc.csv.gz",
    compression='gzip',
    index=False,
)

# Unique ICD9 Procedures:   2560
# Unique ICD10 Procedures:  10203

Value counts of each ICD version:
 9     463096
10    224075
Name: icd_version, dtype: int64


Unnamed: 0,subject_id,hadm_id,seq_num,chartdate,icd_code,icd_version,admittime,dischtime,proc_time_from_admit
0,10287061,27485182,1,2124-08-22,7569,9,2124-08-21 16:23:00,2124-08-24 13:00:00,07:37:00
1,13135573,22286210,3,2187-05-30,7309,9,2187-05-29 16:04:00,2187-05-31 12:40:00,07:56:00
2,13135573,22286210,1,2187-05-30,7569,9,2187-05-29 16:04:00,2187-05-31 12:40:00,07:56:00
3,13135573,22286210,2,2187-05-30,734,9,2187-05-29 16:04:00,2187-05-31 12:40:00,07:56:00
4,12646116,24154012,2,2115-01-13,734,9,2115-01-12 17:23:00,2115-01-17 13:40:00,06:37:00


In [None]:
# Preprocess prescriptions for the cohort, using the NDC product table
# passed as the third argument.
med = preproc_meds(
    "./mimic-iv-1.0/hosp/prescriptions.csv.gz",
    './data/cohort/cohort_non-icu_30_day_readmission.csv.gz',
    './utils/mappings/ndc_product.txt',
)

# Save both the raw drug name and the non-proprietary name; a later cell
# picks one of them.
med[
    ['subject_id', 'hadm_id', 'starttime', 'stoptime', 'drug', 'nonproprietaryname',
     'start_hours_from_admit', 'stop_hours_from_admit']
].to_csv(
    './data/features/preproc_med.csv.gz',
    index=False,
    compression='gzip',
)

In [None]:
# Reduce the saved diagnoses to a single `new_icd_code` column, chosen by the
# icd_orig / icd_convert / icd_group flags (defined outside this cell).
diag = pd.read_csv("./data/features/preproc_diag.csv.gz", compression='gzip', header=0)

# Same precedence as the original sequential overwrites (group > convert >
# orig), but exactly one assignment runs, and a missing selection fails with a
# clear message instead of a KeyError on 'new_icd_code' below.
if icd_group:
    diag['new_icd_code'] = diag['root']
elif icd_convert:
    diag['new_icd_code'] = diag['root_icd10_convert']
elif icd_orig:
    diag['new_icd_code'] = diag['icd_code']
else:
    raise ValueError("Set one of icd_orig, icd_convert or icd_group before running this cell.")

# NOTE: this overwrites the file that was just read with a 3-column version,
# so the cell cannot be re-run until the diagnoses preprocessing cell has
# regenerated the file.
diag[['subject_id', 'hadm_id', 'new_icd_code']].dropna().to_csv(
    "./data/features/preproc_diag.csv.gz",
    compression='gzip',
    index=False,
)

In [None]:
# Rewrite the saved procedures file according to the proc_icd910 / proc_icd10
# flags (defined outside this cell): either keep every row, or keep only the
# rows whose icd_version is 10. If neither flag is set the file is left as is.
proc = pd.read_csv("./data/features/preproc_proc.csv.gz", header=0, compression='gzip')

if proc_icd910:
    # Both ICD versions: re-save the selected columns without missing values.
    proc[
        ['subject_id', 'hadm_id', 'icd_code', 'chartdate', 'admittime', 'proc_time_from_admit']
    ].dropna().to_csv(
        "./data/features/preproc_proc.csv.gz",
        index=False,
        compression='gzip',
    )
elif proc_icd10:
    # NOTE(review): requires an `icd_version` column in the loaded file -
    # confirm the cell that writes preproc_proc.csv.gz actually saves it.
    proc.loc[proc.icd_version == 10][
        ['subject_id', 'hadm_id', 'icd_code', 'chartdate', 'admittime', 'proc_time_from_admit']
    ].dropna().to_csv(
        "./data/features/preproc_proc.csv.gz",
        index=False,
        compression='gzip',
    )

In [None]:
# Reduce the saved medications to a single `drug_name` column, chosen by the
# med_group flag (defined outside this cell).
med = pd.read_csv("./data/features/preproc_med.csv.gz", compression='gzip', header=0)

# The original tested `med_group` in both branches, so the non-proprietary
# name was always overwritten by the raw drug name, and with med_group False
# `drug_name` was never created (KeyError below). Grouping now selects the
# non-proprietary name; otherwise the raw drug name is kept.
if med_group:
    med['drug_name'] = med['nonproprietaryname']
else:
    med['drug_name'] = med['drug']

# NOTE: this overwrites the file that was just read, dropping the `drug` and
# `nonproprietaryname` columns, so the cell cannot be re-run until the
# medication preprocessing cell has regenerated the file.
med[
    ['subject_id', 'hadm_id', 'starttime', 'stoptime', 'drug_name',
     'start_hours_from_admit', 'stop_hours_from_admit']
].dropna().to_csv(
    './data/features/preproc_med.csv.gz',
    compression='gzip',
    index=False,
)

In [None]:
# Preview the ICU chartevents table. Only the first rows are parsed: this cell
# just displays head(), and `dat` is reassigned by the next preview cell, so
# loading the entire (very large) table into memory is unnecessary.
dat = pd.read_csv("./mimic-iv-1.0/icu/chartevents.csv.gz", compression='gzip', header=0, nrows=5)
dat.head()

In [4]:
# Preview the ICU d_items table (first five rows).
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/d_items.csv.gz",
    header=0,
    compression='gzip',
)
dat.head(5)

Unnamed: 0,itemid,label,abbreviation,linksto,category,unitname,param_type,lownormalvalue,highnormalvalue
0,220003,ICU Admission date,ICU Admission date,datetimeevents,ADT,,Date and time,,
1,220045,Heart Rate,HR,chartevents,Routine Vital Signs,bpm,Numeric,,
2,220046,Heart rate Alarm - High,HR Alarm - High,chartevents,Alarms,bpm,Numeric,,
3,220047,Heart Rate Alarm - Low,HR Alarm - Low,chartevents,Alarms,bpm,Numeric,,
4,220048,Heart Rhythm,Heart Rhythm,chartevents,Routine Vital Signs,,Text,,


In [5]:
# Preview the ICU datetimeevents table (first five rows).
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/datetimeevents.csv.gz",
    header=0,
    compression='gzip',
)
dat.head(5)

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valueuom,warning
0,10003700,28623837,30600691,2165-04-24 05:42:00,2165-04-24 05:42:00,225755,2165-04-24 05:42:00,Date,0
1,10003700,28623837,30600691,2165-04-24 08:00:00,2165-04-24 08:26:00,225755,2165-04-24 00:00:00,Date,0
2,10004235,24181354,34100191,2196-02-24 18:06:00,2196-02-24 18:07:00,224261,2196-02-24 18:06:00,Date,0
3,10004235,24181354,34100191,2196-02-24 18:06:00,2196-02-24 18:07:00,224279,2196-02-24 18:06:00,Date and Time,0
4,10004235,24181354,34100191,2196-02-24 18:06:00,2196-02-24 18:07:00,224280,2196-02-24 18:06:00,Date,0


In [6]:
# Preview the ICU inputevents table (first five rows).
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/inputevents.csv.gz",
    header=0,
    compression='gzip',
)
dat.head(5)

Unnamed: 0,subject_id,hadm_id,stay_id,starttime,endtime,storetime,itemid,amount,amountuom,rate,...,ordercategorydescription,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,cancelreason,statusdescription,originalamount,originalrate
0,12481680,26876606,30863119,2141-01-23 17:37:00,2141-01-23 17:38:00,2141-01-23 17:37:00,226452,180.0,ml,,...,Bolus,123.0,180.0,ml,0,0,0,FinishedRunning,180.0,180.0
1,12481680,26876606,30863119,2141-01-23 13:00:00,2141-01-23 13:01:00,2141-01-23 13:23:00,226452,240.0,ml,,...,Bolus,123.0,240.0,ml,0,0,0,FinishedRunning,240.0,240.0
2,12481680,26876606,30863119,2141-01-23 11:00:00,2141-01-23 11:01:00,2141-01-23 12:24:00,226452,120.0,ml,,...,Bolus,123.0,120.0,ml,0,0,0,FinishedRunning,120.0,120.0
3,12481680,26876606,30863119,2141-01-23 14:16:00,2141-01-23 14:17:00,2141-01-23 14:17:00,226452,60.0,ml,,...,Bolus,123.0,60.0,ml,0,0,0,FinishedRunning,60.0,60.0
4,15614172,27424463,33484414,2153-07-30 23:16:00,2153-07-30 23:17:00,2153-07-30 23:16:00,226452,100.0,ml,,...,Bolus,71.6,100.0,ml,0,0,0,FinishedRunning,100.0,100.0


In [7]:
# Preview the ICU outputevents table (first five rows).
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/outputevents.csv.gz",
    header=0,
    compression='gzip',
)
dat.head(5)

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valueuom
0,10003700,28623837,30600691,2165-04-24 05:40:00,2165-04-24 05:44:00,226559,300.0,ml
1,10004235,24181354,34100191,2196-02-24 17:55:00,2196-02-24 17:55:00,226559,100.0,ml
2,10004235,24181354,34100191,2196-02-24 19:00:00,2196-02-24 19:43:00,226559,45.0,ml
3,10004235,24181354,34100191,2196-02-24 20:00:00,2196-02-24 20:02:00,226559,45.0,ml
4,10004235,24181354,34100191,2196-02-24 21:00:00,2196-02-24 20:56:00,226559,45.0,ml


In [8]:
# Preview the ICU procedureevents table (first five rows).
dat = pd.read_csv(
    "./mimic-iv-1.0/icu/procedureevents.csv.gz",
    header=0,
    compression='gzip',
)
dat.head(5)

Unnamed: 0,subject_id,hadm_id,stay_id,starttime,endtime,storetime,itemid,value,valueuom,location,...,patientweight,totalamount,totalamountuom,isopenbag,continueinnextdept,cancelreason,statusdescription,comments_date,originalamount,originalrate
0,15693895,21203492,30500789,2147-06-05 11:25:00,2147-06-05 11:26:00,2147-06-06 11:25:00,225399,1.0,,,...,80.0,,,0,0,0,FinishedRunning,,1.0,0
1,15693895,21203492,30500789,2147-06-05 22:00:00,2147-06-05 22:01:00,2147-06-06 11:25:00,224385,1.0,,,...,80.0,,,0,0,0,FinishedRunning,,1.0,0
2,12481680,26876606,30863119,2141-01-23 12:00:00,2141-01-23 18:33:00,2141-01-23 18:33:51.14,224275,393.0,min,,...,123.0,,,1,0,0,FinishedRunning,,393.0,1
3,12481680,26876606,30863119,2141-01-23 12:00:00,2141-01-23 18:33:00,2141-01-23 18:33:51.14,224275,393.0,min,,...,123.0,,,1,0,0,FinishedRunning,,393.0,1
4,15614172,27424463,33484414,2153-07-29 18:13:00,2153-07-31 01:25:00,2153-07-31 01:26:00,224277,1872.0,min,RL Post Forearm,...,71.6,,,1,1,0,FinishedRunning,,1872.0,1
