# MIMIC 4 data - dataset construction outputevents

Code taken from GRU-ODE-Bayes preprocessing; simplified and adapted for MIMIC 4 1.0

In [1]:
import os
import pathlib

# Project root: the directory two levels above the current working directory.
p_project = str(pathlib.Path.cwd().parents[1])

In [2]:
import pandas as pd
import numpy as np

In [3]:
# Display limits for DataFrames rendered in this notebook.
pd.options.display.max_rows = 50
pd.options.display.max_columns = 300

In [4]:
# path_data: directory the raw MIMIC 4 tables are read from.
# path_temp: directory holding the processed tables read and written by this notebook.
path_data = f'{p_project}/data/original/mimic4'
path_temp = f'{p_project}/data/mimic4'

In [5]:
# Processed admissions table; its hadm_id column is used below to filter the output events.
adm = pd.read_csv(f'{path_temp}/processed/tables/admissions_processed.csv')

In [6]:
# Load the raw ICU output events and preview the last rows.
outputs = pd.read_csv(f'{path_data}/icu/outputevents.csv.gz')
outputs.tail()

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valueuom
4457376,19999068,21606769,30143796,2161-08-30 12:00:00,2161-08-30 12:21:00,226559,180.0,ml
4457377,19999068,21606769,30143796,2161-08-30 14:00:00,2161-08-30 13:49:00,226559,320.0,ml
4457378,19999068,21606769,30143796,2161-08-30 15:00:00,2161-08-30 14:49:00,226559,120.0,ml
4457379,19999068,21606769,30143796,2161-08-30 18:00:00,2161-08-30 17:50:00,226559,400.0,ml
4457380,19999068,21606769,30143796,2161-08-30 23:00:00,2161-08-30 23:35:00,226559,250.0,ml


In [7]:
# Keep only the events whose hadm_id appears in the processed admissions table.
adm_ids = list(adm['hadm_id'])
outputs = outputs[outputs['hadm_id'].isin(adm_ids)]

# Report how many distinct patients survive the filter.
print('Number of patients remaining in the database: ',
      outputs['subject_id'].nunique(),
      sep='\n')

Number of patients remaining in the database: 
43736


In [8]:
# Attach the human-readable label of every itemid to the output events.
item_id=pd.read_csv(path_data + '/icu/d_items.csv.gz')
item_id_1=item_id[['itemid','label']]

# Inner join on itemid: events whose itemid is absent from d_items are dropped,
# which is why the patient count is printed again afterwards.
# validate='many_to_one' makes pandas raise a MergeError if d_items lists the
# same itemid more than once, instead of silently duplicating event rows.
outputs_2=pd.merge(outputs,item_id_1,on='itemid',validate='many_to_one')

print('Number of patients remaining in the database: ')
print(outputs_2['subject_id'].nunique())

Number of patients remaining in the database: 
43736


In [9]:
# Output labels retained for the dataset; rows with any other label are discarded.
outputs_label_list = [
    'Foley', 'Void', 'OR Urine', 'Chest Tube #1', 'Oral Gastric',
    'Pre-Admission', 'TF Residual', 'OR EBL', 'Emesis', 'Nasogastric',
    'Stool', 'Jackson Pratt #1', 'Straight Cath', 'TF Residual Output',
    'Fecal Bag',
]
outputs_bis = outputs_2[outputs_2['label'].isin(outputs_label_list)].copy()

# Report what is left after the label selection.
print('Number of patients remaining in the database: ',
      outputs_bis['subject_id'].nunique(),
      sep='\n')
print('Number of datapoints remaining in the database: ',
      len(outputs_bis),
      sep='\n')

# Independent copy used by the remaining cells.
outputs_3 = outputs_bis.copy()

Number of patients remaining in the database: 
43571
Number of datapoints remaining in the database: 
3691000


In [10]:
# Check the units of each retained output label: count rows per (label, valueuom) pair.
outputs_3.groupby('label').valueuom.value_counts()

label               valueuom
Chest Tube #1       ml           261748
Emesis              ml             8247
Fecal Bag           ml            12446
Foley               ml          2896332
Jackson Pratt #1    ml            56642
Nasogastric         ml            28789
OR EBL              ml             9368
OR Urine            ml            17029
Oral Gastric        ml            24373
Pre-Admission       ml            11321
Stool               ml            14247
Straight Cath       ml             9997
TF Residual         ml            80015
TF Residual Output  ml             7854
Void                ml           252592
Name: valueuom, dtype: int64

In [11]:
# Write the selected output events to the processed tables directory.
outputs_3.to_csv(f'{path_temp}/processed/tables/outputs_processed.csv')