--------

--------

### Extracting and preprocessing ventilated cohort from MIMIC database:

--------

In [1]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import os, sys, pickle, json, time, math, glob

sys.path.append('../utils')
import mimicConcepts as mc
import dataPreparation as dp

In [20]:
# Path of the pickled cache holding the five source tables.
filename = '../processed_data/allTables.pkl'

if os.path.isfile(filename):
    # NOTE(review): pickle.load can execute arbitrary code; only load caches
    # that were produced locally by this pipeline.
    # The context manager closes the handle deterministically.
    with open(filename, 'rb') as tables_file:
        (adms_df, vent_df, measures_df, seds_df, sbt_df) = pickle.load(tables_file)
else:
    # No cache on disk: build the tables from scratch.
    # Presumably generateTables writes the cache to `savepath` when save=True -- confirm in dataPreparation.
    adms_df, vent_df, measures_df, seds_df, sbt_df = dp.generateTables(save=True, savepath=filename)

# Keep only the admission IDs present in all three of the admissions,
# measurements and sedation tables.
hadms = (list(set(adms_df.hadm.unique()) & set(measures_df.hadm.unique()) & set(seds_df.hadm.unique())))
# Single pre-formatted string: prints the same text under Python 2 and Python 3.
print("Number of unique hospital admissions: %d" % len(hadms))

Number of unique hospital admissions: 7191


In [9]:
# Preview the first two rows of the admissions table.
adms_df.head(2)

Unnamed: 0,subject,hadm,icustay,age,gender,ethnicity,admittype,diagnosis,first_careunit,admit_time,icu_admit,icu_discharge,discharge_time,icu_los,adm_los,h_exp,exp
14,61691,109307,200021,60.9,M,WHITE,EMERGENCY,ALTERED MENTAL STATUS,SICU,2114-12-26 19:44:00,2114-12-26 19:45:12,2114-12-27 22:46:28,2114-12-28 18:30:00,1.13,1.95,0,0
18,41710,181955,200028,64.9,M,WHITE,ELECTIVE,FIDELIS LEAD FRACTURE\IMPLANTABLE CARDIOVERER ...,CCU,2133-10-29 10:00:00,2133-10-29 17:13:50,2133-11-01 14:55:14,2133-11-01 14:54:00,2.9,3.2,0,0


In [6]:
# Preview the first two rows of the ventilation table (start/end time per stay).
vent_df.head(2)

Unnamed: 0,subject,hadm,icu_stay,vent_starttime,vent_endtime
0,61691,109307,200021.0,2114-12-26 20:45:00,2114-12-27 11:06:00
2,41710,181955,200028.0,2133-10-29 17:32:00,2133-10-30 09:03:00


In [7]:
# Preview the first two rows of the charted measurements table.
measures_df.head(2)

Unnamed: 0,subject,hadm,icustay,itemid,label,unit,charttime,value
0,36,165660,241249,220235,Arterial CO2 Pressure,mmHg,2134-05-12 12:35:00,60.0
1,36,165660,241249,220235,Arterial CO2 Pressure,mmHg,2134-05-12 15:22:00,75.0


In [10]:
# Preview the first two rows of the sedation/analgesia inputs table.
seds_df.head(2)

Unnamed: 0,subject,hadm,icustay,diagnosis,item,label,ordercat,input_start,input_end,amount,amountuom,rate,rateuom,ptweight,totalamount,totalamountuom
0,36,165660,241249,VENTRAL HERNIA/SDA,221833,Hydromorphone (Dilaudid),05-Med Bolus,2134-05-12 07:00:00,2134-05-12 07:01:00,0.5,mg,,,106.2,,
11,36,165660,241249,VENTRAL HERNIA/SDA,221668,Midazolam (Versed),05-Med Bolus,2134-05-12 16:00:00,2134-05-12 16:01:00,2.0,mg,,,106.2,,


In [11]:
# Preview the first two rows of the SBT events table (e.g. "SBT Started").
sbt_df.head(2)

Unnamed: 0,subject,hadm,icustay,itemid,label,unit,charttime,value
0,533,100009,253656,224717,SBT Successfully Completed,,2162-05-17 22:00:00,
1,605,115545,248569,224715,SBT Started,,2197-11-14 07:25:00,


--------

--------

### Consolidating tables into per-admission dataframes:

--------

In [22]:
# Build the consolidated frame for one admission (the first ID in hadms)
# from the five source tables, then preview its first rows.
h = hadms[0]
frame_h = dp.getChartFrame(h, adms_df, vent_df, measures_df, seds_df, sbt_df)
frame_h.head()

Unnamed: 0,timestamp,hadm,firstICU,subject,Admittype,Admdays,Ethnicity,Gender,Age,Admission Weight (Kg),...,Propofol,Fentanyl (Concentrate),Midazolam (Versed),Fentanyl,Dexmedetomidine (Precedex),Morphine Sulfate,Hydromorphone (Dilaudid),Lorazepam (Ativan),Vented,SBT
0,2161-10-11 12:00:00,196611,290648,27800,1,8.18,0,0,65.4,93.0,...,0,0.03,1.0,0,0,0.0,0,0.0,0,
1,2161-10-11 13:00:00,196611,290648,27800,1,8.18,0,0,65.4,93.0,...,0,0.03,1.0,0,0,0.0,0,0.0,1,
2,2161-10-11 14:00:00,196611,290648,27800,1,8.18,0,0,65.4,93.0,...,0,0.01,0.0,0,0,0.0,0,0.0,1,
3,2161-10-11 15:00:00,196611,290648,27800,1,8.18,0,0,65.4,93.0,...,0,0.0,0.0,0,0,0.0,0,0.0,1,
4,2161-10-11 16:00:00,196611,290648,27800,1,8.18,0,0,65.4,93.0,...,0,0.0,0.0,0,0,0.0,0,0.0,1,


In [65]:
# Run the build script; per its printed output it partitions the admission IDs
# into lists and creates the slurm and submit scripts.
%run ../utils/buildScripts.py

-------- Creating hadms lists --------
Total # IDs: 7191
# Partitions: 200 Size: 36
-------- Creating slurm script --------
-------- Creating submit script --------


In [63]:
# Path of the pickled cache holding the merged per-admission frames.
filename = "../processed_data/allFrames.pkl"
if os.path.isfile(filename):
    # NOTE(review): pickle.load can execute arbitrary code; only load caches
    # that were produced locally by this pipeline.
    with open(filename, 'rb') as frames_file:
        allFrames = pickle.load(frames_file)
else:
    # Process frames in parallel:
    #!../utils/submit.sh
    # Merge every per-partition pickle into one dict. Sorting makes the merge
    # order (and so which entry wins on a duplicate key) deterministic.
    allFrames = {}
    for f in sorted(glob.glob("../processed_data/h_frames/*.pkl")):
        with open(f, 'rb') as part_file:
            allFrames.update(pickle.load(part_file))
    # Write the cache once, after all partitions are merged. Dumping inside
    # the loop re-serialised the growing dict per file and could leave a
    # partial cache behind if interrupted. As before, nothing is written
    # when no partition files were found.
    if allFrames:
        with open(filename, 'wb') as frames_file:
            pickle.dump(allFrames, frames_file)
# Single pre-formatted string: prints the same text under Python 2 and Python 3.
print("After processing, # Admissions: %d" % len(allFrames))

After processing, # Admissions: 7053
