In [45]:
import pandas as pd
import numpy as np
import seaborn as sns
%matplotlib inline
import statistics
import matplotlib.pyplot as plt
import sqlite3 # library for working with sqlite database
# Open the on-disk SQLite database; `conn` is the handle the pd.read_sql
# cells further down pass as their connection argument.
mimic_db_path = "./data/MIMIC.db"
conn = sqlite3.connect(mimic_db_path)

In [18]:
# Load the two chart-event extracts. The 'Unnamed: 0' column is removed from
# the first file only (presumably a saved index column -- TODO confirm).
chartevents_life = pd.read_csv('./data/live_patient.csv').drop(columns='Unnamed: 0')
chartevents_dead = pd.read_csv('./data/dead_patient_merged.csv')


In [33]:
# Drop rows whose ROW_ID cell holds the literal text 'ROW_ID'
# (presumably header lines repeated inside the merged CSV -- TODO confirm).
bad_rows = chartevents_dead['ROW_ID'].eq('ROW_ID')
chartevents_dead = chartevents_dead.loc[~bad_rows]

In [41]:
def _to_numeric_if_possible(column):
    """Return `column` converted with pd.to_numeric, or unchanged if it cannot be parsed.

    Explicit replacement for `pd.to_numeric(..., errors='ignore')`, which is
    deprecated in recent pandas releases: a column that fails to parse is
    handed back exactly as it came in.
    """
    try:
        return pd.to_numeric(column)
    except (ValueError, TypeError):
        return column


# Convert every column that parses as numeric, in both frames, then stack the
# two frames and save the combined chart-event subset.
chartevents_dead = chartevents_dead.apply(_to_numeric_if_possible)
chartevents_life = chartevents_life.apply(_to_numeric_if_possible)
chartevents_subset = pd.concat([chartevents_dead, chartevents_life])
# NOTE(review): the index is written as the first CSV column, so it comes back
# as 'Unnamed: 0' when the file is re-read; left as-is for downstream readers.
chartevents_subset.to_csv('./subsets/chartevents_subset.csv')

In [56]:
# Sorted array of the distinct HADM_ID values present in chartevents_subset.
uniques = np.sort(chartevents_subset['HADM_ID'].unique())
uniques

array([102390, 105348, 116543, 117382, 118776, 122406, 124326, 128770,
       129210, 133081, 133162, 135502, 144424, 150548, 150832, 151162,
       151408, 151881, 154543, 155418, 158150, 160370, 160709, 160781,
       163132, 163415, 165931, 166581, 174225, 174377, 175419, 175645,
       176453, 182705, 183200, 183330, 184273, 186199, 186278, 190494,
       192441, 194592, 196076])

In [71]:
# Select the `admissions` rows for the HADM_IDs held in `uniques` (the distinct
# HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
admissions_subset = pd.read_sql(
    f"SELECT * FROM admissions WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
admissions_subset.to_csv('./subsets/admissions_subset.csv')

In [72]:
# Select the `cptevents` rows for the HADM_IDs held in `uniques` (the distinct
# HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
cptevents_subset = pd.read_sql(
    f"SELECT * FROM cptevents WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
cptevents_subset.to_csv('./subsets/cptevents_subset.csv')

In [73]:
# Select the `datetimeevents` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
datetimeevents_subset = pd.read_sql(
    f"SELECT * FROM datetimeevents WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
# NOTE(review): the file name is 'datetime_subset.csv', not
# 'datetimeevents_subset.csv' like the other extracts; kept unchanged in case
# downstream code reads this exact path.
datetimeevents_subset.to_csv('./subsets/datetime_subset.csv')

In [74]:
# Select the `diagnoses_icd` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
diagnoses_icd_subset = pd.read_sql(
    f"SELECT * FROM diagnoses_icd WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
diagnoses_icd_subset.to_csv('./subsets/diagnoses_icd_subset.csv')

In [75]:
# Select the `drgcodes` rows for the HADM_IDs held in `uniques` (the distinct
# HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
drgcodes_subset = pd.read_sql(
    f"SELECT * FROM drgcodes WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
drgcodes_subset.to_csv('./subsets/drgcodes_subset.csv')

In [76]:
# Select the `icustays` rows for the HADM_IDs held in `uniques` (the distinct
# HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
icustays_subset = pd.read_sql(
    f"SELECT * FROM icustays WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
icustays_subset.to_csv('./subsets/icustays_subset.csv')

In [77]:
# Select the `inputevents_cv` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
inputevents_cv_subset = pd.read_sql(
    f"SELECT * FROM inputevents_cv WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
inputevents_cv_subset.to_csv('./subsets/inputevents_cv_subset.csv')

In [78]:
# Select the `inputevents_mv` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
inputevents_mv_subset = pd.read_sql(
    f"SELECT * FROM inputevents_mv WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
inputevents_mv_subset.to_csv('./subsets/inputevents_mv_subset.csv')

In [79]:
# Select the `labevents` rows for the HADM_IDs held in `uniques` (the distinct
# HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
labevents_subset = pd.read_sql(
    f"SELECT * FROM labevents WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
labevents_subset.to_csv('./subsets/labevents_subset.csv')

In [80]:
# Select the `microbiologyevents` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
microbiologyevents_subset = pd.read_sql(
    f"SELECT * FROM microbiologyevents WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
microbiologyevents_subset.to_csv('./subsets/microbiologyevents_subset.csv')

In [81]:
# Select the `outputevents` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
outputevents_subset = pd.read_sql(
    f"SELECT * FROM outputevents WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
outputevents_subset.to_csv('./subsets/outputevents_subset.csv')

In [82]:
# Select the `prescriptions` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
prescriptions_subset = pd.read_sql(
    f"SELECT * FROM prescriptions WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
prescriptions_subset.to_csv('./subsets/prescriptions_subset.csv')

In [83]:
# Select the `procedureevents_mv` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
procedureevents_mv_subset = pd.read_sql(
    f"SELECT * FROM procedureevents_mv WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
procedureevents_mv_subset.to_csv('./subsets/procedureevents_mv_subset.csv')

In [84]:
# Select the `procedures_icd` rows for the HADM_IDs held in `uniques` (the
# distinct HADM_ID values of chartevents_subset) and save them as CSV.
# The ids are bound as query parameters instead of being pasted into the SQL
# text, so the query follows `uniques` if the chart-event subset changes.
hadm_ids = uniques.tolist()  # plain Python ints; sqlite3 binds these, not numpy scalars
placeholders = ", ".join(["?"] * len(hadm_ids))
procedures_icd_subset = pd.read_sql(
    f"SELECT * FROM procedures_icd WHERE hadm_id IN ({placeholders})",
    conn,
    params=hadm_ids,
)
procedures_icd_subset.to_csv('./subsets/procedures_icd_subset.csv')