In [6]:
import getpass
import os
from collections import OrderedDict

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
# !pip install tableone
from tableone import TableOne
# !pip install psycopg2-binary
import psycopg2

In [23]:
# Connection settings for the local PostgreSQL copy of MIMIC-IV.
# Each value can be overridden through the usual libpq environment variables;
# the fallbacks are the values this notebook was originally run with.
sqluser = os.environ.get('PGUSER', 'asem')
dbname = os.environ.get('PGDATABASE', 'mimiciv')
hostname = os.environ.get('PGHOST', 'localhost')
# The password must never live in the notebook: take it from PGPASSWORD, or
# prompt for it interactively when the variable is unset/empty.
password = os.environ.get('PGPASSWORD') or getpass.getpass(
    'PostgreSQL password for {}@{}: '.format(sqluser, hostname)
)
hosp_schema_name = 'mimiciv_hosp'

# Connect to local postgres version of mimic
con = psycopg2.connect(dbname=dbname, user=sqluser, host=hostname, password=password)
cur = con.cursor()
# Make the hospital schema the default for unqualified table names.
cur.execute('SET search_path to {}'.format(hosp_schema_name))


In [30]:
# One row per hospital admission, joined to its patient.
#   hosp_stay_num - DENSE_RANK of the admission within the patient, by admittime
#   pat_count     - 1 on the patient's first admission (by admittime), else 0
#   age           - anchor_age shifted by (admission year - anchor_year)
#   hosp_los      - discharge minus admission in hours, divided by 24
#   days_to_death - days from the discharge date to dod (NULL when dod is NULL)
#   hospital_mortality - 1 when that day difference is exactly 0
query = """
SELECT
      pat.subject_id
    , adm.hadm_id
    , DENSE_RANK() OVER hadm_window AS hosp_stay_num
    , CASE
        WHEN FIRST_VALUE(adm.hadm_id) OVER hadm_window = adm.hadm_id THEN 1
        ELSE 0
      END AS pat_count
    , pat.anchor_age + (EXTRACT(YEAR FROM adm.admittime) - pat.anchor_year) AS age
    , pat.gender
    , adm.insurance
    , mimiciv_derived.DATETIME_DIFF(adm.dischtime, adm.admittime, 'HOUR') / 24 AS hosp_los
    , pat.dod
    , mimiciv_derived.DATETIME_DIFF(pat.dod, CAST(adm.dischtime AS DATE), 'DAY') AS days_to_death
    -- mortality flags
    , CASE WHEN mimiciv_derived.DATETIME_DIFF(pat.dod, CAST(adm.dischtime AS DATE), 'DAY') = 0 THEN 1 ELSE 0 END AS hospital_mortality
FROM mimiciv_hosp.patients pat
INNER JOIN mimiciv_hosp.admissions adm
    ON pat.subject_id = adm.subject_id
WINDOW hadm_window AS (PARTITION BY pat.subject_id ORDER BY adm.admittime)
"""

# Run the query on the open connection and load the result as a DataFrame.
hosp = pd.read_sql_query(sql=query, con=con)

In [31]:

# Mortality flag: 1 for every admission whose patient has a computed
# days_to_death (i.e. a recorded date of death), 0 otherwise.
# NOTE(review): this does not check days_to_death <= 365; it is only a true
# one-year flag if `dod` is censored at one year upstream - confirm.
hosp['one_year_mortality'] = hosp['days_to_death'].notnull().astype(int)

# Keep days-to-death only on each patient's last hospital stay (the highest
# hosp_stay_num); every earlier stay gets NaN.
# hosp_stay_num is a DENSE_RANK and can therefore tie, so this is computed as
# a row-wise mask instead of a self-merge on (subject_id, hosp_stay_num): a
# merge would multiply tied rows, and would add suffixed duplicate columns if
# the cell were run twice.
is_last_stay = hosp['hosp_stay_num'] == hosp.groupby('subject_id')['hosp_stay_num'].transform('max')
hosp['days_to_death_last_stay_id'] = hosp['days_to_death'].where(is_last_stay)

hosp = hosp.sort_values(['subject_id', 'hosp_stay_num'])

# fix some data type issues
# Convert pandas nullable-integer (Int64) columns to plain numpy dtypes:
# float when the column has missing values, int otherwise.  Whole columns are
# reassigned because writing through a `.loc[:, mask]` slice sets values in
# place and does not reliably change the column dtype.
int_cols = np.array([isinstance(dtype, pd.Int64Dtype) for dtype in hosp.dtypes], dtype=bool)
for col in hosp.columns[int_cols]:
    as_float = hosp[col].astype(float)
    hosp[col] = as_float if as_float.isna().any() else as_float.astype(int)


In [33]:
# One row per ICU stay, joined to its hospital admission and patient.
#   icu_stay_num  - ROW_NUMBER of the stay within the patient, by icu.intime
#   hosp_stay_num - DENSE_RANK of the admission within the patient, by admittime
#   pat_count     - 1 on the patient's first ICU stay (by intime), else 0
#   age           - anchor_age shifted by (ICU admission year - anchor_year)
#   hospital_mortality / icu_mortality - 1 when dod falls on the hospital
#       discharge date / ICU out date respectively, else 0
query = """
SELECT
      pat.subject_id
    , adm.hadm_id
    , icu.stay_id
    , ROW_NUMBER() OVER (PARTITION BY pat.subject_id ORDER BY icu.intime) AS icu_stay_num
    , DENSE_RANK() OVER (PARTITION BY pat.subject_id ORDER BY adm.admittime) AS hosp_stay_num
    , CASE
        WHEN FIRST_VALUE(icu.stay_id) OVER icustay_window = icu.stay_id THEN 1
        ELSE 0
      END AS pat_count
    , pat.anchor_age + (EXTRACT(YEAR FROM icu.intime) - pat.anchor_year) AS age
    , pat.gender
    , adm.insurance
    , icu.first_careunit
    , icu.los AS icu_los
    , mimiciv_derived.DATETIME_DIFF(adm.dischtime, adm.admittime, 'HOUR') / 24 AS hosp_los
    , pat.dod
    , mimiciv_derived.DATETIME_DIFF(pat.dod, CAST(adm.dischtime AS DATE), 'DAY') AS days_to_death
    -- mortality flags
    , CASE WHEN mimiciv_derived.DATETIME_DIFF(pat.dod, CAST(adm.dischtime AS DATE), 'DAY') = 0 THEN 1 ELSE 0 END AS hospital_mortality
    , CASE WHEN mimiciv_derived.DATETIME_DIFF(pat.dod, CAST(icu.outtime AS DATE), 'DAY') = 0 THEN 1 ELSE 0 END AS icu_mortality
FROM mimiciv_hosp.patients pat
INNER JOIN mimiciv_hosp.admissions adm
    ON pat.subject_id = adm.subject_id
INNER JOIN mimiciv_icu.icustays icu
    ON adm.hadm_id = icu.hadm_id
WINDOW hadm_window AS (PARTITION BY pat.subject_id ORDER BY adm.admittime)
     , icustay_window AS (PARTITION BY pat.subject_id ORDER BY icu.intime)
"""

# Run the query on the open connection and load the result as a DataFrame.
data = pd.read_sql_query(sql=query, con=con)

In [34]:
# Mortality flag: 1 for every ICU stay whose patient has a computed
# days_to_death (i.e. a recorded date of death), 0 otherwise.
# NOTE(review): this does not check days_to_death <= 365; it is only a true
# one-year flag if `dod` is censored at one year upstream - confirm.
data['one_year_mortality'] = data['days_to_death'].notnull().astype(int)

# Keep days-to-death only on each patient's last ICU stay (the highest
# icu_stay_num); every earlier stay gets NaN.  Computed as a row-wise mask
# rather than a self-merge so that re-running the cell overwrites the column
# instead of adding suffixed duplicates.
is_last_stay = data['icu_stay_num'] == data.groupby('subject_id')['icu_stay_num'].transform('max')
data['days_to_death_last_stay_id'] = data['days_to_death'].where(is_last_stay)

data = data.sort_values(['subject_id', 'icu_stay_num'])

# add a grouping variable for table one so we can have hospital mortality as a group and a row
data['hosp_mort'] = data['hospital_mortality']

# fix some data type issues
# Convert pandas nullable-integer (Int64) columns to plain numpy dtypes:
# float when the column has missing values, int otherwise.  Whole columns are
# reassigned because writing through a `.loc[:, mask]` slice sets values in
# place and does not reliably change the column dtype.
int_cols = np.array([isinstance(dtype, pd.Int64Dtype) for dtype in data.dtypes], dtype=bool)
for col in data.columns[int_cols]:
    as_float = data[col].astype(float)
    data[col] = as_float if as_float.isna().any() else as_float.astype(int)

In [35]:


# Table 1 specification, applied unchanged to both the ICU-stay frame (`data`)
# and the hospital-admission frame (`hosp`).  ICU-only variables
# (first_careunit, icu_los, icu_mortality) are not listed in `columns`, so the
# same specification is valid for both frames.
columns = [
    "pat_count",
    "age",
    "gender",
    "insurance",
    "hosp_los",
    "hospital_mortality",
    "one_year_mortality",
]

# Everything reported is categorical except the two continuous variables.
categorical = [name for name in columns if name not in ("age", "hosp_los")]

# Level ordering for the categorical variables.
order = dict(
    pat_count=[1, 0],
    hadm_count=[1, 0],
    gender=["F", "M"],
    hospital_mortality=[1, 0],
    one_year_mortality=[1, 0],
)

# Show a single level for each variable that has an explicit ordering.
limit = {name: 1 for name in order}

# Display labels; entries for variables absent from `columns` are kept so the
# mapping can be reused if those variables are added.
rename = {
    "pat_count": "Distinct patients",
    "hadm_count": "Distinct hospitalizations",
    "age": "Age",
    "gender": "Administrative Gender",
    "insurance": "Insurance",
    "first_careunit": "First ICU stay, unit type",
    "icu_los": "ICU length of stay",
    "hosp_los": "Hospital length of stay",
    "icu_mortality": "In-ICU mortality",
    "hospital_mortality": "In-hospital mortality",
    "one_year_mortality": "One year mortality",
}

# Keyword arguments common to both tables.
tableone_kwargs = dict(
    columns=columns,
    categorical=categorical,
    order=order,
    limit=limit,
    rename=rename,
)

print('ICU demographics')
icu_table = TableOne(data, **tableone_kwargs)
display(icu_table)

print('Hospital demographics')
hosp_table = TableOne(hosp, **tableone_kwargs)
display(hosp_table)



ICU demographics




Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,73181
"Distinct patients, n (%)",1,0.0,50920 (69.6)
"Age, mean (SD)",,0.0,64.7 (16.9)
"Administrative Gender, n (%)",F,0.0,32363 (44.2)
"Insurance, n (%)",Medicaid,0.0,5528 (7.6)
"Insurance, n (%)",Medicare,,33091 (45.2)
"Insurance, n (%)",Other,,34562 (47.2)
"Hospital length of stay, mean (SD)",,0.0,11.0 (13.3)
"In-hospital mortality, n (%)",1,0.0,8511 (11.6)
"One year mortality, n (%)",1,0.0,28274 (38.6)


Hospital demographics




Unnamed: 0,Unnamed: 1,Missing,Overall
n,,,431231
"Distinct patients, n (%)",1,0.0,180733 (41.9)
"Age, mean (SD)",,0.0,58.8 (19.2)
"Administrative Gender, n (%)",F,0.0,224990 (52.2)
"Insurance, n (%)",Medicaid,0.0,41330 (9.6)
"Insurance, n (%)",Medicare,,160560 (37.2)
"Insurance, n (%)",Other,,229341 (53.2)
"Hospital length of stay, mean (SD)",,0.0,4.5 (6.6)
"In-hospital mortality, n (%)",1,0.0,8869 (2.1)
"One year mortality, n (%)",1,0.0,106218 (24.6)
