# Export raw data from the MIMIC-IV database

Only patients who stayed in one of the following care units are exported:
- Coronary Care unit (CCU)
- Cardiac Vascular Intensive Care unit (CVICU)

In [1]:
from typing import Tuple
from tqdm import tqdm
from multiprocessing import Pool, RLock
from configobj import ConfigObj
import numpy as np
import getpass
import json
import math
import os
import psycopg2
import pandas as pd
import time

import matplotlib.pyplot as plt
%matplotlib inline

from projects.common import *
from projects.utils import *


In [4]:
# Directory holding the database settings, resolved relative to the
# notebook's current working directory.
db_dir = f"{os.path.abspath('')}/../../../db"

# connect_to_database returns the four schema handles followed by the
# open connection, in this order.
query_schema_core, query_schema_hosp, query_schema_icu, \
    query_schema_derived, conn = connect_to_database(db_dir)


Table for icustays:  
['subject_id', 'hadm_id', 'stay_id', 'first_careunit', 'last_careunit', 'intime', 'outtime', 'los'] 

Table for transfers:  
['subject_id', 'hadm_id', 'transfer_id', 'eventtype', 'careunit', 'intime', 'outtime'] 

Table for patients:  
['subject_id', 'gender', 'anchor_age', 'anchor_year', 'anchor_year_group', 'dod']

Table for admissions:  
['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime', 'admission_type', 'admission_location', 'discharge_location', 'insurance', 'language', 'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'hospital_expire_flag']

In [3]:
# Load the core tables once; transfers and ICU stays are ordered
# chronologically so later per-row loops see them in time order.
patients_df = get_database_table_as_dataframe(
    conn, query_schema_core, 'patients')
admissions_df = get_database_table_as_dataframe(
    conn, query_schema_core, 'admissions')
transfers_df = get_database_table_as_dataframe(
    conn, query_schema_core, 'transfers').sort_values(by=['intime', 'outtime'])
icustays_df = get_database_table_as_dataframe(
    conn, query_schema_icu, 'icustays').sort_values(by=['intime', 'outtime'])

# Sanity checks: each table must be keyed uniquely by its primary id.
# The columns are addressed by name rather than by position so the checks
# keep working if the column order changes, and so the whole frame is not
# converted to an object array just to inspect one column.
assert patients_df['subject_id'].is_unique, \
    "patients.subject_id contains duplicates"
assert admissions_df['hadm_id'].is_unique, \
    "admissions.hadm_id contains duplicates"
assert icustays_df['stay_id'].is_unique, \
    "icustays.stay_id contains duplicates"

patients_list = patients_df['subject_id'].tolist()
admissions_list = admissions_df['hadm_id'].tolist()


Getting patients data
Number of entries for patients : 382278
Column names : ['subject_id', 'gender', 'anchor_age', 'anchor_year', 'anchor_year_group', 'dod']

Getting admissions data
Number of entries for admissions : 523740
Column names : ['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime', 'admission_type', 'admission_location', 'discharge_location', 'insurance', 'language', 'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'hospital_expire_flag']

Getting transfers data


KeyboardInterrupt: 

In [None]:
# Care units whose stays are exported.
_CAREUNITS = ['Coronary Care Unit (CCU)',
              'Cardiac Vascular Intensive Care Unit (CVICU)']

# A stay qualifies when either its first or its last care unit is listed.
cu1 = get_id_list(conn, query_schema_icu, 'icustays',
                  'stay_id', 'first_careunit', tuple(_CAREUNITS))
cu2 = get_id_list(conn, query_schema_icu, 'icustays',
                  'stay_id', 'last_careunit', tuple(_CAREUNITS))
custom_icustays_list = list(set(cu1 + cu2))

# Nested lookup: subject_id -> hadm_id -> [stay_id, ...] for qualifying stays.
custom_icustays_dict = dict()
for _, stay in icustays_df.iterrows():
    if stay['first_careunit'] not in _CAREUNITS \
            and stay['last_careunit'] not in _CAREUNITS:
        continue
    admissions_of_subject = custom_icustays_dict.setdefault(
        stay['subject_id'], {})
    admissions_of_subject.setdefault(
        stay['hadm_id'], []).append(stay['stay_id'])


# Export patient info.


In [None]:
# Ordered list of field names stored per stay in the patient-info file.
# The position of a name in this list is its uid.
data_entry_list = \
    get_database_table_column_name(conn, 'icustay_detail') + \
    ['first_careunit', 'last_careunit'] + \
    get_database_table_column_name(conn, 'charlson')[2:] + \
    ['icd'] + \
    get_database_table_column_name(conn, 'creatinine_baseline')[3:]

print("Entries that are saved as patient info:", data_entry_list)

uid_info = {idx: name for idx, name in enumerate(data_entry_list)}
uid_info_path = os.path.abspath('') + "/../../../" + UID_INFO_PATH
# Opening in write mode truncates an existing file, so no explicit removal
# is needed; the former unconditional os.remove() raised FileNotFoundError
# whenever the file did not exist yet (e.g. on a first run).
with open(uid_info_path, 'w') as f:
    json.dump(uid_info, f)


Entries that are saved as patient info: ['subject_id', 'hadm_id', 'stay_id', 'gender', 'dod', 'admittime', 'dischtime', 'los_hospital', 'admission_age', 'ethnicity', 'hospital_expire_flag', 'hospstay_seq', 'first_hosp_stay', 'icu_intime', 'icu_outtime', 'los_icu', 'icustay_seq', 'first_icu_stay', 'first_careunit', 'last_careunit', 'age_score', 'myocardial_infarct', 'congestive_heart_failure', 'peripheral_vascular_disease', 'cerebrovascular_disease', 'dementia', 'chronic_pulmonary_disease', 'rheumatic_disease', 'peptic_ulcer_disease', 'mild_liver_disease', 'diabetes_without_cc', 'diabetes_with_cc', 'paraplegia', 'renal_disease', 'malignant_cancer', 'severe_liver_disease', 'metastatic_solid_tumor', 'aids', 'charlson_comorbidity_index', 'icd', 'scr_min', 'ckd', 'mdrd_est', 'scr_baseline']


In [None]:
# Write one patient-info record per selected ICU stay, combining
# icustay_detail, icustays, charlson, diagnoses_icd and creatinine_baseline.
df = get_database_table_as_dataframe(conn, query_schema_derived, 'icustay_detail')
charlson_df = get_database_table_as_dataframe(conn, query_schema_derived, 'charlson')
diag_icd_df = get_database_table_as_dataframe(conn, query_schema_hosp, 'diagnoses_icd')
cr_base_df = get_database_table_as_dataframe(conn, query_schema_derived, 'creatinine_baseline')

# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

it = InfoTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])

    # 1. icustay_detail
    # zip() stops at the shorter input (the row), so `c` ends up as the uid
    # of the last icustay_detail column.
    c = 0
    for i, j in zip(uid_info, df_row):
        c = i
        it.append(uid=i, value=j)

    # 2. icustay
    c += 1
    j = icustays_df.loc[icustays_df['stay_id'] ==
                        df_row['stay_id']]['first_careunit'].item()
    it.append(uid=c, value=j)

    c += 1
    j = icustays_df.loc[icustays_df['stay_id'] ==
                        df_row['stay_id']]['last_careunit'].item()
    it.append(uid=c, value=j)

    # 3. charlson - Calculated
    # NOTE(review): it['hadm_id'] relies on InfoTable supporting lookup by
    # field name - confirm against the InfoTable implementation.
    for i in charlson_df.columns.tolist()[2:]:
        c += 1
        j = charlson_df.loc[charlson_df['hadm_id']
                            == it['hadm_id']][i].item()
        it.append(uid=c, value=j)

    # 4. diagnoses_icd
    c += 1
    # Select the diagnoses of this admission. The filter previously compared
    # the columns against uid_info[0] / uid_info[1], i.e. the *names*
    # 'subject_id' / 'hadm_id', which never match, so the ICD field was
    # always empty.
    cond = (diag_icd_df['subject_id'] == df_row['subject_id']) & \
        (diag_icd_df['hadm_id'] == df_row['hadm_id'])
    icd_df = diag_icd_df.loc[cond][['seq_num', 'icd_code', 'icd_version']]
    icd_df = icd_df.sort_values('seq_num')
    icd_entries = []
    for _, icd_row in icd_df.iterrows():
        icd_version = icd_row['icd_version']
        icd_code = icd_row['icd_code'].replace(' ', '')
        # Re-insert the decimal point that the stored codes omit.
        if icd_version == 9:
            if icd_code[0] == 'E':
                icd_code = icd_code[:4] + '.' + icd_code[4:]
            else:
                icd_code = icd_code[:3] + '.' + icd_code[3:]
        elif icd_version == 10:
            icd_code = icd_code[:3] + '.' + icd_code[3:]
        else:
            raise ValueError("Unknown ICD code")
        icd_entries.append(f"{icd_version:02d}-{icd_code}")
    # Comma-separated "<version>-<code>" list, or None without diagnoses.
    j = ','.join(icd_entries) if icd_entries else None
    it.append(uid=c, value=j)

    # 5. creatinine_baseline - Calculated
    for i in cr_base_df.columns.tolist()[3:]:
        c += 1
        j = cr_base_df.loc[cr_base_df['hadm_id']
                           == it['hadm_id']][i].item()
        it.append(uid=c, value=j)

    save_info_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'], pd.DataFrame(it.data))


# Free the large source tables before the next cells load more data.
del df, charlson_df, diag_icd_df, cr_base_df


# Export data info.


In [None]:
# Read the tab-separated lab item table; its first line is the header.
lab_item_file = "../../../"+LAB_ITEM_PATH
labitems = pd.read_csv(lab_item_file, sep='\t', header=0)
# NOTE(review): fillna returns a new frame, so this only affects the cell's
# displayed output and `labitems` itself keeps its missing values - confirm
# that this is intended.
labitems.fillna(value='None')


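Create an empty placeholder .dsv data file for every selected ICU stay, so that the following cells can load it and append to it.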

In [None]:
# Write one empty data table per selected ICU stay.
for icustay_id in tqdm(custom_icustays_list):
    empty_table = pd.DataFrame(DataTable().data)
    save_data_dsv(STRUCTURED_EXPORT_DIR, icustay_id, empty_table)

print("Created dummy .dsv files.")


In [None]:
# Append every height measurement of a selected stay to that stay's data file.
# ['subject_id', 'stay_id', 'charttime', 'height']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'height')

# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])
    dt.append(
        uid=100001,
        value=df_row['height'],
        unit='cm',
        rate=None,
        rate_unit=None,
        category=None,
        specimen_id=None,
        starttime=df_row['charttime'],
        endtime=None,
    )
    save_data_dsv(STRUCTURED_EXPORT_DIR,
                  df_row['stay_id'], pd.DataFrame(dt.data))

print("Added height entries.")


In [None]:
# Append every weight interval of a selected stay to that stay's data file.
# ['stay_id', 'starttime', 'endtime', 'weight', 'weight_type']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'weight_durations')

# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])
    dt.append(
        uid=100002,
        value=df_row['weight'],
        unit='kg',
        rate=None,
        rate_unit=None,
        category=df_row['weight_type'],
        specimen_id=None,
        starttime=df_row['starttime'],
        endtime=df_row['endtime'],
    )
    save_data_dsv(STRUCTURED_EXPORT_DIR,
                  df_row['stay_id'], pd.DataFrame(dt.data))

print("Added weight entries.")


In [None]:
# Export the derived 'chemistry' table: every measurement charted inside a
# selected ICU stay is appended to that stay's data file.
id_mapping = {
    550862: 'albumin',
    550930: 'globulin',
    550976: 'total_protein',
    550868: 'aniongap',
    550882: 'bicarbonate',
    551006: 'bun',
    550893: 'calcium',
    550902: 'chloride',
    550912: 'creatinine',
    550931: 'glucose',
    550983: 'sodium',
    550971: 'potassium',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'albumin', 'globulin', 'total_protein', 'aniongap', 'bicarbonate', 'bun', 'calcium', 'chloride', 'creatinine', 'glucose', 'sodium', 'potassium']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'chemistry')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Chemistry',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added chemistry (lab) entries.")


In [None]:
# Export the derived 'bg' (blood gas) table: every measurement charted
# inside a selected ICU stay is appended to that stay's data file.
id_mapping = {
    552028: 'specimen',
    550801: 'aado2',
    550802: 'baseexcess',
    550803: 'bicarbonate',
    550804: 'totalco2',
    550805: 'carboxyhemoglobin',
    550806: 'chloride',
    550808: 'calcium',
    550809: 'glucose',
    550810: 'hematocrit',
    550811: 'hemoglobin',
    550813: 'lactate',
    550814: 'methemoglobin',
    550816: 'fio2',
    550817: 'so2',
    550818: 'pco2',
    550820: 'ph',
    550821: 'po2',
    550822: 'potassium',
    550824: 'sodium',
    550825: 'temperature',
    223835: 'fio2_chartevents',
    500001: 'pao2fio2ratio',
    500002: 'aado2_calc',
    500003: 'specimen_pred',
    500004: 'specimen_prob',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen', 'specimen_pred', 'specimen_prob', 'so2', 'po2', 'pco2', 'fio2_chartevents', 'fio2', 'aado2', 'aado2_calc', 'pao2fio2ratio', 'ph', 'baseexcess', 'bicarbonate', 'totalco2', 'hematocrit', 'hemoglobin', 'carboxyhemoglobin', 'methemoglobin', 'chloride', 'calcium', 'temperature', 'potassium', 'sodium', 'lactate', 'glucose']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'bg')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Blood Gas',
                    specimen_id=None,
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added bg (lab) entries.")


In [None]:
# Export the derived 'blood_differential' table: every measurement charted
# inside a selected ICU stay is appended to that stay's data file.
# impute absolute count if percentage & WBC is available
id_mapping = {
    551146: 'basophils',
    552069: 'basophils_abs',
    551200: 'eosinophils',
    551254: 'monocytes',
    551256: 'neutrophils',
    552075: 'neutrophils_abs',
    551143: 'atypical_lymphocytes',
    551144: 'bands',
    552135: 'immature_granulocytes',
    551251: 'metamyelocytes',
    551257: 'nrbc',

    100003: 'wbc',  # TODO: May need to split due to category.
    100004: 'lymphocytes',
    100005: 'eosinophils_abs',
    100006: 'lymphocytes_abs',
    100007: 'monocytes_abs',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'wbc', 'basophils_abs', 'eosinophils_abs', 'lymphocytes_abs', 'monocytes_abs', 'neutrophils_abs', 'basophils', 'eosinophils', 'lymphocytes', 'monocytes', 'neutrophils', 'atypical_lymphocytes', 'bands', 'immature_granulocytes', 'metamyelocytes', 'nrbc']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'blood_differential')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Hematology, Chemistry' if col == 'wbc' else 'Hematology',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added blood_differential (lab) entries.")


In [None]:
# Export the derived 'cardiac_marker' table: every measurement charted
# inside a selected ICU stay is appended to that stay's data file.
id_mapping = {
    551002: 'troponin_i',
    551003: 'troponin_t',
    550911: 'ck_mb',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'troponin_i', 'troponin_t', 'ck_mb']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'cardiac_marker')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Chemistry',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added cardiac_marker (lab) entries.")


In [None]:
# Export the derived 'coagulation' table: every measurement charted inside
# a selected ICU stay is appended to that stay's data file.
id_mapping = {
    551196: 'd_dimer',
    551214: 'fibrinogen',
    551297: 'thrombin',
    551237: 'inr',
    551274: 'pt',
    551275: 'ptt',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'd_dimer', 'fibrinogen', 'thrombin', 'inr', 'pt', 'ptt']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'coagulation')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Hematology',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added coagulation (lab) entries.")


In [None]:
# Export the derived 'complete_blood_count' table: every measurement
# charted inside a selected ICU stay is appended to that stay's data file.
id_mapping = {
    551221: 'hematocrit',
    551222: 'hemoglobin',
    551248: 'mch',
    551249: 'mchc',
    551250: 'mcv',
    551265: 'platelet',
    551279: 'rbc',
    551277: 'rdw',
    552159: 'rdwsd',
    # 551301: 'wbc', # present in blood_differential
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'hematocrit', 'hemoglobin', 'mch', 'mchc', 'mcv', 'platelet', 'rbc', 'rdw', 'rdwsd', 'wbc']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'complete_blood_count')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Hematology',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added complete_blood_count (lab) entries.")


In [None]:
# Export the derived 'enzyme' table: every measurement charted inside a
# selected ICU stay is appended to that stay's data file.
id_mapping = {
    550861: 'alt',
    550863: 'alp',
    550878: 'ast',
    550867: 'amylase',
    550885: 'bilirubin_total',
    550884: 'bilirubin_indirect',
    550883: 'bilirubin_direct',
    550910: 'ck_cpk',
    # 550911: 'ck_mb', # present in cardiac_marker (same uid); exporting it
    # here as well would store every CK-MB value twice.
    550927: 'ggt',
    550954: 'ld_ldh',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'alt', 'alp', 'ast', 'amylase', 'bilirubin_total', 'bilirubin_direct', 'bilirubin_indirect', 'ck_cpk', 'ck_mb', 'ggt', 'ld_ldh']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'enzyme')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Chemistry',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added enzyme (lab) entries.")


In [None]:
# Export the derived 'inflammation' table: every measurement charted
# inside a selected ICU stay is appended to that stay's data file.
id_mapping = {
    550889: 'crp',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'hadm_id', 'charttime', 'specimen_id', 'crp']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'inflammation')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    # The table has no stay_id; resolve candidate stays via subject/admission.
    if df_row['subject_id'] not in custom_icustays_dict:
        continue
    if df_row['hadm_id'] not in custom_icustays_dict[df_row['subject_id']]:
        continue

    stay_ids = custom_icustays_dict[df_row['subject_id']][df_row['hadm_id']]

    for stay_id in stay_ids:
        it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

        # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
        # patient-info field list.
        icu_intime = it.data[13]
        icu_outtime = it.data[14]

        if icu_intime <= df_row['charttime'] <= icu_outtime:
            # Read the data file only when something will be appended;
            # it used to be read for every row, even outside the window.
            dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
            for col in value_columns:
                dt.append(
                    uid=id_mapping[col],
                    value=df_row[col],
                    unit=None,
                    rate=None,
                    rate_unit=None,
                    category='Chemistry',
                    specimen_id=df_row['specimen_id'],
                    starttime=df_row['charttime'],
                    endtime=None,
                )
            save_data_dsv(STRUCTURED_EXPORT_DIR,
                          stay_id, pd.DataFrame(dt.data))

print("Added inflammation (lab) entries.")


In [None]:
# Export the derived 'oxygen_delivery' table: every row of a selected ICU
# stay is appended to that stay's data file.
id_mapping = {
    227287: 'o2_flow_additional',

    100012: 'o2_flow',
    100008: 'o2_delivery_device_1',
    100009: 'o2_delivery_device_2',
    100010: 'o2_delivery_device_3',
    100011: 'o2_delivery_device_4',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'stay_id', 'charttime', 'o2_flow', 'o2_flow_additional', 'o2_delivery_device_1', 'o2_delivery_device_2', 'o2_delivery_device_3', 'o2_delivery_device_4']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'oxygen_delivery')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]
# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])

    for col in value_columns:
        dt.append(
            uid=id_mapping[col],
            value=df_row[col],
            # Device columns are categorical; flow columns are in L/min.
            unit=None if 'device' in col else 'L/min',
            rate=None,
            rate_unit=None,
            category='Respiratory',
            specimen_id=None,
            starttime=df_row['charttime'],
            endtime=None,
        )
    # Save to the stay the row belongs to. The bare name `stay_id` is a
    # leftover from an earlier cell's loop and pointed at an unrelated stay.
    save_data_dsv(STRUCTURED_EXPORT_DIR,
                  df_row['stay_id'], pd.DataFrame(dt.data))

print("Added oxygen_delivery (chart) entries.")


In [None]:
# Export the derived 'rhythm' table: every observation charted inside a
# selected ICU stay is appended to that stay's data file.
id_mapping = {
    220048: 'heart_rhythm',
    224650: 'ectopy_type',
    224651: 'ectopy_frequency',
    226479: 'ectopy_type_secondary',
    226480: 'ectopy_frequency_secondary',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'charttime', 'heart_rhythm', 'ectopy_type', 'ectopy_frequency', 'ectopy_type_secondary', 'ectopy_frequency_secondary']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'rhythm')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]

it = InfoTable()
dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['subject_id'] not in custom_icustays_dict:
        continue

    # The table carries neither hadm_id nor stay_id, so every selected stay
    # of the subject is a candidate. Iterating the dict directly yields only
    # its keys, which made the former (hadm_id, stay_ids) unpacking raise
    # TypeError; the stay lists are the dict's values.
    for stay_ids in custom_icustays_dict[df_row['subject_id']].values():

        for stay_id in stay_ids:
            it.data = load_info_dsv(STRUCTURED_EXPORT_DIR, stay_id)

            # Positions 13 / 14 are 'icu_intime' / 'icu_outtime' in the
            # patient-info field list.
            icu_intime = it.data[13]
            icu_outtime = it.data[14]

            if icu_intime <= df_row['charttime'] <= icu_outtime:
                # Read the data file only when something will be appended.
                dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, stay_id)
                for col in value_columns:
                    dt.append(
                        uid=id_mapping[col],
                        value=df_row[col],
                        unit=None,
                        rate=None,
                        rate_unit=None,
                        category='Routine Vital Signs',
                        specimen_id=None,
                        starttime=df_row['charttime'],
                        endtime=None,
                    )
                save_data_dsv(STRUCTURED_EXPORT_DIR,
                              stay_id, pd.DataFrame(dt.data))

print("Added rhythm (chart) entries.")


In [None]:
# Append every urine output measurement of a selected stay to that stay's
# data file.
# ['stay_id', 'charttime', 'urineoutput']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'urine_output')

# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])
    dt.append(
        uid=100013,
        value=df_row['urineoutput'],
        unit='mL',
        rate=None,
        rate_unit=None,
        category='Output',
        specimen_id=None,
        starttime=df_row['charttime'],
        endtime=None,
    )
    # Save to the stay the row belongs to. The bare name `stay_id` is a
    # leftover from an earlier cell's loop and pointed at an unrelated stay.
    save_data_dsv(STRUCTURED_EXPORT_DIR,
                  df_row['stay_id'], pd.DataFrame(dt.data))

print("Added urine_output (chart) entries.")


In [None]:
# Export the derived 'urine_output_rate' table: every row of a selected ICU
# stay is appended to that stay's data file.
# -- attempt to calculate urine output per hour
# -- rate/hour is the interpretable measure of kidney function
# -- though it is difficult to estimate from aperiodic point measures
# -- first we get the earliest heart rate documented for the stay
id_mapping = {
    # 100013: 'uo', present in previous table.
    100014: 'urineoutput_6hr',  # output within 6hr (floor)
    100015: 'urineoutput_12hr',
    100016: 'urineoutput_24hr',
    100017: 'uo_mlkghr_6hr',  # (urineoutput_6hr/weight/uo_tm_6hr)
    100018: 'uo_mlkghr_12hr',
    100019: 'uo_mlkghr_24hr',
    100020: 'uo_tm_6hr',  # time from last uo measurement within 6hr (floor)
    100021: 'uo_tm_12hr',
    100022: 'uo_tm_24hr',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# Unit per uid (keyed by uid, not by column name).
unit_mapping = {
    100014: 'mL',  # output within 6hr (floor)
    100015: 'mL',
    100016: 'mL',
    100017: 'mL/kg/hr',  # (urineoutput_6hr/weight/uo_tm_6hr)
    100018: 'mL/kg/hr',
    100019: 'mL/kg/hr',
    100020: 'hr',  # time from last uo measurement within 6hr (floor)
    100021: 'hr',
    100022: 'hr',
}

# ['stay_id', 'charttime', 'weight', 'uo', 'urineoutput_6hr', 'urineoutput_12hr', 'urineoutput_24hr', 'uo_mlkghr_6hr', 'uo_mlkghr_12hr', 'uo_mlkghr_24hr', 'uo_tm_6hr', 'uo_tm_12hr', 'uo_tm_24hr']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'urine_output_rate')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]
# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])
    for col in value_columns:
        dt.append(
            uid=id_mapping[col],
            value=df_row[col],
            unit=unit_mapping[id_mapping[col]],
            rate=None,
            rate_unit=None,
            category=None,
            specimen_id=None,
            starttime=df_row['charttime'],
            endtime=None,
        )
    # Save to the stay the row belongs to. The bare name `stay_id` is a
    # leftover from an earlier cell's loop and pointed at an unrelated stay.
    save_data_dsv(STRUCTURED_EXPORT_DIR,
                  df_row['stay_id'], pd.DataFrame(dt.data))

print("Added urine_output_rate (derived) entries.")


In [None]:
# Export the derived 'ventilator_setting' table: every row of a selected
# ICU stay is appended to that stay's data file.
id_mapping = {
    224688: 'respiratory_rate_set',
    224690: 'respiratory_rate_total',
    224689: 'respiratory_rate_spontaneous',
    224687: 'minute_volume',
    224684: 'tidal_volume_set',
    224685: 'tidal_volume_observed',
    224686: 'tidal_volume_spontaneous',
    224696: 'plateau_pressure',
    100023: 'peep',
    # 223835: 'fio2',  # same as fio2_chartevents
    223849: 'ventilator_mode',
    229314: 'ventilator_mode_hamilton',
    223848: 'ventilator_type',
}
# Invert to column name -> uid for lookup by column.
id_mapping = {j: i for i, j in id_mapping.items()}

# ['subject_id', 'stay_id', 'charttime', 'respiratory_rate_set', 'respiratory_rate_total', 'respiratory_rate_spontaneous', 'minute_volume', 'tidal_volume_set', 'tidal_volume_observed', 'tidal_volume_spontaneous', 'plateau_pressure', 'peep', 'fio2', 'ventilator_mode', 'ventilator_mode_hamilton', 'ventilator_type']
df = get_database_table_as_dataframe(
    conn, query_schema_derived, 'ventilator_setting')

# Columns to export, in table order; computed once instead of per row.
value_columns = [col for col in df.columns.tolist() if col in id_mapping]
# Build the membership set once; testing against the list is a linear scan
# for every row of the table.
selected_stay_ids = set(custom_icustays_list)

dt = DataTable()
for df_i in tqdm(df.iterrows(), total=len(df)):
    df_row = df_i[1]

    if df_row['stay_id'] not in selected_stay_ids:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, df_row['stay_id'])

    for col in value_columns:
        dt.append(
            uid=id_mapping[col],
            value=df_row[col],
            unit=None,
            rate=None,
            rate_unit=None,
            category='Respiratory',
            specimen_id=None,
            starttime=df_row['charttime'],
            endtime=None,
        )
    # Save to the stay the row belongs to. The bare name `stay_id` is a
    # leftover from an earlier cell's loop and pointed at an unrelated stay.
    save_data_dsv(STRUCTURED_EXPORT_DIR,
                  df_row['stay_id'], pd.DataFrame(dt.data))

print("Added ventilator_setting (chart) entries.")


In [None]:
# Export the derived vitalsign table into the per-stay DSV files.
# uid -> measurement name.
id_mapping = {
    220045: 'heart_rate',
    100024: 'sbp',
    100025: 'dbp',
    100026: 'mbp',
    220179: 'sbp_ni',
    220180: 'dbp_ni',
    220181: 'mbp_ni',
    100027: 'resp_rate',
    100028: 'temperature',
    224642: 'temperature_site',
    220277: 'spo2',
    100029: 'glucose_chartevents',
}
# Invert to name -> uid, which is the direction the loop looks up.
id_mapping = {name: uid for uid, name in id_mapping.items()}
# The vitalsign table exposes this measurement as 'glucose' (see the column
# list below), so the 'glucose_chartevents' key alone never matches a column
# and uid 100029 would silently never be exported. Alias the real column name.
id_mapping.setdefault('glucose', id_mapping['glucose_chartevents'])

# uid -> category stored with each exported entry.
cat_mapping = {
    220045: 'Routine Vital Signs',
    100024: 'Routine Vital Signs',
    100025: 'Routine Vital Signs',
    100026: 'Routine Vital Signs',
    220179: 'Routine Vital Signs',
    220180: 'Routine Vital Signs',
    220181: 'Routine Vital Signs',
    100027: 'Respiratory',
    100028: 'Routine Vital Signs',
    224642: 'Routine Vital Signs',
    220277: 'Respiratory',
    100029: 'Labs',
}

# uid -> unit stored with each exported entry ('' where none is given).
unit_mapping = {
    220045: 'bpm',
    100024: 'mmHg',
    100025: 'mmHg',
    100026: 'mmHg',
    220179: 'mmHg',
    220180: 'mmHg',
    220181: 'mmHg',
    100027: 'insp/min',
    100028: 'degC',
    224642: '',
    220277: '%',
    100029: '',
}

# ['subject_id', 'stay_id', 'charttime', 'heart_rate', 'sbp', 'dbp', 'mbp', 'sbp_ni', 'dbp_ni', 'mbp_ni', 'resp_rate', 'temperature', 'temperature_site', 'spo2', 'glucose']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'vitalsign')

it = InfoTable()
dt = DataTable()

# Loop invariants: build the membership set and the list of exported columns
# once instead of re-deriving them for every row.
selected_stays = set(custom_icustays_list)
mapped_cols = [col for col in df.columns.tolist() if col in id_mapping]

# NOTE(review): the per-stay file is re-read and re-written for every matching
# row; batching rows per stay would cut the file I/O substantially.
for _row_idx, df_row in tqdm(df.iterrows(), total=len(df)):
    row_stay_id = df_row['stay_id']
    if row_stay_id not in selected_stays:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id)

    for col in mapped_cols:
        uid = id_mapping[col]
        dt.append(
            uid=uid,
            value=df_row[col],
            unit=unit_mapping[uid],
            rate=None,
            rate_unit=None,
            category=cat_mapping[uid],
            specimen_id=None,
            starttime=df_row['charttime'],
            endtime=None,
        )
    # Save under the stay that was just loaded. The original passed a
    # `stay_id` name that this loop never binds, so the data was written to
    # whatever stay an earlier cell happened to leave in that variable.
    save_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id, pd.DataFrame(dt.data))

print("Added vitalsign (chart) entries.")


In [None]:
# Export the derived antibiotic table into the per-stay DSV files; every row
# becomes one entry with uid 100030, the route as category and the
# start/stop times as the entry's time span.
# ['subject_id', 'hadm_id', 'stay_id', 'antibiotic', 'route', 'starttime', 'stoptime']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'antibiotic')

it = InfoTable()
dt = DataTable()

# Loop invariant: build the membership set once instead of scanning the list
# for every row.
selected_stays = set(custom_icustays_list)

# NOTE(review): the per-stay file is re-read and re-written for every matching
# row; batching rows per stay would cut the file I/O substantially.
for _row_idx, df_row in tqdm(df.iterrows(), total=len(df)):
    row_stay_id = df_row['stay_id']
    if row_stay_id not in selected_stays:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id)
    dt.append(
        uid=100030,
        value=df_row['antibiotic'],
        unit=None,
        rate=None,
        rate_unit=None,
        category=df_row['route'],
        specimen_id=None,
        starttime=df_row['starttime'],
        endtime=df_row['stoptime'],
    )
    # Save under the stay that was just loaded. The original passed a
    # `stay_id` name that this loop never binds, so the data was written to
    # whatever stay an earlier cell happened to leave in that variable.
    save_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id, pd.DataFrame(dt.data))

print("Added antibiotic (hosp.prescriptions) entries.")


In [None]:
# Export icu.inputevents rows into the per-stay DSV files as 'Medication'
# entries (amount/unit plus rate/unit, with start and end time).
med_ids = [
    220995,  # Sodium Bicarbonate 8.4%
    # 221794, # Furosemide (Lasix) **
    # 228340, # Furosemide (Lasix) 250/50 **
    100037,  # Furosemide (Lasix)
    221986,  # Milrinone
    229068,  # Protamine sulfate
    229639,  # Bumetanide (Bumex)

    221653,  # Dobutamine
    221662,  # Dopamine
    # 221289, # Epinephrine
    # 229617, # Epinephrine. ~145 entries only
    100036,  # Epinephrine
    221906,  # Norepinephrine
    221749,  # Phenylephrine
    222315,  # Vasopressin
]
# NOTE(review): med_ids is never applied below, so every inputevents row of a
# selected stay is exported. 100036/100037 look like merged custom ids for the
# commented-out itemids; confirm whether a remap + filter step is intended.

df = get_database_table_as_dataframe(
    conn, query_schema_icu, 'inputevents', _chunk_size=1000)

it = InfoTable()
dt = DataTable()

# Every other table read by this notebook keys ICU stays on 'stay_id';
# 'icustay_id' (the name the original code used) is kept only as a fallback
# so the cell does not fail with a KeyError when the column is 'stay_id'.
stay_col = 'stay_id' if 'stay_id' in df.columns else 'icustay_id'

# Loop invariant: build the membership set once instead of scanning the list
# for every row.
selected_stays = set(custom_icustays_list)

# NOTE(review): the per-stay file is re-read and re-written for every matching
# row; batching rows per stay would cut the file I/O substantially.
for _row_idx, df_row in tqdm(df.iterrows(), total=len(df)):
    row_stay_id = df_row[stay_col]
    if row_stay_id not in selected_stays:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id)
    dt.append(
        uid=df_row['itemid'],
        value=df_row['amount'],
        unit=df_row['amountuom'],
        rate=df_row['rate'],
        rate_unit=df_row['rateuom'],
        category='Medication',
        specimen_id=None,
        starttime=df_row['starttime'],
        endtime=df_row['endtime'],
    )
    # Save under the stay that was just loaded. The original passed a
    # `stay_id` name that this loop never binds, so the data was written to
    # whatever stay an earlier cell happened to leave in that variable.
    save_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id, pd.DataFrame(dt.data))

print("Added (medication) entries.")


In [None]:
# Export the derived kdigo_stages table into the per-stay DSV files.
# uid -> column name of the kdigo_stages table.
id_mapping = {
    100031: 'creat_low_past_48hr',
    100032: 'creat_low_past_7day',
    100033: 'aki_stage_creat',
    100034: 'aki_stage_uo',
    100035: 'aki_stage',
}
# Invert to column name -> uid, which is the direction the loop looks up.
id_mapping = {name: uid for uid, name in id_mapping.items()}

# ['subject_id', 'hadm_id', 'stay_id', 'charttime', 'creat_low_past_7day', 'creat_low_past_48hr', 'creat', 'aki_stage_creat', 'uo_rt_6hr', 'uo_rt_12hr', 'uo_rt_24hr', 'aki_stage_uo', 'aki_stage']
df = get_database_table_as_dataframe(conn, query_schema_derived, 'kdigo_stages')

it = InfoTable()
dt = DataTable()

# Loop invariants: build the membership set and the list of exported columns
# once. As in the original, the first four columns (the id and charttime
# columns per the list above) are skipped before matching against id_mapping.
selected_stays = set(custom_icustays_list)
mapped_cols = [col for col in df.columns.tolist()[4:] if col in id_mapping]

# NOTE(review): the per-stay file is re-read and re-written for every matching
# row; batching rows per stay would cut the file I/O substantially.
for _row_idx, df_row in tqdm(df.iterrows(), total=len(df)):
    row_stay_id = df_row['stay_id']
    if row_stay_id not in selected_stays:
        continue

    dt.data = load_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id)

    for col in mapped_cols:
        dt.append(
            uid=id_mapping[col],
            value=df_row[col],
            unit=None,
            rate=None,
            rate_unit=None,
            category='KDIGO',
            specimen_id=None,
            starttime=df_row['charttime'],
            endtime=None,
        )
    # Save under the stay that was just loaded. The original passed a
    # `stay_id` name that this loop never binds, so the data was written to
    # whatever stay an earlier cell happened to leave in that variable.
    save_data_dsv(STRUCTURED_EXPORT_DIR, row_stay_id, pd.DataFrame(dt.data))

print("Added kdigo_stages (derived) entries.")
