In [1]:
import os
import sys
from pathlib import Path

import pandas as pd
import plotly.express as px

# Prepend the directory two levels above the current working directory to the
# module search path so the `utils` package below can be imported.
# NOTE(review): this depends on the kernel's working directory; presumably the
# notebook is launched from its own folder and the grandparent is the project
# root that holds `utils/` -- confirm.
sys.path.insert(0, str(Path("").resolve().parents[1]))
from utils.db_manager import MimicDBManager



In [2]:
# Notebook-wide settings, kept in one place so they are easy to tune.
# `width`, `height` and `plot_fig` are only defined here; presumably they are
# the figure size and a render toggle used by later plotting cells -- confirm.
width, height = 1200, 600

# Directory for exported images; created (with parents) if it is missing.
img_path = "/app/ml_model/EDA/images"
os.makedirs(img_path, exist_ok=True)

plot_fig = False

# Let pandas print up to 100 rows before it truncates a displayed frame.
pd.set_option("display.max_rows", 100)

In [3]:
# Database handle for the MIMIC tables (5432 is the standard PostgreSQL port).
db_manager = MimicDBManager(port=5432)

# Display the fully qualified ("schema.table") names the manager reports.
available_table_names = db_manager.retrieve_table_names()
available_table_names

[2024-06-13 04:15:37,299 - db_manager.py:51 - generate_connection() - INFO Creating connection to PostgreSQL


['mimiciv_hosp.diagnoses_icd',
 'mimiciv_hosp.labevents',
 'mimiciv_hosp.d_icd_diagnoses',
 'mimiciv_hosp.d_icd_procedures',
 'mimiciv_hosp.emar',
 'mimiciv_hosp.hcpcsevents',
 'mimiciv_hosp.d_labitems',
 'mimiciv_hosp.pharmacy',
 'mimiciv_hosp.omr',
 'mimiciv_hosp.poe_detail',
 'mimiciv_hosp.poe',
 'mimiciv_hosp.procedures_icd',
 'mimiciv_hosp.provider',
 'mimiciv_hosp.drgcodes',
 'mimiciv_hosp.emar_detail',
 'mimiciv_hosp.d_hcpcs',
 'mimiciv_hosp.microbiologyevents',
 'mimiciv_hosp.prescriptions',
 'mimiciv_hosp.services',
 'mimiciv_hosp.transfers',
 'mimiciv_icu.caregiver',
 'mimiciv_icu.inputevents',
 'mimiciv_icu.ingredientevents',
 'mimiciv_icu.chartevents',
 'mimiciv_icu.icustays',
 'mimiciv_icu.d_items',
 'mimiciv_icu.datetimeevents',
 'mimiciv_icu.outputevents',
 'mimiciv_icu.procedureevents',
 'mimiciv_hosp.patients',
 'mimiciv_hosp.admissions',
 'mimiciv_derived.icustay_times',
 'mimiciv_derived.icustay_hourly',
 'mimiciv_derived.weight_durations',
 'mimiciv_derived.urine_ou

In [4]:
# Ask the database for the column description of every table, keyed by the
# fully qualified table name.
# NOTE(review): assumes retrieve_column_types returns a {column: type} mapping,
# which is what the displayed frames further down suggest -- confirm.
column_types_by_table = {
    full_name: db_manager.retrieve_column_types(full_name)
    for full_name in db_manager.retrieve_table_names()
}

# One row per table, one column per column name seen in any table.
# "-" marks a column that the given table does not have.
tables = pd.DataFrame.from_dict(column_types_by_table, orient="index")
tables = tables.fillna("-")
tables = tables.sort_index()
tables = tables.reset_index(names="full_table_name")

# Split "schema.table" into its schema part and its bare table name.
name_parts = tables["full_table_name"].str.split(".", expand=True)
tables[["parent_table", "table_name"]] = name_parts

In [7]:
# Bare table names that contain the substring "input".
[name for name in tables["table_name"].to_list() if "input" in name]

['inputevents']

In [5]:
# Columns that actually exist on the chosen table: a column is kept when its
# value for this table is something other than the "-" placeholder.
table = "inputevents"
table_of_interest = tables.loc[tables["table_name"] == table]
has_real_value = (table_of_interest != "-").any()
table_of_interest.loc[:, has_real_value].columns

Index(['full_table_name', 'subject_id', 'hadm_id', 'itemid', 'storetime',
       'starttime', 'caregiver_id', 'originalrate', 'stay_id', 'endtime',
       'amount', 'orderid', 'linkorderid', 'patientweight', 'totalamount',
       'isopenbag', 'continueinnextdept', 'originalamount', 'rate',
       'amountuom', 'ordercategorydescription', 'rateuom', 'totalamountuom',
       'statusdescription', 'ordercategoryname', 'secondaryordercategoryname',
       'ordercomponenttypedescription', 'parent_table', 'table_name'],
      dtype='object')

In [6]:
# Substring search for "vent"; note that this also matches every "*events" table.
vent_matches = filter(lambda name: "vent" in name, tables.table_name.to_list())
list(vent_matches)

['ventilation',
 'ventilator_setting',
 'hcpcsevents',
 'labevents',
 'microbiologyevents',
 'chartevents',
 'datetimeevents',
 'ingredientevents',
 'inputevents',
 'outputevents',
 'procedureevents']

In [11]:
# Show which columns the selected table has, dropping every column that holds
# only the "-" placeholder for it.
table = "ventilation"
is_selected = tables["table_name"].eq(table)
table_of_interest = tables.loc[is_selected]
placeholder_only = table_of_interest.eq("-").all()
table_of_interest.loc[:, ~placeholder_only].columns

Index(['full_table_name', 'starttime', 'stay_id', 'endtime',
       'ventilation_status', 'parent_table', 'table_name'],
      dtype='object')

In [10]:
# Bare table names that contain the substring "kdigo".
all_table_names = tables["table_name"].to_list()
[candidate for candidate in all_table_names if "kdigo" in candidate]

['kdigo_creatinine', 'kdigo_stages', 'kdigo_uo']

In [13]:
# Display the row for the selected table (column names together with their
# types), keeping only columns that are not the "-" placeholder.
table = "kdigo_stages"
row_mask = tables["table_name"] == table
table_of_interest = tables[row_mask]
keep_column = ~(table_of_interest == "-").all()
table_of_interest.loc[:, keep_column]

Unnamed: 0,full_table_name,subject_id,hadm_id,charttime,stay_id,uo_rt_6hr,uo_rt_12hr,uo_rt_24hr,creat,creat_low_past_48hr,creat_low_past_7day,aki_stage_creat,aki_stage_uo,aki_stage_crrt,aki_stage,aki_stage_smoothed,parent_table,table_name
36,mimiciv_derived.kdigo_stages,integer,integer,timestamp without time zone,integer,numeric,numeric,numeric,double precision,double precision,double precision,integer,integer,integer,integer,integer,mimiciv_derived,kdigo_stages


In [14]:
# Bare table names that contain the substring "vaso".
list(filter(lambda name: "vaso" in name, tables["table_name"].to_list()))

['vasoactive_agent', 'vasopressin']

In [16]:
# List the columns present on the selected table; columns whose entry is the
# "-" placeholder are filtered out.
table = "vasoactive_agent"
table_of_interest = tables.loc[tables["table_name"].eq(table)]
present = table_of_interest.ne("-").any()
table_of_interest.loc[:, present].columns

Index(['full_table_name', 'starttime', 'stay_id', 'endtime', 'dopamine',
       'epinephrine', 'norepinephrine', 'phenylephrine', 'vasopressin',
       'dobutamine', 'milrinone', 'parent_table', 'table_name'],
      dtype='object')

In [17]:
# Same inspection for the selected table: keep a column only if at least one
# of its values differs from the "-" placeholder.
table = "vasopressin"
matches_table = tables.table_name == table
table_of_interest = tables[matches_table]
all_placeholder = (table_of_interest == "-").all()
table_of_interest.loc[:, ~all_placeholder].columns

Index(['full_table_name', 'starttime', 'stay_id', 'endtime', 'linkorderid',
       'vaso_rate', 'vaso_amount', 'parent_table', 'table_name'],
      dtype='object')