# Investigate the concepts available in the database (mimic-iv)

The concepts are defined in the mimic-code GitHub repository.

In [2]:
import getpass
import json
import math
import os
import psycopg2
import pandas as pd
import time

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from configobj import ConfigObj
from multiprocessing import Pool, RLock
from tqdm import tqdm
from typing import Tuple

from projects.utils import *
from projects.common import *


In [5]:
# The `db` directory is addressed relative to the current working directory
# (os.path.abspath('') resolves to it), three levels up.
db_dir = os.path.abspath('') + "/../../../db"

# One call returns the four schema values followed by the open connection, in this order.
query_schema_core, query_schema_hosp, query_schema_icu, query_schema_derived, conn = \
    connect_to_database(db_dir)


Database: mimiciv
Username: mimiciv
>>>>> Connected to DB <<<<<


Table for icustays:  
['subject_id', 'hadm_id', 'stay_id', 'first_careunit', 'last_careunit', 'intime', 'outtime', 'los'] 

Table for transfers:  
['subject_id', 'hadm_id', 'transfer_id', 'eventtype', 'careunit', 'intime', 'outtime'] 

Table for patients:  
['subject_id', 'gender', 'anchor_age', 'anchor_year', 'anchor_year_group', 'dod']

Table for admissions:  
['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime', 'admission_type', 'admission_location', 'discharge_location', 'insurance', 'language', 'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'hospital_expire_flag']

In [None]:
# Load the core and ICU tables used throughout the notebook. Transfers and ICU stays
# are ordered chronologically by (intime, outtime).
patients_df = get_database_table_as_dataframe(conn, query_schema_core, 'patients')
admissions_df = get_database_table_as_dataframe(conn, query_schema_core, 'admissions')
transfers_df = get_database_table_as_dataframe(conn, query_schema_core, 'transfers').sort_values(by=['intime', 'outtime'])
icustays_df = get_database_table_as_dataframe(conn, query_schema_icu, 'icustays').sort_values(by=['intime', 'outtime'])

# Sanity check: each table must have exactly one row per key.
# The key columns are addressed by name rather than by position, so the checks keep
# working if the column order changes, and no full-frame object array is built.
assert patients_df['subject_id'].is_unique, "duplicate subject_id in patients"
assert admissions_df['hadm_id'].is_unique, "duplicate hadm_id in admissions"
assert icustays_df['stay_id'].is_unique, "duplicate stay_id in icustays"

# Plain Python lists of the patient and admission identifiers.
patients_list = patients_df['subject_id'].tolist()
admissions_list = admissions_df['hadm_id'].tolist()

# Concept 2 : comorbidity - charlson

In [None]:
# Fetch the `charlson` table through `query_schema_derived` and display it.
# NOTE: `_table` and `df` are reassigned by every concept cell below.
_table = 'charlson'

df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
df

# Concept 3 : weights

Derived from the `chartevents` item IDs 224639 and 226512.


In [None]:
# Fetch the `weight_durations` table through `query_schema_derived` and display it.
# NOTE: this overwrites the `_table` and `df` set by the previous concept cell.
_table = 'weight_durations'

df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
df

# Concept 4 : sepsis

In [None]:
# Fetch the `suspicion_of_infection` table through `query_schema_derived`.
_table = 'suspicion_of_infection'

df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
# Display only the rows with no missing value in any column.
# dropna() returns a new frame; `df` itself still holds every row.
df.dropna()

In [None]:
# Fetch the `sepsis3` table through `query_schema_derived` and display it.
# NOTE: this overwrites the `df` loaded from `suspicion_of_infection` in the previous cell.
_table = 'sepsis3'

df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
df

# Concept 5 : score

In [None]:
# Fetch the `sofa` table through `query_schema_derived` and display it.
# NOTE: this overwrites the `_table` and `df` set by the previous concept cell.
_table = 'sofa'

df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
df

# Concept 6 : measurement

In [4]:
# Reuse `conn` and the schema names created by the connection cell near the top of
# the notebook. Calling connect_to_database() again here would rebind `conn` to a
# fresh connection while the previous one is never closed.
_table = 'bg'

# Fetch the `bg` table through `query_schema_derived` and display it.
df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
df

Database: mimiciv
Username: mimiciv
>>>>> Connected to DB <<<<<
Getting bg data
Number of entries for bg : 561212
Column names : ['subject_id', 'hadm_id', 'charttime', 'specimen', 'specimen_pred', 'specimen_prob', 'so2', 'po2', 'pco2', 'fio2_chartevents', 'fio2', 'aado2', 'aado2_calc', 'pao2fio2ratio', 'ph', 'baseexcess', 'bicarbonate', 'totalco2', 'hematocrit', 'hemoglobin', 'carboxyhemoglobin', 'methemoglobin', 'chloride', 'calcium', 'temperature', 'potassium', 'sodium', 'lactate', 'glucose']



Unnamed: 0,subject_id,hadm_id,charttime,specimen,specimen_pred,specimen_prob,so2,po2,pco2,fio2_chartevents,...,hemoglobin,carboxyhemoglobin,methemoglobin,chloride,calcium,temperature,potassium,sodium,lactate,glucose
0,10000935,25849114.0,2187-10-22 15:40:00,,ART.,0.945735,,86.0,33.0,,...,,,,,,,,,2.8,
1,10000980,20897796.0,2193-08-14 21:41:00,,,0.562355,,30.0,40.0,,...,,,,,,,,,,
2,10001884,29678536.0,2130-10-10 09:31:00,,ART.,0.927595,,73.0,58.0,,...,,,,,,,,,,
3,10001884,,2130-10-19 13:58:00,,,0.000818,35.0,23.0,56.0,,...,,,,,,,,,1.6,
4,10001884,28664981.0,2130-11-29 00:15:00,,ART.,0.983265,,103.0,49.0,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
561207,19999840,21033226.0,2164-09-17 13:34:00,,,0.000114,25.0,25.0,57.0,40.0,...,8.8,,,102.0,1.15,,4.2,134.0,4.0,275.0
561208,19999840,21033226.0,2164-09-17 13:39:00,,,0.000156,25.0,23.0,71.0,40.0,...,,,,102.0,1.10,,4.1,141.0,4.9,369.0
561209,19999987,,2145-11-02 20:27:00,,ART.,1.000000,,439.0,51.0,,...,,,,,,,,,,
561210,19999987,23865745.0,2145-11-03 05:28:00,,ART.,0.984502,,114.0,44.0,40.0,...,,,,,,,,,,


# Misc Concepts

In [23]:
# `projects.utils` is already star-imported in the imports cell, and `conn` comes from
# the connection cell near the top of the notebook. Neither is repeated here:
# re-connecting would rebind `conn` without closing the previous connection.
_table = 'inputevents'

# List the column names of the table before pulling any data from it.
print(get_database_table_column_name(conn, _table))


Database: mimiciv
Username: mimiciv
>>>>> Connected to DB <<<<<
['subject_id', 'hadm_id', 'stay_id', 'starttime', 'endtime', 'storetime', 'itemid', 'amount', 'amountuom', 'rate', 'rateuom', 'orderid', 'linkorderid', 'ordercategoryname', 'secondaryordercategoryname', 'ordercomponenttypedescription', 'ordercategorydescription', 'patientweight', 'totalamount', 'totalamountuom', 'isopenbag', 'continueinnextdept', 'cancelreason', 'statusdescription', 'originalamount', 'originalrate']


In [28]:
# Reuse `conn` and `query_schema_icu` from the connection cell near the top of the
# notebook instead of opening (and never closing) another connection.
_table = 'inputevents'

# Only the dose-related columns are requested. With `_chunk_size` set, the helper's
# result is iterated chunk by chunk (see `_concat_df_chunk`), not used as a single DataFrame.
_df = get_database_table_as_dataframe(
    conn, query_schema_icu, _table,
    'itemid, amount, amountuom, rate, rateuom, totalamount, totalamountuom, originalamount, originalrate',
    _chunk_size=1000)

def _concat_df_chunk(df_iter, columns=None):
    """Stack the selected columns of every DataFrame chunk into one 2-D array.

    Parameters
    ----------
    df_iter : iterable of pandas.DataFrame
        Chunks to combine; each must contain every name in ``columns``.
    columns : sequence of str, optional
        Columns to keep, in output order. Defaults to the inputevents dose
        columns used by this notebook.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(total_rows, len(columns))``. If ``df_iter`` yields no
        chunks, an empty ``(0, len(columns))`` object array is returned.
    """
    if columns is None:
        columns = ['itemid', 'amount', 'amountuom', 'rate', 'rateuom',
                   'totalamount', 'totalamountuom', 'originalamount', 'originalrate']
    columns = list(columns)

    # tqdm only reports progress; it yields the chunks unchanged.
    chunks = [chunk[columns].to_numpy() for chunk in tqdm(df_iter)]

    if not chunks:
        # np.concatenate raises ValueError on an empty list (e.g. an empty table),
        # so return a well-formed empty result instead.
        return np.empty((0, len(columns)), dtype=object)
    return np.concatenate(chunks, axis=0)

# Combine every chunk of the inputevents query into a single 2-D NumPy array.
inputevents_id = _concat_df_chunk(_df)

Database: mimiciv
Username: mimiciv
>>>>> Connected to DB <<<<<
Getting inputevents data


9461it [00:33, 281.81it/s]


Number of entries for inputevents : 9460658
Column names : ['itemid', 'amount', 'amountuom', 'rate', 'rateuom', 'totalamount', 'totalamountuom', 'originalamount', 'originalrate']



9461it [01:10, 134.97it/s]


In [34]:
# `inputevents_id` mixes numbers and unit strings, so it is an object array and a
# DataFrame built from it would have dtype `object` in every column.
# infer_objects() converts the numeric columns back to proper numeric dtypes so the
# filters below run vectorised; the unit-string columns are left unchanged.
df_input = pd.DataFrame(
    inputevents_id,
    columns=['itemid', 'amount', 'amountuom', 'rate', 'rateuom', 'totalamount', 'totalamountuom', 'originalamount', 'originalrate'],
).infer_objects()

In [53]:
# Rows for itemid 221794 whose original and total amounts both exceed 100.
# NOTE(review): 221794 is presumably a single drug item (units are mg / mg/hour in the
# output) — confirm its label against the item dictionary.
df_input.loc[
    (df_input['itemid'] == 221794)
    & (df_input['originalamount'] > 100)
    & (df_input['totalamount'] > 100)
]

Unnamed: 0,itemid,amount,amountuom,rate,rateuom,totalamount,totalamountuom,originalamount,originalrate
3854466,221794,28.0,mg,9.999999,mg/hour,500.0,ml,500.0,10.000001
6798249,221794,24.009339,mg,10.003891,mg/hour,250.0,ml,214.25,10.000001
6798344,221794,187.999998,mg,5.0,mg/hour,250.0,ml,250.0,5.0
6798412,221794,35.750002,mg,15.0,mg/hour,250.0,ml,250.0,15.000001
8324633,221794,670.799998,mg,31.993639,mg/hour,250.0,ml,670.799988,32.000004
8325291,221794,329.199992,mg,23.999998,mg/hour,250.0,ml,1000.0,24.000002
8500782,221794,8.583333,mg,5.0,mg/hour,250.0,ml,250.0,5.0
8500786,221794,47.317,mg,9.99655,mg/hour,250.0,ml,241.416672,10.000001


In [6]:
# Reuse `conn` and `query_schema_derived` from the connection cell near the top of the
# notebook. Reconnecting here would rebind `conn` without closing the previous connection.
_table = 'meld'

# Fetch the `meld` table and display it.
# NOTE: `_df` previously held the chunked inputevents result; it is a full DataFrame here.
_df = get_database_table_as_dataframe(conn, query_schema_derived, _table)
_df


Database: mimiciv
Username: mimiciv
>>>>> Connected to DB <<<<<
Getting meld data
Number of entries for meld : 76540
Column names : ['subject_id', 'hadm_id', 'stay_id', 'meld_initial', 'meld', 'rrt', 'creatinine_max', 'bilirubin_total_max', 'inr_max', 'sodium_min']



Unnamed: 0,subject_id,hadm_id,stay_id,meld_initial,meld,rrt,creatinine_max,bilirubin_total_max,inr_max,sodium_min
0,17182515,29779853,30040770,6.0,6.000,,0.9,,1.0,139.0
1,11641877,28355198,30040824,6.0,6.000,,0.5,,,136.0
2,13103745,26794760,30042091,17.0,17.000,,3.0,0.6,1.0,141.0
3,19710703,27874064,30042895,8.0,8.000,,1.1,,1.1,143.0
4,16010957,26173268,30043326,11.0,11.000,,0.8,,1.5,142.0
...,...,...,...,...,...,...,...,...,...,...
76535,15368898,27299174,39990887,8.0,8.000,,0.7,,1.2,138.0
76536,15721773,28911582,39991872,18.0,18.000,,1.3,4.4,1.4,139.0
76537,12275003,22562812,39992247,16.0,19.168,,2.2,,1.2,133.0
76538,17577670,24221219,39993265,10.0,10.000,,0.9,1.1,1.3,139.0
