# Investigate the concepts available in the database (MIMIC-IV)

The concepts are available from the `mimic-code` GitHub repository.

In [1]:
import getpass
import json
import math
import os
import psycopg2
import pandas as pd
import time

import matplotlib.pyplot as plt
%matplotlib inline

import numpy as np

from configobj import ConfigObj
from multiprocessing import Pool, RLock
from tqdm import tqdm
from typing import Tuple

from projects.utils import *
from projects.common import *


In [2]:
def save_dsv(path: str, data: pd.DataFrame):
    """Write ``data`` to ``path`` as a '$'-separated text file.

    The parent directory of ``path`` is created when it does not exist yet.
    Missing values are written as empty fields and the DataFrame index is
    not written.

    Args:
        path: Destination file path. A bare file name (no directory part)
            is written relative to the current working directory.
        data: Table to export.
    """
    save_dir = os.path.dirname(path)
    # os.makedirs('') raises FileNotFoundError, so only create the parent
    # directory when the path actually has a directory component.
    if save_dir:
        os.makedirs(save_dir, exist_ok=True)
    data.to_csv(path, na_rep='', sep='$', index=False)


def create_patient_info(dtype=int):
    """Build an empty two-column patient table.

    Args:
        dtype: NumPy dtype used for the 'Value' column (defaults to int).

    Returns:
        A dict with two fresh, zero-length 1-D arrays: 'UID' (always int)
        and 'Value' (of the requested dtype).
    """
    uid_column = np.array([], dtype=int)
    value_column = np.array([], dtype=dtype)
    return dict(UID=uid_column, Value=value_column)


def sort_patient_table(x: dict):
    """Sort every column of a patient table in place by ascending 'UID'.

    Args:
        x: Mapping of column name to array. It must contain a 'UID' entry,
            and every value must support indexing with an integer index
            array (e.g. NumPy arrays as long as ``x['UID']``).

    Returns:
        None. ``x`` is modified in place: each key is rebound to its
        reordered array.
    """
    # A stable sort keeps rows that share a UID in their original relative
    # order; NumPy's default quicksort gives no such guarantee.
    order = np.argsort(x['UID'], kind='stable')
    for key in x:
        x[key] = x[key][order]


In [3]:
# Database directory, addressed three levels above the current working
# directory (os.path.abspath('') resolves to the cwd).
db_dir = f"{os.path.abspath('')}/../../../db"

# connect_to_database is a project helper; it returns the four
# query_schema_* values followed by the connection object, in this order.
query_schema_core, query_schema_hosp, query_schema_icu, query_schema_derived, conn = \
    connect_to_database(db_dir)


Database: mimiciv
Username: mimiciv
>>>>> Connected to DB <<<<<


Table for icustays:  
['subject_id', 'hadm_id', 'stay_id', 'first_careunit', 'last_careunit', 'intime', 'outtime', 'los'] 

Table for transfers:  
['subject_id', 'hadm_id', 'transfer_id', 'eventtype', 'careunit', 'intime', 'outtime'] 

Table for patients:  
['subject_id', 'gender', 'anchor_age', 'anchor_year', 'anchor_year_group', 'dod']

Table for admissions:  
['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime', 'admission_type', 'admission_location', 'discharge_location', 'insurance', 'language', 'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'hospital_expire_flag']

In [5]:
# Load the core and ICU tables. Transfers and ICU stays are ordered by
# their 'intime', then 'outtime'.
patients_df = get_database_table_as_dataframe(conn, query_schema_core, 'patients')
admissions_df = get_database_table_as_dataframe(conn, query_schema_core, 'admissions')
transfers_df = get_database_table_as_dataframe(
    conn, query_schema_core, 'transfers').sort_values(by=['intime', 'outtime'])
icustays_df = get_database_table_as_dataframe(
    conn, query_schema_icu, 'icustays').sort_values(by=['intime', 'outtime'])

# Sanity checks: each table must have a unique identifier column. Checking
# the named column avoids converting the whole frame to an object array and
# does not depend on the column order returned by the query.
assert patients_df['subject_id'].is_unique, "duplicate subject_id in patients"
assert admissions_df['hadm_id'].is_unique, "duplicate hadm_id in admissions"
assert icustays_df['stay_id'].is_unique, "duplicate stay_id in icustays"

patients_list = patients_df['subject_id'].tolist()
admissions_list = admissions_df['hadm_id'].tolist()

Getting patients data
Number of entries for patients : 382278
Column names : ['subject_id', 'gender', 'anchor_age', 'anchor_year', 'anchor_year_group', 'dod']
Getting admissions data
Number of entries for admissions : 523740
Column names : ['subject_id', 'hadm_id', 'admittime', 'dischtime', 'deathtime', 'admission_type', 'admission_location', 'discharge_location', 'insurance', 'language', 'marital_status', 'ethnicity', 'edregtime', 'edouttime', 'hospital_expire_flag']
Getting transfers data
Number of entries for transfers : 2189535
Column names : ['subject_id', 'hadm_id', 'transfer_id', 'eventtype', 'careunit', 'intime', 'outtime']
Getting icustays data
Number of entries for icustays : 76540
Column names : ['subject_id', 'hadm_id', 'stay_id', 'first_careunit', 'last_careunit', 'intime', 'outtime', 'los']


In [6]:
# Care units of interest: an ICU stay is kept when its first OR its last
# care unit is one of them.
_CAREUNITS = ['Coronary Care Unit (CCU)',
              'Cardiac Vascular Intensive Care Unit (CVICU)']

# Vectorised membership test instead of a Python-level iterrows() loop;
# the resulting stay_id list keeps the row order of icustays_df.
custom_icustays_list = icustays_df.loc[
    icustays_df['first_careunit'].isin(_CAREUNITS)
    | icustays_df['last_careunit'].isin(_CAREUNITS),
    'stay_id',
].tolist()


# Concept 1 : icustay_detail


In [7]:
# Name of the concept table examined in this section.
_table = 'icustay_detail'

# Fetch that table through the project helper, using the derived-schema
# handle; presumably returns a pandas DataFrame (per the helper's name).
df = get_database_table_as_dataframe(conn, query_schema_derived, _table)


Getting icustay_detail data
Number of entries for icustay_detail : 76540
Column names : ['subject_id', 'hadm_id', 'stay_id', 'gender', 'dod', 'admittime', 'dischtime', 'los_hospital', 'admission_age', 'ethnicity', 'hospital_expire_flag', 'hospstay_seq', 'first_hosp_stay', 'icu_intime', 'icu_outtime', 'los_icu', 'icustay_seq', 'first_icu_stay']


In [12]:
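# NOTE(review): this export cell is disabled (entirely commented out); the
# progress bar shown as its output presumably comes from an earlier run made
# while the code was still enabled.
# uid_info = {idx: name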
#             for idx, name in enumerate(df.columns.to_list() +
#                                        ['first_careunit', 'last_careunit'])}
# uid_info_path = os.path.abspath('') + "/../../../" + UID_INFO_PATH
# os.remove(uid_info_path)
# with open(uid_info_path, 'w+') as f:
#     json.dump(uid_info, f)

# for df_i in tqdm(df.iterrows(), total=len(df)):
#     df_row = df_i[1]

#     if df_row['stay_id'] in custom_icustays_list:

#         info_dict = create_patient_info()

#         c = 0
#         for i, j in zip(uid_info, df_row):
#             info_dict['UID'] = np.append(info_dict['UID'], i)
#             info_dict['Value'] = np.append(info_dict['Value'], j)
#             c = i

#         c += 1
#         info_dict['UID'] = np.append(info_dict['UID'], c)
#         j = icustays_df.loc[icustays_df['stay_id'] ==
#                             df_row['stay_id']]['first_careunit'].item()
#         info_dict['Value'] = np.append(info_dict['Value'], j)

#         c += 1
#         info_dict['UID'] = np.append(info_dict['UID'], c)
#         j = icustays_df.loc[icustays_df['stay_id'] ==
#                             df_row['stay_id']]['last_careunit'].item()
#         info_dict['Value'] = np.append(info_dict['Value'], j)

#         save_path = os.path.join(STRUCTURED_EXPORT_DIR,
#                                  'info_'+str(df_i[1]['stay_id'])+'.dsv')
#         save_dsv(save_path, pd.DataFrame(info_dict))


100%|██████████| 76540/76540 [01:34<00:00, 811.80it/s]
