In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys, os, pickle, utils

from tqdm.notebook import tqdm
from datetime import timedelta
#from utils import baseline_SCr

# If the working directory path ends in "code", step up one level so the
# relative ./data paths below resolve.
if os.getcwd().endswith("code"):
    os.chdir('../')

# Folders holding the MIMIC-IV 2.2 parquet files, one per module.
icu, hosp, ed = (
    './data/mimic-iv-2.2-parquet/icu/',
    './data/mimic-iv-2.2-parquet/hosp/',
    './data/mimic-iv-2.2-parquet/ed/',
)

In [2]:
# Tables read from the hosp folder; each one is bound to a notebook-level
# variable carrying the table's own name (labevents, patients, ...).
hosp_list = [
    'labevents',
    'd_labitems',
    'patients',
    'admissions',
    'diagnoses_icd',
    'microbiologyevents',
    'prescriptions',
    'omr',
]
for table_name in hosp_list:
    globals()[table_name] = pd.read_parquet(f'{hosp}{table_name}.parquet')

In [3]:
# Tables read from the icu folder; each one is bound to a notebook-level
# variable carrying the table's own name (chartevents, icustays, ...).
icu_list = [
    'chartevents',
    'd_items',
    'icustays',
    'inputevents',
    'outputevents',
    'procedureevents',
]
for table_name in icu_list:
    globals()[table_name] = pd.read_parquet(f'{icu}{table_name}.parquet')

In [4]:
# The only table read from the ed folder.
vitalsign = pd.read_parquet(f'{ed}vitalsign.parquet')

In [5]:
# Definition tables used by the extraction cells below. The lab and vital
# loops read their `abbreviation` column and pass each row to the helper.
labvalues, vitals, comorbidities = (
    pd.read_csv(csv_path)
    for csv_path in (
        './data/origin/labvalues/labvalues.csv',
        './data/origin/vitals/vitals.csv',
        './data/origin/demographic/comorbidities.csv',
    )
)

# Demographic

## Gender

In [6]:
# Gender table: reuse the parquet cache when present, otherwise compute it
# from `patients` and store it.
gender_cache = './data/origin/demographic/patients_gender.parquet'
if os.path.isfile(gender_cache):
    patients_gender = pd.read_parquet(gender_cache)
else:
    patients_gender = utils.cal_gender(patients)
    patients_gender.to_parquet(gender_cache)

## Age

In [7]:
# Age table: reuse the parquet cache when present, otherwise compute it from
# `icustays` and `patients` and store it.
age_cache = './data/origin/demographic/icustays_age.parquet'
if os.path.isfile(age_cache):
    icustays_age = pd.read_parquet(age_cache)
else:
    icustays_age = utils.cal_age(icustays, patients)
    icustays_age.to_parquet(age_cache)

## Race

In [8]:
# Race table: reuse the parquet cache when present, otherwise compute it
# from `admissions` and store it.
race_cache = './data/origin/demographic/admissions_race.parquet'
if os.path.isfile(race_cache):
    admissions_race = pd.read_parquet(race_cache)
else:
    admissions_race = utils.cal_race(admissions)
    admissions_race.to_parquet(race_cache)

## Height

In [9]:
# TODO: height extraction is currently disabled; the call is kept for reference.
# NOTE(review): other helpers are called as `utils.<name>`; this one would
# presumably need the same prefix if re-enabled - confirm.
#admission_height = cal_height(icustays,chartevents,omr)

## Weight

In [10]:
# Weight table: reuse the parquet cache when present, otherwise compute it
# from `icustays`, `chartevents`, `inputevents` and `omr` and store it.
weight_cache = './data/origin/demographic/admission_weight.parquet'
if os.path.isfile(weight_cache):
    admission_weight = pd.read_parquet(weight_cache)
else:
    admission_weight = utils.cal_weight(icustays, chartevents, inputevents, omr)
    admission_weight.to_parquet(weight_cache)

## Comorbidities

In [11]:
# Comorbidity table: reuse the parquet cache when present. Otherwise the
# `comorbidities` definition frame loaded from CSV is passed to the helper
# and the name is rebound to the helper's result.
comorbidities_cache = './data/origin/demographic/comorbidities.parquet'
if os.path.isfile(comorbidities_cache):
    comorbidities = pd.read_parquet(comorbidities_cache)
else:
    comorbidities = utils.cal_comorbidities(icustays, comorbidities, diagnoses_icd)
    comorbidities.to_parquet(comorbidities_cache)

## Baseline SCr

In [12]:
# Baseline serum creatinine: reuse the parquet cache when present, otherwise
# compute it and store it.
# The file is written to the same path that is checked and read. It used to
# be written one directory higher, so the cache was never found and the
# helper ran again on every execution.
baseline_scr_cache = './data/origin/demographic/baseline_SCr.parquet'
if not os.path.isfile(baseline_scr_cache):
    baseline_SCr = utils.cal_baseline_SCr(labevents,icustays,patients_gender,icustays_age,admissions_race)
    baseline_SCr.to_parquet(baseline_scr_cache)
else:
    baseline_SCr = pd.read_parquet(baseline_scr_cache)

# Lab values

In [13]:
# For every lab definition, bind `chartevents_<abbreviation>` at notebook
# level: read it from the parquet cache if present, otherwise extract it
# with is_in_icu=True and write the cache.
for row_pos, abbr in enumerate(tqdm(labvalues.abbreviation)):
    cache_path = './data/origin/labvalues/chartevents_%s.parquet' % abbr
    frame_name = 'chartevents_{}'.format(abbr)
    if os.path.isfile(cache_path):
        globals()[frame_name] = pd.read_parquet(cache_path)
        continue
    definition_row = labvalues.iloc[row_pos]
    globals()[frame_name] = utils.extract_labvalues(chartevents, labevents, definition_row, is_in_icu=True)
    globals()[frame_name].to_parquet(cache_path)

  0%|          | 0/30 [00:00<?, ?it/s]

In [14]:
# For every lab definition, bind `labevents_<abbreviation>` at notebook
# level: read it from the parquet cache if present, otherwise extract it
# with is_in_icu=False and write the cache.
for row_pos, abbr in enumerate(tqdm(labvalues.abbreviation)):
    cache_path = './data/origin/labvalues/labevents_%s.parquet' % abbr
    frame_name = 'labevents_{}'.format(abbr)
    if os.path.isfile(cache_path):
        globals()[frame_name] = pd.read_parquet(cache_path)
        continue
    definition_row = labvalues.iloc[row_pos]
    globals()[frame_name] = utils.extract_labvalues(chartevents, labevents, definition_row, is_in_icu=False)
    globals()[frame_name].to_parquet(cache_path)

  0%|          | 0/30 [00:00<?, ?it/s]

In [15]:
# For every lab definition, bind `resample_<abbreviation>` at notebook level:
# read it from the parquet cache if present, otherwise resample the matching
# chartevents_/labevents_ frames built above and write the cache.
for abbr in tqdm(labvalues.abbreviation):
    cache_path = './data/resample/labvalues/resample_%s.parquet' % abbr
    frame_name = 'resample_{}'.format(abbr)
    if os.path.isfile(cache_path):
        globals()[frame_name] = pd.read_parquet(cache_path)
        continue
    from_chartevents = globals()['chartevents_{}'.format(abbr)]
    from_labevents = globals()['labevents_{}'.format(abbr)]
    globals()[frame_name] = utils.resample_labvalues(from_chartevents, from_labevents, icustays, abbr)
    globals()[frame_name].to_parquet(cache_path)

  0%|          | 0/30 [00:00<?, ?it/s]

# Vitals

In [16]:
# For every vital definition, bind `chartevents_<abbreviation>` at notebook
# level: read it from the parquet cache if present, otherwise extract it
# with is_in_icu=True and write the cache.
for row_pos, abbr in enumerate(tqdm(vitals.abbreviation)):
    cache_path = './data/origin/vitals/chartevents_%s.parquet' % abbr
    frame_name = 'chartevents_{}'.format(abbr)
    if os.path.isfile(cache_path):
        globals()[frame_name] = pd.read_parquet(cache_path)
        continue
    definition_row = vitals.iloc[row_pos]
    globals()[frame_name] = utils.extract_labvalues(chartevents, labevents, definition_row, is_in_icu=True)
    globals()[frame_name].to_parquet(cache_path)

# Temperature: convert the Fahrenheit readings to Celsius (in place on
# chartevents_tempF), rounded to one decimal.
chartevents_tempF['valuenum'] = chartevents_tempF['valuenum'].sub(32).mul(5).div(9)
# NOTE(review): no `valuenum2` column is created in this notebook, so this
# rename looks like a no-op - confirm against utils.extract_labvalues.
chartevents_tempF.rename(columns={'valuenum2':'valuenum'},inplace=True)
chartevents_tempF['valuenum'] = chartevents_tempF['valuenum'].round(1)

# Merge both temperature sources into a single frame, ordered by stay and time.
chartevents_temp = pd.concat([chartevents_tempC,chartevents_tempF]).sort_values(
    by=['subject_id','hadm_id','stay_id','charttime']
)
chartevents_temp.to_parquet('./data/origin/vitals/chartevents_temp.parquet')

# Drop the per-unit files so only the merged temperature file is left in the
# vitals folder.
for per_unit_file in (
    './data/origin/vitals/chartevents_tempC.parquet',
    './data/origin/vitals/chartevents_tempF.parquet',
):
    os.remove(per_unit_file)

  0%|          | 0/11 [00:00<?, ?it/s]

In [17]:
# Resample every vital that has a parquet file in ./data/origin/vitals.
# x[12:-8] strips the 12-character 'chartevents_' prefix and the '.parquet'
# suffix, leaving the abbreviation used in the variable names.
# The result is written to the same vitals path that is checked and read. It
# used to be written under resample/labvalues/, so the cache was never found.
# NOTE(review): the recorded run of this cell stopped with KeyError: 'stay_id'.
# The failing line is not visible in this notebook, so that is not addressed.
for idx in tqdm([x[12:-8] for x in os.listdir('./data/origin/vitals') if x[-8:] == '.parquet']):
    cache_path = './data/resample/vitals/resample_%s.parquet'%idx
    if not os.path.isfile(cache_path):
        globals()['resample_{}'.format(idx)] = utils.resample_vitals(globals()['chartevents_{}'.format(idx)],icustays,idx)
        globals()['resample_{}'.format(idx)].to_parquet(cache_path)
    else:
        globals()['resample_{}'.format(idx)] = pd.read_parquet(cache_path)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/73181 [00:00<?, ?it/s]

KeyError: 'stay_id'

In [None]:
# Combine the ABP*/NBP* resampled series into single systolic and diastolic
# series, then derive MAP from the two.
# NOTE(review): the recorded run failed with "module 'utils' has no attribute
# 'cal_MAP'" - confirm the helper exists before relying on this cell.
resample_SBP = utils.cal_BP(resample_ABPs, resample_NBPs)
resample_DBP = utils.cal_BP(resample_ABPd, resample_NBPd)
resample_MAP = utils.cal_MAP(resample_DBP, resample_SBP)

# Store all three series next to the other resampled vitals.
for pressure_frame, target_path in (
    (resample_SBP, './data/resample/vitals/resample_SBP.parquet'),
    (resample_DBP, './data/resample/vitals/resample_DBP.parquet'),
    (resample_MAP, './data/resample/vitals/resample_MAP.parquet'),
):
    pressure_frame.to_parquet(target_path)

AttributeError: module 'utils' has no attribute 'cal_MAP'

# Urine Output

In [None]:
# Urine output is derived from the ICU `outputevents` table by utils.cal_uo
# (helper not shown in this notebook). The result is not cached to disk here.
outputevents_uo = utils.cal_uo(outputevents)

# Glasgow Coma Scale

In [None]:
# One frame per Glasgow Coma Scale component, selected from chartevents by
# itemid: 220739 (eye), 223900 (verbal), 223901 (motor).
chartevents_GCS_eye, chartevents_GCS_verval, chartevents_GCS_motor = (
    chartevents[chartevents['itemid'].isin([gcs_itemid])]
    for gcs_itemid in (220739, 223900, 223901)
)

In [None]:
# Write the three GCS component frames to ./data/others.
for gcs_frame, target_path in (
    (chartevents_GCS_eye, './data/others/chartevents_GCS_eye.parquet'),
    (chartevents_GCS_verval, './data/others/chartevents_GCS_verval.parquet'),
    (chartevents_GCS_motor, './data/others/chartevents_GCS_motor.parquet'),
):
    gcs_frame.to_parquet(target_path)

# Fluid/Vasopressor intake

## Fluid

In [6]:
# Resampled fluid intake: reuse the parquet cache when present, otherwise
# build it from `inputevents`.
if not os.path.isfile('./data/resample/inputs/resample_fluid.parquet'):
    # itemids counted as fluid inputs
    fluid = [220949, 220950, 220952, 225158, 225159, 225161, 225828, 225797, 225799, 225823, 225825, 225827, 225830, 226089, 225941, 225943, 225944, 226361, 226363, 226364, 226375, 226377, 226452, 226453, 227533, 228140, 228141, 228142, 228341, 220955, 220967, 220968, 220953]
    # .copy() so the unit conversions below write to an independent frame
    # rather than to a filtered slice of `inputevents`.
    inputevents_fluid = inputevents[inputevents['itemid'].isin(fluid)].copy()

    # Bring every rate to mL/hour.
    per_minute = inputevents_fluid['rateuom']=='mL/min'
    per_kg_hour = inputevents_fluid['rateuom']=='mL/kg/hour'
    # 1 mL/min = 60 mL/hour, so multiply. The earlier version divided by 60,
    # which shrank these rates by a factor of 3600 relative to mL/hour.
    inputevents_fluid.loc[per_minute, 'rate'] = inputevents_fluid['rate']*60
    # mL/kg/hour * weight -> mL/hour (assumes patientweight is in kg - TODO confirm)
    inputevents_fluid.loc[per_kg_hour, 'rate'] = inputevents_fluid['rate'] * inputevents_fluid['patientweight']

    resample_fluids = utils.resample_inputrates(icustays,inputevents_fluid,'fluid')
    resample_fluids.to_parquet('./data/resample/inputs/resample_fluid.parquet')
else:
    resample_fluids = pd.read_parquet('./data/resample/inputs/resample_fluid.parquet')

  0%|          | 0/73181 [00:00<?, ?it/s]

## Vasopressor

In [12]:
def _inputevents_for(itemids):
    """Return the rows of `inputevents` whose itemid is in `itemids`."""
    return inputevents[inputevents['itemid'].isin(itemids)]


# One frame per vasoactive drug, selected by itemid.
inputevents_epinephrine = _inputevents_for([221289])
inputevents_dopamine = _inputevents_for([221662])
inputevents_dobutamine = _inputevents_for([221653])
inputevents_norephinephrine = _inputevents_for([221906])
inputevents_phenylephrine = _inputevents_for([221749, 229630, 229632])
inputevents_vasopressin = _inputevents_for([222315])

In [None]:
# Columns kept for resampling once every rate is in its target unit.
_VASO_COLUMNS = ['subject_id','hadm_id','stay_id','starttime','endtime','rate']


def _prepare_vasopressor(events, from_unit=None, convert=None):
    """Return a trimmed copy of `events` with `from_unit` rows converted.

    Parameters
    ----------
    events : DataFrame of inputevents rows for a single drug.
    from_unit : `rateuom` value whose rows need converting, or None when no
        conversion is required.
    convert : callable that takes the frame and returns the converted `rate`
        Series; only the rows matching `from_unit` are overwritten.

    The frame is copied first so the assignment never targets a filtered
    slice of `inputevents`. When `rateuom` is already gone (the cell was run
    before and the frame is already trimmed) the conversion is skipped, so
    re-running the cell neither fails nor converts twice.
    """
    events = events.copy()
    if from_unit is not None and 'rateuom' in events.columns:
        needs_conversion = events['rateuom'] == from_unit
        events.loc[needs_conversion, 'rate'] = convert(events)
    return events[_VASO_COLUMNS]


# Norepinephrine: mg/kg/min rows scaled by 1e3 (presumably to mcg/kg/min - confirm)
inputevents_norephinephrine = _prepare_vasopressor(
    inputevents_norephinephrine, 'mg/kg/min', lambda df: df['rate']*1e3)

# Epinephrine: no unit conversion
inputevents_epinephrine = _prepare_vasopressor(inputevents_epinephrine)

# Phenylephrine: mcg/min rows divided by patient weight
# (assumes patientweight is in kg, giving mcg/kg/min - TODO confirm)
inputevents_phenylephrine = _prepare_vasopressor(
    inputevents_phenylephrine, 'mcg/min', lambda df: df['rate']/df['patientweight'])

# Vasopressin: units/hour rows divided by 60, i.e. units/min
inputevents_vasopressin = _prepare_vasopressor(
    inputevents_vasopressin, 'units/hour', lambda df: df['rate']/60)

# Dopamine: no unit conversion
inputevents_dopamine = _prepare_vasopressor(inputevents_dopamine)

# Dobutamine: no unit conversion
inputevents_dobutamine = _prepare_vasopressor(inputevents_dobutamine)

In [None]:
# Resample each vasoactive drug and bind `resample_<drug>` at notebook level.
# Like the other resample cells, an existing parquet file is reused rather
# than recomputed on every run.
Vaso = ['epinephrine','dopamine','dobutamine','norephinephrine','phenylephrine','vasopressin']
for idx in tqdm(Vaso):
    cache_path = './data/resample/inputs/resample_%s.parquet'%idx
    if not os.path.isfile(cache_path):
        globals()['resample_{}'.format(idx)] = utils.resample_inputrates(icustays, globals()['inputevents_{}'.format(idx)],idx)
        globals()['resample_{}'.format(idx)].to_parquet(cache_path)
    else:
        globals()['resample_{}'.format(idx)] = pd.read_parquet(cache_path)

# Ventilator

In [19]:
# Resampled ventilation: reuse the parquet cache when present, otherwise
# build it from the procedureevents rows with itemid 225792 or 225794.
# The file is written to the same path that is checked and read. It used to
# be written to ./data/resample_ventilation.parquet, so the cache was never
# found.
ventilation_cache = './data/resample/procedures/resample_ventilation.parquet'
if not os.path.isfile(ventilation_cache):
    procedureevents_ventilation = procedureevents[procedureevents['itemid'].isin([225792,225794])]
    resample_ventilation = utils.resample_inputrates(icustays,procedureevents_ventilation,'ventilation')
    resample_ventilation.to_parquet(ventilation_cache)
else:
    resample_ventilation = pd.read_parquet(ventilation_cache)

# RRT

In [21]:
# Resampled renal replacement therapy: reuse the parquet cache when present,
# otherwise build it from the matching procedureevents rows and store it.
rrt_cache = './data/resample/procedures/resample_rrt.parquet'
if os.path.isfile(rrt_cache):
    resample_rrt = pd.read_parquet(rrt_cache)
else:
    rrt_itemids = [
        225441,  # Hemodialysis
        225802,  # Dialysis - CRRT
        225803,  # Dialysis - CVVHD
        225805,  # Peritoneal Dialysis
        225809,  # Dialysis - CVVHDF
        225955,  # Dialysis - SCUF
    ]
    procedureevents_rrt = procedureevents[procedureevents['itemid'].isin(rrt_itemids)]
    resample_rrt = utils.resample_inputrates(icustays, procedureevents_rrt, 'RRT')
    resample_rrt.to_parquet(rrt_cache)

# SOFA