In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys, os, pickle, utils

from tqdm.notebook import tqdm
from datetime import timedelta
#from utils import baseline_SCr

# If the notebook was launched from a directory named "code", step up one
# level so the relative data paths below resolve from the parent directory.
# The final path component is compared (not just the last four characters),
# so directories that merely end in "code" (e.g. "barcode") are left alone.
if os.path.basename(os.getcwd()) == "code":
    os.chdir('../')

# Relative locations of the MIMIC-IV 2.2 parquet exports.
icu = './data/mimic-iv-2.2-parquet/icu/'
hosp = './data/mimic-iv-2.2-parquet/hosp/'

# Turn pandas' chained-assignment warning off for the whole session.
pd.set_option('mode.chained_assignment',  None)

In [2]:
# Read the three ICU tables used below from the parquet export directory.
d_items, inputevents, icustays = (
    pd.read_parquet(icu + filename)
    for filename in ('d_items.parquet', 'inputevents.parquet', 'icustays.parquet')
)

In [3]:
# Antibiotic item definitions: d_items rows that link to inputevents and are
# categorised as 'Antibiotics', reduced to their id and label.
is_antibiotic_item = (d_items['linksto'] == 'inputevents') & (d_items['category'] == 'Antibiotics')
# reset_index() without drop keeps the previous row labels as an 'index' column.
d_antibiotics = d_items.loc[is_antibiotic_item, ['itemid', 'label']].reset_index()

# Remove the non-antibiotic item ids, drop rows without an itemid, and renumber.
non_antibiotic_itemids = [
    225898, 225877, 225895, 225868, 225869, 225885, 225905, 225838,
    225848, 225844, 225896, 225837, 225873, 228003, 225871, 225903,
]
d_antibiotics = (
    d_antibiotics[~d_antibiotics['itemid'].isin(non_antibiotic_itemids)]
    .dropna(subset='itemid')
    .reset_index(drop=True)
)

# I. ICU stays that received antibiotics at least once

In [36]:
# Stay ids with at least one inputevents row whose itemid is an antibiotic item.
antibiotic_itemids = d_antibiotics['itemid'].unique()
received_antibiotic = inputevents['itemid'].isin(antibiotic_itemids)
cohort = inputevents.loc[received_antibiotic, 'stay_id'].unique()
len(cohort)

44426

In [5]:
# Stay ids whose `los` is at least 3.
# NOTE(review): the variable name suggests a 24-hour cut-off, but the filter is
# `los >= 3`. If `los` is in fractional days (as the MIMIC-IV documentation
# describes), this selects stays of 3+ days — confirm the intended threshold.
meets_min_los = icustays['los'] >= 3
cohort_24hrs = icustays.loc[meets_min_los, 'stay_id'].unique()

In [6]:
# Load the lab-value catalogue, then read one resampled parquet file per lab
# abbreviation and expose it as a notebook-level variable resample_<abbreviation>.
labvalues = pd.read_csv('./data/labvalues/labvalues.csv')
for abbr in labvalues['abbreviation'].unique():
    source_path = './data/resample/resample_%s.parquet' % abbr
    globals()['resample_{}'.format(abbr)] = pd.read_parquet(source_path)
# Restricting each frame to `cohort` at load time is left disabled here.

In [7]:
# Per lab: number of rows in the freshly loaded frame with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 6045091
Alk_Phos: 5993222
AG: 5681930
BUN: 5680686
Ca: 5717578
CK: 6053479
D_Bil: 6089827
Glu: 5684438
HCT: 5669912
INR: 5837318
PH: 5794335
PHOS: 5714993
Platelet: 5716713
Cl: 5657825
SCr: 5679572
Na: 5651078
Potassium: 5649265
T_Bil: 5990514
WBC: 5724317
Gl: 5684446
Mg: 5691936
Ca_ion: 5888568
HCO3: 5679451
AST: 5991103
ALT: 5991131
PTT: 5813728
baseexcess: 5805460
lactate: 5876867
PaO2: 5806033
PaCO2: 5805477


In [8]:
# Per lab: number of distinct stay_ids having at least one row with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

Alb: 73180
Alk_Phos: 73176
AG: 73173
BUN: 73173
Ca: 73174
CK: 73181
D_Bil: 73180
Glu: 73174
HCT: 73174
INR: 73173
PH: 73173
PHOS: 73174
Platelet: 73174
Cl: 73173
SCr: 73173
Na: 73173
Potassium: 73173
T_Bil: 73176
WBC: 73174
Gl: 73174
Mg: 73173
Ca_ion: 73178
HCO3: 73173
AST: 73176
ALT: 73176
PTT: 73174
baseexcess: 73173
lactate: 73175
PaO2: 73173
PaCO2: 73173


## ffill

In [9]:
# Replace every lab frame with the result of utils.resample_ffill
# (presumably a forward fill of missing values — confirm in utils).
# The bfill variant is left disabled in this cell.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    globals()[key] = utils.resample_ffill(globals()[key])

In [10]:
# Per lab: rows whose `valuenum` is still missing in the current frame.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 3298969
Alk_Phos: 2571768
AG: 368858
BUN: 353370
Ca: 563671
CK: 3904267
D_Bil: 5428931
Glu: 444757
HCT: 359251
INR: 803184
PH: 2231745
PHOS: 559214
Platelet: 375744
Cl: 348921
SCr: 352466
Na: 388564
Potassium: 374056
T_Bil: 2542695
WBC: 379760
Gl: 444757
Mg: 481986
Ca_ion: 2533880
HCO3: 354410
AST: 2530008
ALT: 2529419
PTT: 818542
baseexcess: 2245528
lactate: 1777729
PaO2: 2248815
PaCO2: 2245724


In [11]:
# Per lab: distinct stay_ids that still have at least one missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

Alb: 69162
Alk_Phos: 66831
AG: 59602
BUN: 59463
Ca: 60264
CK: 69691
D_Bil: 72689
Glu: 59766
HCT: 58267
INR: 60546
PH: 63073
PHOS: 60248
Platelet: 58573
Cl: 59384
SCr: 59452
Na: 59511
Potassium: 59520
T_Bil: 66798
WBC: 58702
Gl: 59766
Mg: 60137
Ca_ion: 65698
HCO3: 59499
AST: 66710
ALT: 66713
PTT: 60649
baseexcess: 63120
lactate: 62396
PaO2: 63258
PaCO2: 63122


## ffill & bfill

In [12]:
# Replace every lab frame with the result of utils.resample_bfill
# (presumably a backward fill of missing values — confirm in utils).
# The ffill variant is left disabled in this cell.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    globals()[key] = utils.resample_bfill(globals()[key])

In [13]:
# Per lab: rows whose `valuenum` is still missing in the current frame.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 2681194
Alk_Phos: 2059464
AG: 38437
BUN: 37731
Ca: 122531
CK: 3532618
D_Bil: 5179828
Glu: 40214
HCT: 38839
INR: 379299
PH: 1951135
PHOS: 121323
Platelet: 42132
Cl: 36724
SCr: 37571
Na: 36661
Potassium: 36850
T_Bil: 2027918
WBC: 42541
Gl: 40214
Mg: 70955
Ca_ion: 2148185
HCO3: 37450
AST: 2014392
ALT: 2013785
PTT: 395631
baseexcess: 1967454
lactate: 1473678
PaO2: 1969899
PaCO2: 1967574


In [14]:
# Per lab: distinct stay_ids that still have at least one missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

Alb: 48047
Alk_Phos: 38830
AG: 2853
BUN: 2818
Ca: 5637
CK: 52386
D_Bil: 68485
Glu: 2911
HCT: 2850
INR: 11606
PH: 40569
PHOS: 5588
Platelet: 3041
Cl: 2777
SCr: 2810
Na: 2773
Potassium: 2771
T_Bil: 38460
WBC: 3061
Gl: 2911
Mg: 4048
Ca_ion: 42379
HCO3: 2818
AST: 38356
ALT: 38341
PTT: 11993
baseexcess: 40815
lactate: 31475
PaO2: 40851
PaCO2: 40819


## with cohort

In [15]:
# Keep, in every lab frame, only the rows whose stay_id belongs to `cohort`.
# The frames are filtered as they currently are; re-reading the parquet files is left disabled.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    in_cohort = globals()[key]['stay_id'].isin(cohort)
    globals()[key] = globals()[key][in_cohort]

In [16]:
# Per lab: rows whose `valuenum` is still missing in the current frame.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 1798804
Alk_Phos: 1329297
AG: 10080
BUN: 9330
Ca: 79784
CK: 2635463
D_Bil: 3931379
Glu: 11616
HCT: 9549
INR: 181246
PH: 1020639
PHOS: 78583
Platelet: 10500
Cl: 8911
SCr: 9292
Na: 8910
Potassium: 9316
T_Bil: 1304215
WBC: 10619
Gl: 11616
Mg: 32389
Ca_ion: 1199155
HCO3: 9104
AST: 1297942
ALT: 1297842
PTT: 190399
baseexcess: 1029613
lactate: 676211
PaO2: 1031144
PaCO2: 1029655


In [17]:
# Per lab: distinct stay_ids that still have at least one missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

Alb: 26256
Alk_Phos: 20479
AG: 592
BUN: 562
Ca: 2822
CK: 31050
D_Bil: 40385
Glu: 639
HCT: 574
INR: 4252
PH: 17506
PHOS: 2777
Platelet: 620
Cl: 548
SCr: 560
Na: 547
Potassium: 560
T_Bil: 20194
WBC: 623
Gl: 639
Mg: 1392
Ca_ion: 18968
HCO3: 559
AST: 20218
ALT: 20204
PTT: 4443
baseexcess: 17636
lactate: 11646
PaO2: 17651
PaCO2: 17638


## 24hrs

In [18]:
# Keep, in every lab frame, only the rows whose stay_id belongs to `cohort_24hrs`.
# The frames are filtered as they currently are; re-reading the parquet files is left disabled.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    in_los_cohort = globals()[key]['stay_id'].isin(cohort_24hrs)
    globals()[key] = globals()[key][in_los_cohort]

In [19]:
# Per lab: rows whose `valuenum` is still missing in the current frame.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 1108282
Alk_Phos: 781198
AG: 496
BUN: 585
Ca: 2514
CK: 1869855
D_Bil: 2994672
Glu: 570
HCT: 271
INR: 65478
PH: 527924
PHOS: 2920
Platelet: 271
Cl: 496
SCr: 496
Na: 496
Potassium: 496
T_Bil: 761941
WBC: 347
Gl: 570
Mg: 496
Ca_ion: 681293
HCO3: 496
AST: 754442
ALT: 754874
PTT: 69221
baseexcess: 533468
lactate: 359090
PaO2: 534750
PaCO2: 533468


In [33]:
# Per lab: distinct stay_ids that still have at least one missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

Alb: 7731
Alk_Phos: 5538
AG: 5
BUN: 6
Ca: 28
CK: 10885
D_Bil: 16083
Glu: 6
HCT: 3
INR: 550
PH: 4101
PHOS: 33
Platelet: 3
Cl: 5
SCr: 5
Na: 5
Potassium: 5
T_Bil: 5387
WBC: 4
Gl: 6
Mg: 5
Ca_ion: 5010
HCO3: 5
AST: 5391
ALT: 5391
PTT: 584
baseexcess: 4148
lactate: 2690
PaO2: 4157
PaCO2: 4148


In [21]:
# Number of distinct stay_ids present in the current SCr frame.
resample_SCr['stay_id'].nunique(dropna=False)

18910

In [22]:
# Subject ids having at least one Alb row whose `valuenum` is still missing.
resample_Alb.loc[resample_Alb['valuenum'].isna(), 'subject_id'].unique()

array([10002155, 10002428, 10004401, ..., 19997367, 19999287, 19999840],
      dtype=int64)

# Vitals

In [23]:
# Load the vitals catalogue, then read one resampled parquet file per vital
# abbreviation and expose it as a notebook-level variable resample_<abbreviation>.
vitals = pd.read_csv('./data/vitals/vitals_new2.csv')
for abbr in vitals['abbreviation'].unique():
    source_path = './data/resample/resample_%s.parquet' % abbr
    globals()['resample_{}'.format(abbr)] = pd.read_parquet(source_path)
# Restricting each frame to `cohort` at load time is left disabled here.

In [24]:
# Per vital sign: number of rows in the freshly loaded frame with a missing `valuenum`.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

FiO2: 5329813
SaO2: 6014983
RR: 432107
SBP: 612241
DBP: 613584
temp: 4401740
HR: 362318
CVP: 5430652


In [25]:
# Replace every vitals frame with the result of utils.resample_ffill
# (presumably a forward fill of missing values — confirm in utils).
# The bfill variant is left disabled in this cell.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    globals()[key] = utils.resample_ffill(globals()[key])

In [26]:
# Per vital sign: rows whose `valuenum` is still missing in the current frame.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

FiO2: 2044060
SaO2: 3964744
RR: 59036
SBP: 56127
DBP: 56243
temp: 194047
HR: 52620
CVP: 4223267


In [27]:
# Replace every vitals frame with the result of utils.resample_bfill
# (presumably a backward fill of missing values — confirm in utils).
# The ffill variant is left disabled in this cell.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    globals()[key] = utils.resample_bfill(globals()[key])

In [32]:
# Per vital sign: rows whose `valuenum` is still missing in the current frame.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    n_missing = globals()[key]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

FiO2: 1788213
SaO2: 3572820
RR: 2580
SBP: 2305
DBP: 2408
temp: 17800
HR: 146
CVP: 3996557


In [31]:
# Per vital sign: distinct stay_ids that still have at least one missing `valuenum`.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

FiO2: 38261
SaO2: 56289
RR: 100
SBP: 114
DBP: 117
temp: 811
HR: 18
CVP: 56750


In [42]:
# Per vital sign: distinct stay_ids in `cohort` that still have at least one
# missing `valuenum`. The frames themselves are not modified.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    in_cohort = globals()[key]['stay_id'].isin(cohort)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[in_cohort & is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

FiO2: 15989
SaO2: 29225
RR: 24
SBP: 11
DBP: 12
temp: 242
HR: 1
CVP: 29206


In [43]:
# Per vital sign: distinct stay_ids in `cohort_24hrs` that still have at least
# one missing `valuenum`. The frames themselves are not modified.
for abbr in vitals['abbreviation'].unique():
    key = 'resample_{}'.format(abbr)
    in_los_cohort = globals()[key]['stay_id'].isin(cohort_24hrs)
    is_missing = globals()[key]['valuenum'].isna()
    n_stays = globals()[key].loc[in_los_cohort & is_missing, 'stay_id'].nunique(dropna=False)
    print("%s:" % abbr, n_stays)

FiO2: 6057
SaO2: 13896
RR: 8
SBP: 12
DBP: 13
temp: 53
HR: 1
CVP: 15219
