In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import sys, os, pickle, utils

from tqdm.notebook import tqdm
from datetime import timedelta
#from utils import baseline_SCr

# When the kernel starts inside a folder named `code`, step up one level so
# that the relative `./data/...` paths below resolve.
# The last path component is compared exactly: a suffix check on the last four
# characters would also match folders such as `barcode` or `vscode`.
if os.path.basename(os.getcwd()) == "code":
    os.chdir('../')

# Folders holding the MIMIC-IV 2.2 parquet files (icu and hosp modules).
icu = './data/mimic-iv-2.2-parquet/icu/'
hosp = './data/mimic-iv-2.2-parquet/hosp/'

pd.set_option('mode.chained_assignment',  None)  # turn chained-assignment warnings off

In [2]:
# Read the three ICU tables used below, in the same order as before:
# item dictionary, input events, and ICU stays.
d_items, inputevents, icustays = (
    pd.read_parquet(icu + filename)
    for filename in ('d_items.parquet', 'inputevents.parquet', 'icustays.parquet')
)

In [3]:
# Build the antibiotic item dictionary from d_items:
#   1. keep items linked to `inputevents` whose category is 'Antibiotics';
#   2. keep only `itemid` and `label` (reset_index() without drop=True also
#      keeps the former d_items row label as an extra leading column);
#   3. remove the item ids listed below (non-antibiotics);
#   4. drop rows without an itemid and renumber the rows from 0.
d_antibiotics = (
    d_items.loc[
        (d_items['linksto'] == 'inputevents') & (d_items['category'] == 'Antibiotics'),
        ['itemid', 'label'],
    ]
    .reset_index()
    .loc[lambda items: ~items['itemid'].isin([
        225898, 225877, 225895, 225868, 225869, 225885, 225905, 225838,
        225848, 225844, 225896, 225837, 225873, 228003, 225871, 225903,
    ])]
    .dropna(subset='itemid')
    .reset_index(drop=True)
)

# I. ICU stays that received antibiotics at least once

In [4]:
# Distinct stay ids having at least one input event for a retained antibiotic item.
cohort = inputevents.loc[
    inputevents['itemid'].isin(d_antibiotics['itemid'].unique()),
    'stay_id',
].unique()
len(cohort)

44426

In [5]:
# Distinct stay ids whose `los` is at least 1
# (the name suggests 24 hours; assumes `los` is measured in days — TODO confirm).
cohort_24hrs = icustays.loc[icustays['los'].ge(1), 'stay_id'].unique()

In [6]:
# Lab-item lookup table; each distinct value of its `abbreviation` column
# selects one resampled parquet file.
labvalues = pd.read_csv('./data/labvalues/labvalues.csv')

# Bind every resampled table to a module-level variable named
# `resample_<abbreviation>` (e.g. `resample_SCr`); later cells fetch these
# tables through globals() or by name.
# NOTE(review): the printed summaries below include an abbreviation that
# formats as 'nan', so a `resample_nan` table is loaded too — possibly an 'NA'
# label that read_csv parsed as missing; confirm against labvalues.csv.
for abbr in labvalues['abbreviation'].unique():
    globals()['resample_{}'.format(abbr)] = pd.read_parquet(
        './data/resample/resample_%s.parquet' % abbr
    )

In [7]:
# For each freshly loaded resampled table, print how many rows have a
# missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_missing = globals()['resample_{}'.format(abbr)]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 6045091
Alk_Phos: 5993222
AG: 5681930
BUN: 5680686
Ca: 5717578
CK: 6053479
D_Bil: 6089827
Glu: 5684438
HCT: 5669912
INR: 5837318
PH: 5794335
PHOS: 5714993
Platelet: 5716713
Cl: 5657825
SCr: 5679572
nan: 5651078
Potassium: 5649265
T_Bil: 5990514
WBC: 5724317
Gl: 5684446
Mg: 5691936
Ca_ion: 5888568
HCO3: 5679451
AST: 5991103
ALT: 5991131
PTT: 5813728
baseexcess: 5805460
lactate: 5876867


In [8]:
# For each freshly loaded resampled table, print how many distinct stays have
# at least one row with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_stays = (
        globals()['resample_{}'.format(abbr)]
        .loc[lambda lab: lab['valuenum'].isna(), 'stay_id']
        .nunique(dropna=False)
    )
    print("%s:" % abbr, n_stays)

Alb: 73180
Alk_Phos: 73176
AG: 73173
BUN: 73173
Ca: 73174
CK: 73181
D_Bil: 73180
Glu: 73174
HCT: 73174
INR: 73173
PH: 73173
PHOS: 73174
Platelet: 73174
Cl: 73173
SCr: 73173
nan: 73173
Potassium: 73173
T_Bil: 73176
WBC: 73174
Gl: 73174
Mg: 73173
Ca_ion: 73178
HCO3: 73173
AST: 73176
ALT: 73176
PTT: 73174
baseexcess: 73173
lactate: 73175


## ffill

In [9]:
# Replace each resampled table with the result of utils.resample_ffill
# (presumably a forward fill of `valuenum`; see utils for the exact rule).
for abbr in labvalues['abbreviation'].unique():
    frame_name = 'resample_{}'.format(abbr)
    globals()[frame_name] = utils.resample_ffill(globals()[frame_name])

In [10]:
# After the ffill step: rows per table that still have a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_missing = globals()['resample_{}'.format(abbr)]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 3298969
Alk_Phos: 2571768
AG: 368858
BUN: 353370
Ca: 563671
CK: 3904267
D_Bil: 5428931
Glu: 444757
HCT: 359251
INR: 803184
PH: 2231745
PHOS: 559214
Platelet: 375744
Cl: 348921
SCr: 352466
nan: 388564
Potassium: 374056
T_Bil: 2542695
WBC: 379760
Gl: 444757
Mg: 481986
Ca_ion: 2533880
HCO3: 354410
AST: 2530008
ALT: 2529419
PTT: 818542
baseexcess: 2245528
lactate: 1777729


In [11]:
# After the ffill step: distinct stays per table that still have at least one
# row with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_stays = (
        globals()['resample_{}'.format(abbr)]
        .loc[lambda lab: lab['valuenum'].isna(), 'stay_id']
        .nunique(dropna=False)
    )
    print("%s:" % abbr, n_stays)

Alb: 69162
Alk_Phos: 66831
AG: 59602
BUN: 59463
Ca: 60264
CK: 69691
D_Bil: 72689
Glu: 59766
HCT: 58267
INR: 60546
PH: 63073
PHOS: 60248
Platelet: 58573
Cl: 59384
SCr: 59452
nan: 59511
Potassium: 59520
T_Bil: 66798
WBC: 58702
Gl: 59766
Mg: 60137
Ca_ion: 65698
HCO3: 59499
AST: 66710
ALT: 66713
PTT: 60649
baseexcess: 63120
lactate: 62396


## ffill & bfill

In [12]:
# Replace each (already ffill-processed) table with the result of
# utils.resample_bfill (presumably a backward fill of `valuenum`; see utils
# for the exact rule).
for abbr in labvalues['abbreviation'].unique():
    frame_name = 'resample_{}'.format(abbr)
    globals()[frame_name] = utils.resample_bfill(globals()[frame_name])

In [13]:
# After the ffill and bfill steps: rows per table that still have a missing
# `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_missing = globals()['resample_{}'.format(abbr)]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 2681194
Alk_Phos: 2059464
AG: 38437
BUN: 37731
Ca: 122531
CK: 3532618
D_Bil: 5179828
Glu: 40214
HCT: 38839
INR: 379299
PH: 1951135
PHOS: 121323
Platelet: 42132
Cl: 36724
SCr: 37571
nan: 36661
Potassium: 36850
T_Bil: 2027918
WBC: 42541
Gl: 40214
Mg: 70955
Ca_ion: 2148185
HCO3: 37450
AST: 2014392
ALT: 2013785
PTT: 395631
baseexcess: 1967454
lactate: 1473678


In [14]:
# After the ffill and bfill steps: distinct stays per table that still have at
# least one row with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_stays = (
        globals()['resample_{}'.format(abbr)]
        .loc[lambda lab: lab['valuenum'].isna(), 'stay_id']
        .nunique(dropna=False)
    )
    print("%s:" % abbr, n_stays)

Alb: 48047
Alk_Phos: 38830
AG: 2853
BUN: 2818
Ca: 5637
CK: 52386
D_Bil: 68485
Glu: 2911
HCT: 2850
INR: 11606
PH: 40569
PHOS: 5588
Platelet: 3041
Cl: 2777
SCr: 2810
nan: 2773
Potassium: 2771
T_Bil: 38460
WBC: 3061
Gl: 2911
Mg: 4048
Ca_ion: 42379
HCO3: 2818
AST: 38356
ALT: 38341
PTT: 11993
baseexcess: 40815
lactate: 31475


## Restricted to the antibiotic cohort

In [15]:
# Keep, in every resampled table, only the rows whose stay_id belongs to
# `cohort` (stays with at least one antibiotic input event).
for abbr in labvalues['abbreviation'].unique():
    frame_name = 'resample_{}'.format(abbr)
    globals()[frame_name] = globals()[frame_name].loc[
        lambda lab: lab['stay_id'].isin(cohort)
    ]

In [16]:
# After restricting to `cohort`: rows per table that still have a missing
# `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_missing = globals()['resample_{}'.format(abbr)]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 1798804
Alk_Phos: 1329297
AG: 10080
BUN: 9330
Ca: 79784
CK: 2635463
D_Bil: 3931379
Glu: 11616
HCT: 9549
INR: 181246
PH: 1020639
PHOS: 78583
Platelet: 10500
Cl: 8911
SCr: 9292
nan: 8910
Potassium: 9316
T_Bil: 1304215
WBC: 10619
Gl: 11616
Mg: 32389
Ca_ion: 1199155
HCO3: 9104
AST: 1297942
ALT: 1297842
PTT: 190399
baseexcess: 1029613
lactate: 676211


In [17]:
# After restricting to `cohort`: distinct stays per table that still have at
# least one row with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_stays = (
        globals()['resample_{}'.format(abbr)]
        .loc[lambda lab: lab['valuenum'].isna(), 'stay_id']
        .nunique(dropna=False)
    )
    print("%s:" % abbr, n_stays)

Alb: 26256
Alk_Phos: 20479
AG: 592
BUN: 562
Ca: 2822
CK: 31050
D_Bil: 40385
Glu: 639
HCT: 574
INR: 4252
PH: 17506
PHOS: 2777
Platelet: 620
Cl: 548
SCr: 560
nan: 547
Potassium: 560
T_Bil: 20194
WBC: 623
Gl: 639
Mg: 1392
Ca_ion: 18968
HCO3: 559
AST: 20218
ALT: 20204
PTT: 4443
baseexcess: 17636
lactate: 11646


## Restricted to stays with `los >= 1` (24hrs)

In [18]:
# Keep, in every resampled table, only the rows whose stay_id belongs to
# `cohort_24hrs` (stays with `los >= 1`).
for abbr in labvalues['abbreviation'].unique():
    frame_name = 'resample_{}'.format(abbr)
    globals()[frame_name] = globals()[frame_name].loc[
        lambda lab: lab['stay_id'].isin(cohort_24hrs)
    ]

In [19]:
# After restricting to `cohort_24hrs`: rows per table that still have a
# missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_missing = globals()['resample_{}'.format(abbr)]['valuenum'].isna().sum()
    print("%s:" % abbr, n_missing)

Alb: 1726141
Alk_Phos: 1271222
AG: 3952
BUN: 3301
Ca: 67837
CK: 2559178
D_Bil: 3839310
Glu: 5552
HCT: 3339
INR: 155316
PH: 949407
PHOS: 66580
Platelet: 3766
Cl: 2990
SCr: 3308
nan: 3021
Potassium: 3441
T_Bil: 1246258
WBC: 3896
Gl: 5552
Mg: 22589
Ca_ion: 1126725
HCO3: 3115
AST: 1240307
ALT: 1240198
PTT: 163462
baseexcess: 958116
lactate: 625178


In [20]:
# After restricting to `cohort_24hrs`: distinct stays per table that still
# have at least one row with a missing `valuenum`.
for abbr in labvalues['abbreviation'].unique():
    n_stays = (
        globals()['resample_{}'.format(abbr)]
        .loc[lambda lab: lab['valuenum'].isna(), 'stay_id']
        .nunique(dropna=False)
    )
    print("%s:" % abbr, n_stays)

Alb: 22305
Alk_Phos: 17321
AG: 101
BUN: 78
Ca: 2047
CK: 26914
D_Bil: 35411
Glu: 152
HCT: 87
INR: 2716
PH: 13605
PHOS: 2000
Platelet: 97
Cl: 71
SCr: 78
nan: 72
Potassium: 85
T_Bil: 17043
WBC: 100
Gl: 152
Mg: 721
Ca_ion: 14999
HCO3: 75
AST: 17082
ALT: 17068
PTT: 2852
baseexcess: 13721
lactate: 8835


In [22]:
# Number of distinct stays left in the SCr table after all filters above.
resample_SCr['stay_id'].nunique(dropna=False)

39295

In [23]:
# Display the table bound to `resample_nan`, i.e. the one loaded for the
# abbreviation that formats as 'nan' in the loading loop.
# NOTE(review): the rows shown all carry itemid 220645; the abbreviation was
# presumably an 'NA' label that read_csv parsed as missing — confirm against
# labvalues.csv.
resample_nan

Unnamed: 0,charttime,valuenum,subject_id,hadm_id,stay_id,itemid
0,2157-11-20 19:18:02,138.0,10001217,24597018,37067082,220645
1,2157-11-20 20:18:02,138.0,10001217,24597018,37067082,220645
2,2157-11-20 21:18:02,138.0,10001217,24597018,37067082,220645
3,2157-11-20 22:18:02,138.0,10001217,24597018,37067082,220645
4,2157-11-20 23:18:02,138.0,10001217,24597018,37067082,220645
...,...,...,...,...,...,...
123,2164-09-17 12:26:28,139.0,19999840,21033226,38978960,220645
124,2164-09-17 13:26:28,139.0,19999840,21033226,38978960,220645
125,2164-09-17 14:26:28,139.0,19999840,21033226,38978960,220645
126,2164-09-17 15:26:28,139.0,19999840,21033226,38978960,220645
