In [24]:
# Libraries
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
import math
import time
import random
import sklearn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import torch
from torch import nn
from torch import optim
from tqdm import tqdm
import torch.nn.functional as F

from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

# Remove printing error
pd.options.mode.chained_assignment = None

## Data

In [25]:
# Data
mimic_data = pd.read_csv('mimic_data.csv')


# Split into 'preprocessing dataset' and a 'train/vaid/test dataset'
stays = mimic_data['stay_id'].unique()

random.Random(0).shuffle(stays)
model_data2 = mimic_data.set_index("stay_id").loc[stays].reset_index()
n = round(0.5 * len(stays))
preprocessing_stays = stays[:n]
hold_out_stays = stays[n:]
preprocessing_data = model_data2[model_data2['stay_id'].isin(preprocessing_stays)]
hold_out_data = model_data2[model_data2['stay_id'].isin(hold_out_stays)]

eicu_data = pd.read_csv('eicu_data.csv')

In [21]:
preprocessing_data = preprocessing_data.iloc[:,:6]
hold_out_data = hold_out_data.iloc[:,:6]
eicu_data = eicu_data.iloc[:,:6]

In [26]:
preprocessing_data.stay_id.nunique()
hold_out_data.stay_id.nunique()
eicu_data.stay_id.nunique()

4347

4347

1668

In [10]:
preprocessing_data

Unnamed: 0,stay_id,date,iv_flag,first_po_flag,po_flag,iv_treatment_length
0,39409066.0,2137-05-22,1.0,1.0,0,0
1,39409066.0,2137-05-23,1.0,1.0,0,1
2,30322661.0,2169-10-09,1.0,,0,0
3,30322661.0,2169-10-10,1.0,1.0,0,1
4,30322661.0,2169-10-11,1.0,1.0,0,2
...,...,...,...,...,...,...
14515,37575956.0,2153-02-15,1.0,1.0,0,0
14516,37575956.0,2153-02-16,1.0,1.0,0,1
14517,37540542.0,2161-05-10,1.0,,0,0
14518,37540542.0,2161-05-11,1.0,1.0,0,1


In [191]:
hold_out_data

Unnamed: 0,stay_id,date,iv_flag,first_po_flag,po_flag,iv_treatment_length
14520,31658148.0,2154-01-11,1.0,1.0,0,0
14521,31658148.0,2154-01-12,1.0,1.0,0,1
14522,31658148.0,2154-01-13,1.0,1.0,0,2
14523,33816676.0,2117-08-21,1.0,1.0,0,0
14524,33816676.0,2117-08-22,1.0,1.0,0,1
...,...,...,...,...,...,...
28818,37914144.0,2136-05-23,1.0,1.0,0,1
28819,37246494.0,2127-07-17,1.0,1.0,0,0
28820,37246494.0,2127-07-18,1.0,1.0,0,1
28821,37246494.0,2127-07-19,1.0,1.0,0,2


In [12]:
eicu_data

Unnamed: 0,stay_id,date,iv_flag,first_po_flag,po_flag,iv_treatment_length
0,141196,2022-09-03,1.0,,0,0
1,141392,2022-09-02,1.0,,0,0
2,141470,2022-09-02,1.0,,0,0
3,142405,2022-09-02,1.0,,0,0
4,143068,2022-09-02,1.0,,0,0
...,...,...,...,...,...,...
4942,3335807,2022-09-02,1.0,,0,0
4943,3340875,2022-09-04,1.0,,0,0
4944,3341168,2022-09-03,1.0,,0,0
4945,3341168,2022-09-04,1.0,,0,1


In [29]:
# Import
admissions = pd.read_csv(r"mimic-iv-2.0/hosp/admissions.csv")
patients = pd.read_csv(r"mimic-iv-2.0/hosp/patients.csv")
icustays = pd.read_csv(r"mimic-iv-2.0/icu/icustays.csv")

In [16]:
admissions

Unnamed: 0,subject_id,hadm_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,marital_status,race,edregtime,edouttime,hospital_expire_flag
0,10000032,22595853,2180-05-06 22:23:00,2180-05-07 17:15:00,,URGENT,TRANSFER FROM HOSPITAL,HOME,Other,ENGLISH,WIDOWED,WHITE,2180-05-06 19:17:00,2180-05-06 23:30:00,0
1,10000032,22841357,2180-06-26 18:27:00,2180-06-27 18:49:00,,EW EMER.,EMERGENCY ROOM,HOME,Medicaid,ENGLISH,WIDOWED,WHITE,2180-06-26 15:54:00,2180-06-26 21:31:00,0
2,10000032,25742920,2180-08-05 23:44:00,2180-08-07 17:50:00,,EW EMER.,EMERGENCY ROOM,HOSPICE,Medicaid,ENGLISH,WIDOWED,WHITE,2180-08-05 20:58:00,2180-08-06 01:44:00,0
3,10000032,29079034,2180-07-23 12:35:00,2180-07-25 17:55:00,,EW EMER.,EMERGENCY ROOM,HOME,Medicaid,ENGLISH,WIDOWED,WHITE,2180-07-23 05:54:00,2180-07-23 14:00:00,0
4,10000068,25022803,2160-03-03 23:16:00,2160-03-04 06:26:00,,EU OBSERVATION,EMERGENCY ROOM,,Other,ENGLISH,SINGLE,WHITE,2160-03-03 21:55:00,2160-03-04 06:26:00,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
454319,19999828,25744818,2149-01-08 16:44:00,2149-01-18 17:00:00,,EW EMER.,TRANSFER FROM HOSPITAL,HOME HEALTH CARE,Other,ENGLISH,SINGLE,WHITE,2149-01-08 09:11:00,2149-01-08 18:12:00,0
454320,19999828,29734428,2147-07-18 16:23:00,2147-08-04 18:10:00,,EW EMER.,PHYSICIAN REFERRAL,HOME HEALTH CARE,Other,ENGLISH,SINGLE,WHITE,2147-07-17 17:18:00,2147-07-18 17:34:00,0
454321,19999840,21033226,2164-09-10 13:47:00,2164-09-17 13:42:00,2164-09-17 13:42:00,EW EMER.,EMERGENCY ROOM,DIED,Other,ENGLISH,WIDOWED,WHITE,2164-09-10 11:09:00,2164-09-10 14:46:00,1
454322,19999840,26071774,2164-07-25 00:27:00,2164-07-28 12:15:00,,EW EMER.,EMERGENCY ROOM,HOME,Other,ENGLISH,WIDOWED,WHITE,2164-07-24 21:16:00,2164-07-25 01:20:00,0


In [23]:
patients

Unnamed: 0,subject_id,gender,anchor_age,anchor_year,anchor_year_group,dod
0,10000032,F,52,2180,2014 - 2016,2180-09-09
1,10000048,F,23,2126,2008 - 2010,
2,10000068,F,19,2160,2008 - 2010,
3,10000084,M,72,2160,2017 - 2019,2161-02-13
4,10000102,F,27,2136,2008 - 2010,
...,...,...,...,...,...,...
315455,19999828,F,46,2147,2017 - 2019,
315456,19999829,F,28,2186,2008 - 2010,
315457,19999840,M,58,2164,2008 - 2010,2164-09-17
315458,19999914,F,49,2158,2017 - 2019,


In [17]:
icustays

Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
0,10000032,29079034,39553978,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266
1,10000980,26913865,39765666,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2189-06-27 08:42:00,2189-06-27 20:38:27,0.497535
2,10001217,24597018,37067082,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-11-20 19:18:02,2157-11-21 22:08:00,1.118032
3,10001217,27703517,34592300,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-12-19 15:42:24,2157-12-20 14:27:41,0.948113
4,10001725,25563031,31205490,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2110-04-11 15:52:22,2110-04-12 23:59:56,1.338588
...,...,...,...,...,...,...,...,...
76938,19999442,26785317,32336619,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2148-11-19 14:23:43,2148-11-26 13:12:15,6.950370
76939,19999625,25304202,31070865,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2139-10-10 19:18:00,2139-10-11 18:21:28,0.960741
76940,19999828,25744818,36075953,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2149-01-08 18:12:00,2149-01-10 13:11:02,1.790995
76941,19999840,21033226,38978960,Trauma SICU (TSICU),Surgical Intensive Care Unit (SICU),2164-09-12 09:26:28,2164-09-17 16:35:15,5.297766


## LOS

In [20]:
# get list 
preprocessing_data_list = preprocessing_data.stay_id.unique().tolist()
hold_out_data_list = hold_out_data.stay_id.unique().tolist()

In [37]:
# preprocessing_data
icustays[icustays['stay_id'].isin(preprocessing_data_list)]['los'].mean()
icustays[icustays['stay_id'].isin(preprocessing_data_list)]['los'].std()
icustays[icustays['stay_id'].isin(preprocessing_data_list)]['los'].min()
icustays[icustays['stay_id'].isin(preprocessing_data_list)]['los'].max()

3.142316867635401

2.7796220164694145

0.3034953703703704

38.99027777777778

In [36]:
# hold_out_data
icustays[icustays['stay_id'].isin(hold_out_data_list)]['los'].mean()
icustays[icustays['stay_id'].isin(hold_out_data_list)]['los'].std()
icustays[icustays['stay_id'].isin(hold_out_data_list)]['los'].min()
icustays[icustays['stay_id'].isin(hold_out_data_list)]['los'].max()

3.1171784553416955

2.707265130140487

0.1560069444444444

28.41572916666667

## Age

In [31]:
# get list 
preprocessing_data_list = preprocessing_data.stay_id.unique().tolist()
hold_out_data_list = hold_out_data.stay_id.unique().tolist()

In [32]:
# Get subject_id from stay_id
preprocessing_data_icustays = icustays[icustays['stay_id'].isin(preprocessing_data_list)]
hold_out_data_icustays = icustays[icustays['stay_id'].isin(hold_out_data_list)]

In [45]:
# get list 
preprocessing_data_subject_id_list = preprocessing_data_icustays.subject_id.unique().tolist()
hold_out_data_subject_id_list = hold_out_data_icustays.subject_id.unique().tolist()

In [35]:
# preprocessing_data
patients[patients['subject_id'].isin(preprocessing_data_subject_id_list)]['anchor_age'].mean()
patients[patients['subject_id'].isin(preprocessing_data_subject_id_list)]['anchor_age'].std()
patients[patients['subject_id'].isin(preprocessing_data_subject_id_list)]['anchor_age'].min()
patients[patients['subject_id'].isin(preprocessing_data_subject_id_list)]['anchor_age'].max()

65.3012077294686

15.156439549309528

18

91

In [34]:
# hold_out_data
patients[patients['subject_id'].isin(hold_out_data_subject_id_list)]['anchor_age'].mean()
patients[patients['subject_id'].isin(hold_out_data_subject_id_list)]['anchor_age'].std()
patients[patients['subject_id'].isin(hold_out_data_subject_id_list)]['anchor_age'].min()
patients[patients['subject_id'].isin(hold_out_data_subject_id_list)]['anchor_age'].max()

65.44000973472865

15.231965427633112

18

91

## Gender

In [40]:
# preprocessing_data
patients[patients['subject_id'].isin(preprocessing_data_subject_id_list)]['gender'].value_counts()
2435 + 1705
2435 / (2435 + 1705)
1705 / (2435 + 1705)

M    2435
F    1705
Name: gender, dtype: int64

4140

0.5881642512077294

0.41183574879227053

In [41]:
# hold_out_data
patients[patients['subject_id'].isin(hold_out_data_subject_id_list)]['gender'].value_counts()
2421 + 1688
2421 / (2421 + 1688)
1688 / (2421 + 1688)

M    2421
F    1688
Name: gender, dtype: int64

4109

0.5891944512046726

0.4108055487953273

## Race

In [52]:
# Group race
admissions['race'] = admissions['race'].str.replace('SOUTH AMERICAN', 'HISPANIC')
admissions['race'] = admissions['race'].str.replace('MULTIPLE RACE/ETHNICITY', 'OTHER')
admissions['race'] = admissions['race'].str.replace('PORTUGUESE', 'OTHISPANICHER')
admissions['race'] = admissions['race'].str.replace('UNABLE TO OBTAIN', 'UNKNOWN')
admissions['race'] = admissions['race'].str.replace('OTHISPANICHER', 'OTHER')
admissions['race'] = admissions['race'].str.replace('PATIENT DECLINED TO ANSWER', 'UNKNOWN')

In [54]:
x = 0
string_list = ['NATIVE', 'ASIAN', 'HISPANIC', 'BLACK', 'WHITE', 'OTHER', 'UNKNOWN']
for string in string_list:
    x += 1
    sub_df = admissions[admissions['race'].str.contains(string, case=False, na=False)]
    sub_df['grouped_race'] = string # use filter string as final_label 
    if x == 1:
        new_admissions = sub_df
    else:
        new_admissions = pd.concat([new_admissions, sub_df])
new_admissions.drop(columns=['race'], inplace=True)

In [60]:
# preprocessing_data
new_admissions[new_admissions['hadm_id'].isin(preprocessing_data_hadm_id_list)]['grouped_race'].value_counts(normalize=True)

WHITE       0.679739
UNKNOWN     0.102772
BLACK       0.095560
OTHER       0.062204
HISPANIC    0.032680
ASIAN       0.023439
NATIVE      0.003606
Name: grouped_race, dtype: float64

In [59]:
# hold_out_data
new_admissions[new_admissions['hadm_id'].isin(hold_out_data_hadm_id_list)]['grouped_race'].value_counts(normalize=True)

WHITE       0.680933
BLACK       0.099185
UNKNOWN     0.093524
OTHER       0.054801
HISPANIC    0.039402
ASIAN       0.029438
NATIVE      0.002717
Name: grouped_race, dtype: float64

## Marital_status

In [61]:
# preprocessing_data
admissions[admissions['hadm_id'].isin(preprocessing_data_hadm_id_list)]['marital_status'].value_counts(normalize=True)

MARRIED     0.512883
SINGLE      0.267239
WIDOWED     0.143313
DIVORCED    0.076564
Name: marital_status, dtype: float64

In [62]:
# hold_out_data
admissions[admissions['hadm_id'].isin(hold_out_data_hadm_id_list)]['marital_status'].value_counts(normalize=True)

MARRIED     0.501349
SINGLE      0.280844
WIDOWED     0.139563
DIVORCED    0.078244
Name: marital_status, dtype: float64

## Language

In [63]:
# preprocessing_data
admissions[admissions['hadm_id'].isin(preprocessing_data_hadm_id_list)]['language'].value_counts(normalize=True)

ENGLISH    0.907493
?          0.092507
Name: language, dtype: float64

In [64]:
# hold_out_data
admissions[admissions['hadm_id'].isin(hold_out_data_hadm_id_list)]['language'].value_counts(normalize=True)

ENGLISH    0.894542
?          0.105458
Name: language, dtype: float64

## Insurance

In [65]:
# preprocessing_data
admissions[admissions['hadm_id'].isin(preprocessing_data_hadm_id_list)]['insurance'].value_counts(normalize=True)

Medicare    0.489130
Other       0.449121
Medicaid    0.061748
Name: insurance, dtype: float64

In [66]:
# hold_out_data
admissions[admissions['hadm_id'].isin(hold_out_data_hadm_id_list)]['insurance'].value_counts(normalize=True)

Medicare    0.494681
Other       0.433164
Medicaid    0.072155
Name: insurance, dtype: float64

## Antimicrobial treatment length

In [215]:
# preprocessing_data
preprocessing_data.groupby('stay_id').size().mean()
preprocessing_data.groupby('stay_id').size().std()
preprocessing_data.groupby('stay_id').size().min()
preprocessing_data.groupby('stay_id').size().max()

3.3402346445824707

2.1649528700974052

1

40

In [216]:
# hold_out_data
hold_out_data.groupby('stay_id').size().mean()
hold_out_data.groupby('stay_id').size().std()
hold_out_data.groupby('stay_id').size().min()
hold_out_data.groupby('stay_id').size().max()

3.2903151598803775

2.0139654082164844

1

24

## IV & PO days

In [200]:
# Create new iv_only_flag
preprocessing_data['iv_only_flag'] = 0
pos = -1
for x in range(len(preprocessing_data)):
    pos += 1
    if preprocessing_data.iloc[x]['iv_flag'] == 1:
        if preprocessing_data.iloc[x]['first_po_flag'] != preprocessing_data.iloc[x]['first_po_flag']: # check if NaN 
            preprocessing_data.loc[x,'iv_only_flag'] = 1

In [193]:
preprocessing_data

Unnamed: 0,stay_id,date,iv_flag,first_po_flag,po_flag,iv_treatment_length,iv_only_flag
0,39409066.0,2137-05-22,1.0,1.0,0,0,0
1,39409066.0,2137-05-23,1.0,1.0,0,1,0
2,30322661.0,2169-10-09,1.0,,0,0,1
3,30322661.0,2169-10-10,1.0,1.0,0,1,0
4,30322661.0,2169-10-11,1.0,1.0,0,2,0
...,...,...,...,...,...,...,...
14515,37575956.0,2153-02-15,1.0,1.0,0,0,0
14516,37575956.0,2153-02-16,1.0,1.0,0,1,0
14517,37540542.0,2161-05-10,1.0,,0,0,1
14518,37540542.0,2161-05-11,1.0,1.0,0,1,0


In [84]:
# Number of iv treatment days
preprocessing_data.groupby('stay_id')['iv_flag'].sum().mean()
preprocessing_data.groupby('stay_id')['iv_flag'].sum().std()

2.792500575109271

1.4698093454278869

In [83]:
# Number of po treatment days
preprocessing_data.groupby('stay_id')['first_po_flag'].sum().mean()
preprocessing_data.groupby('stay_id')['first_po_flag'].sum().std()

2.7561536691971473

1.9093300898312386

In [82]:
# Number of only IV treatment days
preprocessing_data.groupby('stay_id')['iv_only_flag'].sum().mean()
preprocessing_data.groupby('stay_id')['iv_only_flag'].sum().std()

0.5840809753853232

1.194019689817728

In [81]:
# Number of only po treatment days
preprocessing_data.groupby('stay_id')['po_flag'].sum().mean()
preprocessing_data.groupby('stay_id')['po_flag'].sum().std()

0.5477340694731999

1.391649309420526

In [205]:
# Create new iv_only_flag
hold_out_data.reset_index(inplace=True, drop=True)
hold_out_data['iv_only_flag'] = 0
pos = -1
for x in range(len(hold_out_data)):
    pos += 1
    if hold_out_data.iloc[x]['iv_flag'] == 1:
        if hold_out_data.iloc[x]['first_po_flag'] != hold_out_data.iloc[x]['first_po_flag']: # check if NaN 
            hold_out_data.loc[x,'iv_only_flag'] = 1

In [207]:
# Number of iv treatment days
hold_out_data.groupby('stay_id')['iv_flag'].sum().mean()
hold_out_data.groupby('stay_id')['iv_flag'].sum().std()

2.746031746031746

1.4598176013710564

In [208]:
# Number of po treatment days
hold_out_data.groupby('stay_id')['first_po_flag'].sum().mean()
hold_out_data.groupby('stay_id')['first_po_flag'].sum().std()

2.6919714745801704

1.8072404999558938

In [209]:
# Number of only IV treatment days
hold_out_data.groupby('stay_id')['iv_only_flag'].sum().mean()
hold_out_data.groupby('stay_id')['iv_only_flag'].sum().std()

0.598343685300207

1.1744625107948052

In [210]:
# Number of only po treatment days
hold_out_data.groupby('stay_id')['po_flag'].sum().mean()
hold_out_data.groupby('stay_id')['po_flag'].sum().std()

0.5442834138486312

1.2272996712028428

## Antibiotics

In [85]:
antibiotics = pd.read_csv(r"mimic-iv-2.0/antibiotic.csv")

In [86]:
antibiotics

Unnamed: 0,subject_id,hadm_id,stay_id,antibiotic,route,starttime,stoptime
0,18480379,22159705,,Cipro,ORAL,2124-05-03 20:00:00,2124-05-04 16:00:00
1,18480379,22159705,,Cipro,ORAL,2124-05-03 20:00:00,2124-05-03 18:00:00
2,17068038,23263906,,Cipro,ORAL,2189-04-23 20:00:00,2189-04-24 20:00:00
3,18480379,22159705,,Cipro,ORAL,2124-05-03 20:00:00,2124-05-03 19:00:00
4,10167765,23327987,,AveLOX,ORAL,2174-07-22 21:00:00,2174-07-24 20:00:00
...,...,...,...,...,...,...,...
773626,13019601,25893500,31850495.0,Piperacillin-Tazobactam,IV,2194-08-12 12:00:00,2194-08-12 11:00:00
773627,13019601,25893500,31850495.0,Piperacillin-Tazobactam,IV,2194-08-09 10:00:00,2194-08-12 11:00:00
773628,13019601,25893500,31850495.0,Piperacillin-Tazobactam,IV,2194-08-12 12:00:00,2194-08-16 06:00:00
773629,13019601,25893500,31850495.0,Piperacillin-Tazobactam,IV,2194-08-09 01:00:00,2194-08-10 00:00:00


In [98]:
# Filter for relevant delivery methods
route_list = ['IV', 'PO/NG', 'PO', 'NU', 'ORAL']
antibiotics['flag'] = np.where(antibiotics.route.str.contains('|'.join(route_list), na=False, case=False),1,0)
antibiotics = antibiotics[antibiotics['flag'] == 1]
antibiotics.drop(columns=['flag'], inplace=True)
# Need to remove some others that got through the filter 
antibiotics = antibiotics.groupby('route').filter(lambda x: len(x) > 100)
antibiotics['route'] = antibiotics['route'].replace({'PO/NG':'PO', 'NU':'PO', 'ORAL':'PO'})

antibiotics['starttime'] = pd.to_datetime(antibiotics['starttime'])
antibiotics['stoptime'] = pd.to_datetime(antibiotics['stoptime'])
#antibiotics.stay_id.nunique()

## Filter so only ICU stays given data we are using ##
antibiotics = antibiotics[~antibiotics['stay_id'].isna()]

# Filter out Mupirocin
antibiotics = antibiotics[~antibiotics['antibiotic'].str.contains('Mupirocin', case=False)]

In [100]:
# preprocessing_data all
len(preprocessing_data_list)
antibiotics[antibiotics['stay_id'].isin(preprocessing_data_list)].stay_id.nunique()
antibiotics[antibiotics['stay_id'].isin(preprocessing_data_list)]['antibiotic'].value_counts(normalize=True)

4347

4347

Vancomycin                    0.259636
CefePIME                      0.088680
Azithromycin                  0.063832
Piperacillin-Tazobactam       0.061237
Levofloxacin                  0.047985
                                ...   
Ciprofloxacin                 0.000055
bactrim                       0.000055
Meropenem Graded Challenge    0.000055
Vancomycin Ora                0.000055
Cefepime Graded Challenge     0.000055
Name: antibiotic, Length: 70, dtype: float64

In [5]:
0.063098 + 0.061568

0.124666

In [6]:
0.042065 + 0.036099

0.078164

In [7]:
0.031128 + 0.014685

0.045813

In [8]:
0.008642 + 0.084818

0.09346

In [101]:
# preprocessing_data IV
antibiotics[(antibiotics['stay_id'].isin(preprocessing_data_list)) & (antibiotics['route'] == 'IV')]['antibiotic'].value_counts(normalize=True)

Vancomycin                       0.359618
CefePIME                         0.122830
Piperacillin-Tazobactam          0.084818
CeFAZolin                        0.063098
CefazoLIN                        0.061568
CeftriaXONE                      0.042065
CefTRIAXone                      0.036099
MetRONIDAZOLE (FLagyl)           0.031128
Ciprofloxacin IV                 0.031128
Meropenem                        0.028834
Azithromycin                     0.022027
Levofloxacin                     0.017591
CefTAZidime                      0.016597
MetroNIDAZOLE                    0.014685
Ampicillin-Sulbactam             0.011472
Piperacillin-Tazobactam Na       0.008642
Linezolid                        0.008107
Aztreonam                        0.007572
Clindamycin                      0.007036
Ampicillin Sodium                0.003901
Tobramycin Sulfate               0.003059
Sulfamethoxazole-Trimethoprim    0.002753
Gentamicin Sulfate               0.002677
Doxycycline Hyclate              0

In [14]:
0.061768 + 0.056405

0.118173

In [15]:
0.036743 + 0.042304

0.079047

In [102]:
# preprocessing_data PO
antibiotics[(antibiotics['stay_id'].isin(preprocessing_data_list)) & (antibiotics['route'] == 'PO')]['antibiotic'].value_counts(normalize=True)

Azithromycin                              0.229593
Vancomycin Oral Liquid                    0.134459
Ciprofloxacin HCl                         0.127706
Levofloxacin                              0.126912
Sulfameth/Trimethoprim SS                 0.061768
Sulfameth/Trimethoprim DS                 0.056405
MetRONIDAZOLE (FLagyl)                    0.042304
MetroNIDAZOLE                             0.036743
Doxycycline Hyclate                       0.027011
Amoxicillin-Clavulanic Acid               0.026415
Sulfameth/Trimethoprim Suspension         0.025621
Linezolid                                 0.017478
Cephalexin                                0.015690
Cefpodoxime Proxetil                      0.013307
Clindamycin                               0.011321
Amoxicillin                               0.007944
Rifampin                                  0.006753
Clarithromycin                            0.005164
Nitrofurantoin Monohyd (MacroBID)         0.004965
Ampicillin                     

In [104]:
# hold_out_data
len(hold_out_data_list)
antibiotics[antibiotics['stay_id'].isin(hold_out_data_list)].stay_id.nunique()
antibiotics[antibiotics['stay_id'].isin(hold_out_data_list)]['antibiotic'].value_counts(normalize=True)

4347

4347

Vancomycin                   0.259180
CefePIME                     0.102254
Azithromycin                 0.062858
Piperacillin-Tazobactam      0.055656
Levofloxacin                 0.048071
                               ...   
Neomycin Sulfate             0.000055
Tetracycline                 0.000055
RifAMPin                     0.000055
Moxifloxacin                 0.000055
Cefepime Graded Challenge    0.000055
Name: antibiotic, Length: 66, dtype: float64

In [9]:
0.061761 + 0.059066

0.120827

In [10]:
0.007860 + 0.076359

0.084219

In [11]:
0.043569 + 0.036832

0.080401

In [12]:
0.031816 + 0.016170

0.047986

In [105]:
# hold_out_data IV
antibiotics[(antibiotics['stay_id'].isin(hold_out_data_list)) & (antibiotics['route'] == 'IV')]['antibiotic'].value_counts(normalize=True)

Vancomycin                       0.355592
CefePIME                         0.140290
Piperacillin-Tazobactam          0.076359
CeFAZolin                        0.061761
CefazoLIN                        0.059066
CeftriaXONE                      0.043569
CefTRIAXone                      0.036832
MetRONIDAZOLE (FLagyl)           0.031816
Meropenem                        0.031666
Ciprofloxacin IV                 0.030768
Levofloxacin                     0.019913
Azithromycin                     0.019539
MetroNIDAZOLE                    0.016170
CefTAZidime                      0.014823
Ampicillin-Sulbactam             0.010181
Piperacillin-Tazobactam Na       0.007860
Linezolid                        0.007711
Clindamycin                      0.005315
Aztreonam                        0.005315
Ampicillin Sodium                0.004941
Sulfamethoxazole-Trimethoprim    0.003593
Doxycycline Hyclate              0.003069
Gentamicin                       0.002770
Tobramycin Sulfate               0

In [16]:
0.052526 + 0.047897

0.10042300000000001

In [17]:
0.053934 + 0.039243

0.09317700000000001

In [106]:
# hold_out_data PO
antibiotics[(antibiotics['stay_id'].isin(hold_out_data_list)) & (antibiotics['route'] == 'PO')]['antibiotic'].value_counts(normalize=True)

Azithromycin                         0.231837
Vancomycin Oral Liquid               0.142282
Levofloxacin                         0.123767
Ciprofloxacin HCl                    0.121352
MetRONIDAZOLE (FLagyl)               0.053934
Sulfameth/Trimethoprim SS            0.052526
Sulfameth/Trimethoprim DS            0.047897
MetroNIDAZOLE                        0.039243
Doxycycline Hyclate                  0.031797
Sulfameth/Trimethoprim Suspension    0.026363
Amoxicillin-Clavulanic Acid          0.024753
Cephalexin                           0.018112
Linezolid                            0.016905
Cefpodoxime Proxetil                 0.012679
Clindamycin                          0.010062
Amoxicillin                          0.008855
Nitrofurantoin Monohyd (MacroBID)    0.006037
Clarithromycin                       0.006037
Rifampin                             0.003622
Ampicillin                           0.003421
Minocycline                          0.002616
Amoxicillin-Clavulanate Susp.     

## Diagnosis

In [39]:
# Import diagnosis
diagnoses_icd = pd.read_csv(r"mimic-iv-2.0/hosp/diagnoses_icd.csv")
d_icd_diagnoses = pd.read_csv(r"mimic-iv-2.0/hosp/d_icd_diagnoses.csv")

In [40]:
diagnoses_icd = pd.merge(diagnoses_icd, d_icd_diagnoses)
diagnoses_icd.drop_duplicates(inplace=True)

In [35]:
len(preprocessing_data_hadm_id_list)

4324

In [307]:
len(preprocessing_data_hadm_id_list)
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)]['hadm_id'].nunique()

4324

4324

In [42]:
# preprocessing_data
values = diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)]['long_title'].value_counts()

In [43]:
values

Acute kidney failure, unspecified                                                                                1077
Hyperlipidemia, unspecified                                                                                      1003
Unspecified essential hypertension                                                                                993
Acute posthemorrhagic anemia                                                                                      974
Other and unspecified hyperlipidemia                                                                              891
                                                                                                                 ... 
Coarctation of aorta                                                                                                1
Malignant neoplasm of cerebrum, except lobes and ventricles                                                         1
Acute sialoadenitis                                     

In [325]:
# Sepsis
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('sepsis', case=False))]['hadm_id'].nunique()
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('sepsis', case=False))]['hadm_id'].nunique()/ diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)]['hadm_id'].nunique()

787

0.18200740055504164

In [326]:
# Pneumonia
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('pneumonia', case=False))]['hadm_id'].nunique()
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('pneumonia', case=False))]['hadm_id'].nunique()/ diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)]['hadm_id'].nunique()

1137

0.2629509713228492

In [328]:
# UTI
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains("urinary tract infection", case=False))| \
                                  (diagnoses_icd['long_title'].str.contains("pyelonephritis", case=False))]['hadm_id'].nunique()
diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains("urinary tract infection", case=False))| \
                                  (diagnoses_icd['long_title'].str.contains("pyelonephritis", case=False))]['hadm_id'].nunique()/ diagnoses_icd[diagnoses_icd['hadm_id'].isin(preprocessing_data_hadm_id_list)]['hadm_id'].nunique()

2815

0.6510175763182239

In [117]:
# hold_out_data
diagnoses_icd[diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list)]['long_title'].value_counts(normalize=True)

Acute kidney failure, unspecified                                             0.014697
Hyperlipidemia, unspecified                                                   0.012647
Unspecified essential hypertension                                            0.012310
Acute posthemorrhagic anemia                                                  0.011223
Other and unspecified hyperlipidemia                                          0.010773
                                                                                ...   
Abnormality in fetal heart rate and rhythm complicating labor and delivery    0.000012
Inflammatory disease of uterus, unspecified                                   0.000012
Other diseases of pharynx                                                     0.000012
Displacement of nephrostomy catheter, initial encounter                       0.000012
Foreign body granuloma of muscle                                              0.000012
Name: long_title, Length: 5486, dtype: floa

In [324]:
# Sepsis
diagnoses_icd[diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('sepsis', case=False))]['hadm_id'].nunique()
diagnoses_icd[diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('sepsis', case=False))]['hadm_id'].nunique()/ len(hold_out_data_hadm_id_list)

849

0.1963459759481961

In [323]:
# Pneumonia
diagnoses_icd[diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('pneumonia', case=False))]['hadm_id'].nunique()
diagnoses_icd[diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list)&(diagnoses_icd['long_title'].str.contains('pneumonia', case=False))]['hadm_id'].nunique()/ len(hold_out_data_hadm_id_list)

1151

0.2661887141535615

In [327]:
# UTI
diagnoses_icd[(diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list))&(diagnoses_icd['long_title'].str.contains("urinary tract infection", case=False))| \
                                  (diagnoses_icd['long_title'].str.contains("pyelonephritis", case=False))]['hadm_id'].nunique()
diagnoses_icd[(diagnoses_icd['hadm_id'].isin(hold_out_data_hadm_id_list))&(diagnoses_icd['long_title'].str.contains("urinary tract infection", case=False))| \
                                  (diagnoses_icd['long_title'].str.contains("pyelonephritis", case=False))]['hadm_id'].nunique()/ len(hold_out_data_hadm_id_list)

2797

0.646854764107308

## Organism

In [None]:
# Import microbiologyevents
microbiologyevents = pd.read_csv(r"mimic-iv-2.0/hosp/microbiologyevents.csv")

In [119]:
microbiologyevents

Unnamed: 0,microevent_id,subject_id,hadm_id,micro_specimen_id,chartdate,charttime,spec_itemid,spec_type_desc,test_seq,storedate,...,org_name,isolate_num,quantity,ab_itemid,ab_name,dilution_text,dilution_comparison,dilution_value,interpretation,comments
0,1,10000032,,636109,2180-03-23 00:00:00,2180-03-23 11:51:00,70093,Blood (Toxo),1,2180-03-26 00:00:00,...,,,,,,,,,,NEGATIVE FOR TOXOPLASMA IgG ANTIBODY BY EIA. ...
1,2,10000032,,1836584,2180-03-23 00:00:00,2180-03-23 11:51:00,70017,SEROLOGY/BLOOD,1,2180-03-24 00:00:00,...,,,,,,,,,,POSITIVE BY EIA. A positive IgG result genera...
2,3,10000032,,4131591,2180-03-23 00:00:00,2180-03-23 11:51:00,70087,Blood (CMV AB),1,2180-03-26 00:00:00,...,,,,,,,,,,___
3,4,10000032,,4131591,2180-03-23 00:00:00,2180-03-23 11:51:00,70087,Blood (CMV AB),2,2180-03-26 00:00:00,...,,,,,,,,,,NEGATIVE FOR CMV IgM ANTIBODY BY EIA. INTERPR...
4,5,10000032,,6028147,2180-03-23 00:00:00,2180-03-23 11:51:00,70088,Blood (EBV),1,2180-03-25 00:00:00,...,,,,,,,,,,POSITIVE BY EIA.
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3395224,3395225,19999987,23865745.0,9008390,2145-11-05 00:00:00,2145-11-05 10:35:00,70012,BLOOD CULTURE,1,2145-11-11 00:00:00,...,,,,,,,,,,NO GROWTH.
3395225,3395226,19999987,23865745.0,3162785,2145-11-05 00:00:00,2145-11-05 12:15:00,70012,BLOOD CULTURE,1,2145-11-11 00:00:00,...,,,,,,,,,,NO GROWTH.
3395226,3395227,19999987,23865745.0,5530834,2145-11-06 00:00:00,2145-11-06 13:15:00,70012,BLOOD CULTURE,1,2145-11-12 00:00:00,...,,,,,,,,,,NO GROWTH.
3395227,3395228,19999987,23865745.0,8701158,2145-11-06 00:00:00,2145-11-06 17:05:00,70012,BLOOD CULTURE,1,2145-11-12 00:00:00,...,,,,,,,,,,NO GROWTH.


In [269]:
len(preprocessing_data_hadm_id_list)
microbiologyevents[microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list)]['hadm_id'].nunique()

4324

3976

In [132]:
# preprocessing_data
microbiologyevents[microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list)]['org_name'].value_counts() / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))])
#len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))])
#3165 / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))])

# Number with no growth
len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))&(microbiologyevents['org_name'].isna())]) / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))])

ESCHERICHIA COLI                                           0.068603
STAPH AUREUS COAG +                                        0.066934
KLEBSIELLA PNEUMONIAE                                      0.028395
PSEUDOMONAS AERUGINOSA                                     0.022304
YEAST                                                      0.018749
                                                             ...   
NEISSERIA SUBFLAVA                                         0.000022
PROPIONIBACTERIUM SPECIES                                  0.000022
PRESUMPTIVE POSITIVE FOR LEGIONELLA SEROGROUP 1 ANTIGEN    0.000022
INFLUENZA B VIRUS                                          0.000022
GEMELLA SPECIES                                            0.000022
Name: org_name, Length: 177, dtype: float64

0.6723962284599545

In [2]:
1 - 0.6723962284599545

0.3276037715400455

In [131]:
# Check sums to one
(microbiologyevents[microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list)]['org_name'].value_counts() / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))])).sum() + len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))&(microbiologyevents['org_name'].isna())]) / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(preprocessing_data_hadm_id_list))])

1.0

In [133]:
# hold_out_data
microbiologyevents[microbiologyevents['hadm_id'].isin(hold_out_data_hadm_id_list)]['org_name'].value_counts() / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(hold_out_data_hadm_id_list))])

# Number with no growth
len(microbiologyevents[(microbiologyevents['hadm_id'].isin(hold_out_data_hadm_id_list))&(microbiologyevents['org_name'].isna())]) / len(microbiologyevents[(microbiologyevents['hadm_id'].isin(hold_out_data_hadm_id_list))])

STAPH AUREUS COAG +                  0.073341
ESCHERICHIA COLI                     0.065078
KLEBSIELLA PNEUMONIAE                0.027828
PSEUDOMONAS AERUGINOSA               0.022822
YEAST                                0.016067
                                       ...   
PARAINFLUENZA VIRUS TYPE 2           0.000022
CANDIDA LUSITANIAE                   0.000022
CYTOMEGALOVIRUS                      0.000022
ASPERGILLUS VERSICOLOR GROUP         0.000022
RESPIRATORY SYNCYTIAL VIRUS (RSV)    0.000022
Name: org_name, Length: 166, dtype: float64

0.6692316100120231

In [1]:
1 - 0.6692316100120231

0.33076838998797686

# eICU

In [145]:
# get list 
eicu_data_list = eicu_data.stay_id.unique().tolist()

In [146]:
# Import
patient = pd.read_csv(r"eicu-collaborative-research-database-2.0/patient.csv")
patient.rename(columns={'patientunitstayid': 'stay_id'}, inplace=True)
patient = patient[patient['stay_id'].isin(eicu_data_list)]

In [147]:
patient

Unnamed: 0,stay_id,patienthealthsystemstayid,gender,age,ethnicity,hospitalid,wardid,apacheadmissiondx,admissionheight,hospitaladmittime24,...,unitadmitsource,unitvisitnumber,unitstaytype,admissionweight,dischargeweight,unitdischargetime24,unitdischargeoffset,unitdischargelocation,unitdischargestatus,uniquepid
4,141196,128943,Male,71,Caucasian,67,109,,162.6,20:21:00,...,ICU to SDU,2,stepdown/other,,63.20,22:23:00,1463,Floor,Alive,002-37665
34,141392,129109,Female,78,Caucasian,73,97,"Sepsis, renal/UTI (including bladder)",160.0,08:32:00,...,Emergency Department,1,admit,,85.20,00:34:00,961,Floor,Alive,002-49989
43,141470,129172,Female,58,Caucasian,66,90,"Obstruction/other, surgery for (with or withou...",162.6,07:23:00,...,Operating Room,1,admit,,108.60,21:19:00,831,Floor,Alive,002-56630
165,142405,129878,Female,52,Asian,63,95,"Respiratory - medical, other",,09:45:00,...,Emergency Department,1,admit,,55.10,23:08:00,795,Floor,Alive,002-47719
258,143068,130383,Male,64,Caucasian,73,92,Heart transplant,170.0,19:57:00,...,Operating Room,1,admit,103.9,106.70,18:35:00,3602,Floor,Alive,002-72540
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
196543,3247135,2647037,Female,87,Caucasian,449,1082,"Bleeding, lower GI",157.5,14:16:24,...,Other ICU,1,admit,41.8,64.41,18:03:00,5703,Floor,Alive,033-4795
198023,3335807,2727344,Female,23,African American,459,1108,Asthma,175.3,11:34:00,...,Emergency Department,1,admit,98.2,95.25,16:06:00,190,Telemetry,Alive,035-2456
198834,3340875,2731924,Female,70,African American,458,1109,Emphysema/bronchitis,168.8,19:50:00,...,Emergency Department,1,admit,67.4,67.50,18:49:00,2595,Telemetry,Alive,035-2884
198883,3341168,2732192,Male,61,African American,459,1108,"Sepsis, pulmonary",180.3,22:36:00,...,Emergency Department,1,admit,87.6,84.10,19:03:00,2591,Telemetry,Alive,035-10304


LOS

In [159]:
# eicu_data
(patient[['unitdischargeoffset']]/60/24).mean()
(patient[['unitdischargeoffset']]/60/24).std()
(patient[['unitdischargeoffset']]/60/24).min()
(patient[['unitdischargeoffset']]/60/24).max()

unitdischargeoffset    3.173888
dtype: float64

unitdischargeoffset    2.833997
dtype: float64

unitdischargeoffset    0.097917
dtype: float64

unitdischargeoffset    24.745833
dtype: float64

Age

In [170]:
# Change to int
patient['age'].replace('> 89', 90, inplace=True)
patient['age'] = patient['age'].astype(int)

In [169]:
patient['age']

4         71
34        78
43        58
165       52
258       64
          ..
196543    87
198023    23
198834    70
198883    61
199794    58
Name: age, Length: 1668, dtype: object

In [172]:
# eicu_data
patient['age'].mean()
patient['age'].std()
patient['age'].min()
patient['age'].max()

64.73681055155875

15.91220400359517

16

90

Gender / Sex

In [174]:
patient['gender'].value_counts(normalize=True)

Male      0.502998
Female    0.497002
Name: gender, dtype: float64

Race

In [182]:
patient.ethnicity.value_counts(normalize=True)

Caucasian           0.787696
African American    0.150784
Other/Unknown       0.034982
Hispanic            0.013872
Asian               0.008444
Native American     0.004222
Name: ethnicity, dtype: float64

Antimicrobial treatment length

In [217]:
# hold_out_data
eicu_data.groupby('stay_id').size().mean()
eicu_data.groupby('stay_id').size().std()
eicu_data.groupby('stay_id').size().min()
eicu_data.groupby('stay_id').size().max()

2.9658273381294964

1.9420290933876612

1

17

IV & PO days

In [219]:
# Create new iv_only_flag
eicu_data['iv_only_flag'] = 0
pos = -1
for x in range(len(eicu_data)):
    pos += 1
    if eicu_data.iloc[x]['iv_flag'] == 1:
        if eicu_data.iloc[x]['first_po_flag'] != eicu_data.iloc[x]['first_po_flag']: # check if NaN 
            eicu_data.loc[x,'iv_only_flag'] = 1

In [220]:
# Number of iv treatment days
eicu_data.groupby('stay_id')['iv_flag'].sum().mean()
eicu_data.groupby('stay_id')['iv_flag'].sum().std()

2.4646282973621103

1.5472779413928923

In [221]:
# Number of po treatment days
eicu_data.groupby('stay_id')['first_po_flag'].sum().mean()
eicu_data.groupby('stay_id')['first_po_flag'].sum().std()

1.1396882494004796

1.7807968259368603

In [222]:
# Number of only IV treatment days
eicu_data.groupby('stay_id')['iv_only_flag'].sum().mean()
eicu_data.groupby('stay_id')['iv_only_flag'].sum().std()

1.8261390887290168

1.5101298298551233

In [223]:
# Number of only po treatment days
eicu_data.groupby('stay_id')['po_flag'].sum().mean()
eicu_data.groupby('stay_id')['po_flag'].sum().std()

0.5011990407673861

1.321457268052521

Antibiotics

In [225]:
# Find where eicu data made
import glob

pattern = './**/*.ipynb'
query = 'eicu_iv_switch_stayid_dates_method_3'

for filepath in glob.iglob(pattern, recursive=True):
    with open(filepath) as file:
        s = file.read()
        if (s.find(query) > -1):
            print(filepath)

./eicu_catch22.ipynb
./initial_analysis_patient_outset.ipynb
./review_demographics.ipynb


In [227]:
# Load potential antibiotics 
infection_treatment_categories = pd.read_csv(r"eicu-collaborative-research-database-2.0/infection_treatment_categories.csv")
infection_treatment_categories2 = infection_treatment_categories.rx.str.split(pat='|', expand=True)
infection_treatment_categories2.rename(columns={0:'a', 1:'b', 2:'c', 3:'d', 4:'e'}, inplace=True)
infection_treatment_categories3 = infection_treatment_categories2.d.str.split(pat='/', expand=True)
# Create lists of antibiotics 
list1 = infection_treatment_categories2.d.to_list()
list2 = infection_treatment_categories2.e.to_list()
# Add: 'linezolid', 'Zyvox', 'Synercid', 'quinupristin', 'dalfopristin', 'quinupristin/dalfopristin', 'cephalosporin', 'ticarcillin', 'amoxicillin', 'penicillin', 'benzathine', 'piperacillin', 'ampicillin', 
# Worked these out by splitting other sections
list3 = ['linezolid', 'Zyvox', 'Synercid', 'quinupristin', 'dalfopristin', 'quinupristin/dalfopristin', 'cephalosporin', 'ticarcillin', 'amoxicillin', 'penicillin', 'benzathine', 'piperacillin', 'ampicillin']
antibiotic_list = list1 + list2 + list3
# Drop duplicates through set
antibiotic_list = list(set(antibiotic_list))
# Drop None
antibiotic_list = [x for x in antibiotic_list if x is not None]
print(len(antibiotic_list))

101


In [None]:
# Load all medications
medication = pd.read_csv(r"eicu-collaborative-research-database-2.0/medication.csv", dtype={'drugname': 'object'})
medication = medication[['medicationid', 'patientunitstayid', 'drugstartoffset', 'drugname', 'routeadmin', 'drugivadmixture', 'drugordercancelled', 'drugstopoffset']]
# Filter for antibiotics 
medication['flag'] = np.where(medication.drugname.str.contains('|'.join(antibiotic_list), na=False, case=False),1,0)
antibiotics = medication[medication['flag'] == 1]
antibiotics = antibiotics[antibiotics['drugordercancelled'] == 'No']
assert len(antibiotics[antibiotics['drugordercancelled'] == 'No']) == len(antibiotics)
print(antibiotics.patientunitstayid.nunique())
# Set those where 'drugivadmixture' == 'Yes' to 'IV' route
antibiotics.loc[antibiotics['drugivadmixture'] == 'Yes', 'routeadmin'] = 'IV'
# Filter for relevant delivery methods
route_list = ['IV', 'Intrav', 'PO', 'tube', 'ORAL']
antibiotics = antibiotics[antibiotics.routeadmin.str.contains('|'.join(route_list), na=False, case=False)]
routeadmin_value_counts = antibiotics.routeadmin.value_counts()
# Group so all IV or PO
iv_route_list = ['IV', 'Intrav']
po_route_list = ['PO', 'tube', 'ORAL']
antibiotics.loc[antibiotics.routeadmin.str.contains('|'.join(iv_route_list), na=False, case=False), 'routeadmin'] = 'IV'
antibiotics.loc[antibiotics.routeadmin.str.contains('|'.join(po_route_list), na=False, case=False), 'routeadmin'] = 'PO'
antibiotics = antibiotics.drop(columns=['drugivadmixture', 'drugordercancelled', 'flag'])

In [230]:
# Rename
antibiotics.rename(columns={'patientunitstayid': 'stay_id'}, inplace=True)

In [257]:
# Get first workd to group
antibiotics['antibiotic'] = antibiotics.drugname.str.split().str.get(0)
# Make lowecase for group
antibiotics['antibiotic'] = antibiotics['antibiotic'].str.lower()

In [253]:
# Also looked at selecting the longest word bu I dont think this will work
max(antibiotics.drugname.iloc[0].split(), key=len)

'(REPACKAGE)'

In [259]:
antibiotics

Unnamed: 0,medicationid,stay_id,drugstartoffset,drugname,routeadmin,drugstopoffset,antibiotic
25,7573839,141194,13422,VANCOMYCIN 1.25 GM IN NS 250 ML IVPB (REPACKAGE),IV,12622,vancomycin
32,10670852,141203,-129,100 ML - METRONIDAZOLE IN NACL 5-0.79 MG/ML-...,IV,1058,100
41,7814668,141203,1281,100 ML - METRONIDAZOLE IN NACL 5-0.79 MG/ML-...,IV,2414,100
42,8867081,141203,2361,VANCOMYCIN 1.25 GM IN NS 250 ML IVPB (REPACKAGE),IV,2414,vancomycin
55,8076824,141227,-1323,CEFEPIME HCL 2 G IJ SOLR,IV,507,cefepime
...,...,...,...,...,...,...,...
7290293,111808256,3347198,274,PIPERACILLIN-TAZOBACTAM 3.375 G MINI-BAG PLUS,IV,80,piperacillin-tazobactam
7292726,110617275,3348568,1830,LEVOFLOXACIN,PO,4673,levofloxacin
7295823,111068263,3350223,2431,LEVOFLOXACIN,PO,5311,levofloxacin
7295987,111598516,3350309,2750,PIPERACILLIN-TAZOBACTAM 3.375 G MINI-BAG PLUS,IV,3726,piperacillin-tazobactam


In [260]:
# preprocessing_data
len(eicu_data_list)
antibiotics[antibiotics['stay_id'].isin(eicu_data_list)].stay_id.nunique()
antibiotics[antibiotics['stay_id'].isin(eicu_data_list)]['drugname'].value_counts(normalize=True)
len(antibiotics[antibiotics['stay_id'].isin(eicu_data_list)]['drugname'].value_counts(normalize=True))
# For grouped - with some missing
antibiotics[antibiotics['stay_id'].isin(eicu_data_list)]['antibiotic'].value_counts(normalize=True)
len(antibiotics[antibiotics['stay_id'].isin(eicu_data_list)]['antibiotic'].value_counts(normalize=True))

1668

1668

LEVOFLOXACIN 750 MG PO TABS                                        0.079190
LEVOFLOXACIN IN D5W 750 MG/150ML IV SOLN                           0.068707
vancomycin                                                         0.053690
VANCOMYCIN HCL 1000 MG IV SOLR                                     0.050149
VANCOMYCIN HCL IN DEXTROSE 1 GM/200ML IV SOLN                      0.045332
LEVOFLOXACIN 500 MG PO TABS                                        0.045049
LEVOFLOXACIN                                                       0.043491
METRONIDAZOLE 500 MG PO TABS                                       0.040232
metroNIDAZOLE                                                      0.040091
VANCOMYCIN HCL 1 GM VIAL                                           0.037541
cefTRIAXone                                                        0.031733
azithromycin                                                       0.031308
VANCOMYCIN HCL 10 G IV SOLR                                        0.027483
LEVOFLOXACIN

49

vancomycin                 0.324550
levofloxacin               0.262077
metronidazole              0.103272
azithromycin               0.056949
piperacillin/tazobactam    0.041791
ceftriaxone                0.041366
1                          0.026349
150                        0.025074
piperacillin               0.021958
cefepime                   0.019408
piperacillin-tazobactam    0.018700
meropenem                  0.013175
cefazolin                  0.011616
100                        0.011475
20                         0.007933
ciprofloxacin              0.007791
clindamycin                0.006517
Name: antibiotic, dtype: float64

17

In [261]:
# preprocessing_data IV
antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'IV')]['drugname'].value_counts(normalize=True)
len(antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'IV')]['drugname'].value_counts(normalize=True))
# For grouped - with some missing
antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'IV')]['antibiotic'].value_counts(normalize=True)
len(antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'IV')]['antibiotic'].value_counts(normalize=True))

LEVOFLOXACIN IN D5W 750 MG/150ML IV SOLN                           0.101295
VANCOMYCIN HCL 1000 MG IV SOLR                                     0.073935
VANCOMYCIN HCL IN DEXTROSE 1 GM/200ML IV SOLN                      0.066834
VANCOMYCIN HCL 1 GM VIAL                                           0.055347
cefTRIAXone                                                        0.046784
VANCOMYCIN HCL 10 G IV SOLR                                        0.040518
vancomycin                                                         0.037385
LEVOFLOXACIN IN D5W 500 MG/100ML IV SOLN                           0.037385
150 ML FLEX CONT : LEVOFLOXACIN IN D5W 5 MG/ML IV SOLN             0.036967
PIPERACILLIN/TAZOBACTAM SOD 3.375 GM  VIAL                         0.036967
METRONIDAZOLE 500 MG/100 ML                                        0.033835
VANCOMYCIN 1 G/200 ML D5W                                          0.033417
PIPERACILLIN SOD-TAZOBACTAM SO 3-0.375 G IV SOLR                   0.032373
1 EACH VIAL 

48

vancomycin                 0.419799
levofloxacin               0.141186
piperacillin/tazobactam    0.061612
ceftriaxone                0.060986
metronidazole              0.044486
1                          0.038847
150                        0.036967
piperacillin               0.032373
cefepime                   0.028613
piperacillin-tazobactam    0.027569
azithromycin               0.025898
meropenem                  0.019424
cefazolin                  0.017126
100                        0.016917
20                         0.011696
ciprofloxacin              0.011487
clindamycin                0.005013
Name: antibiotic, dtype: float64

17

In [262]:
# preprocessing_data PO
antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'PO')]['drugname'].value_counts(normalize=True)
len(antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'PO')]['drugname'].value_counts(normalize=True))
# For grouped - with some missing
antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'PO')]['antibiotic'].value_counts(normalize=True)
len(antibiotics[(antibiotics['stay_id'].isin(eicu_data_list)) & (antibiotics['routeadmin'] == 'PO')]['antibiotic'].value_counts(normalize=True))

LEVOFLOXACIN 750 MG PO TABS           0.245266
LEVOFLOXACIN 500 MG PO TABS           0.136944
LEVOFLOXACIN                          0.134302
METRONIDAZOLE 500 MG PO TABS          0.125055
metroNIDAZOLE                         0.102158
vancomycin                            0.088067
azithromycin                          0.080581
AZITHROMYCIN                          0.041832
VANCOMYCIN HCL                        0.033025
clindamycin                           0.009687
VANCOMYCIN                            0.002642
LEVOFLOXACIN 500mg in D5W 100mL RT    0.000440
Name: drugname, dtype: float64

12

levofloxacin     0.516953
metronidazole    0.227213
vancomycin       0.123734
azithromycin     0.122413
clindamycin      0.009687
Name: antibiotic, dtype: float64

5

Diagnosis

In [275]:
# Import
diagnosis = pd.read_csv(r"eicu-collaborative-research-database-2.0/diagnosis.csv")
diagnosis.rename(columns={'patientunitstayid': 'stay_id'}, inplace=True)
diagnosis = diagnosis[diagnosis['stay_id'].isin(eicu_data_list)]

In [276]:
diagnosis

Unnamed: 0,diagnosisid,stay_id,activeupondischarge,diagnosisoffset,diagnosisstring,icd9code,diagnosispriority
188,3782286,141392,True,38,endocrine|glucose metabolism|diabetes mellitus,,Other
189,3366078,141392,True,38,cardiovascular|shock / hypotension|hypotension,"458.9, I95.9",Other
190,3557636,141392,True,38,cardiovascular|shock / hypotension|sepsis,"038.9, A41.9",Primary
191,3707533,141392,True,38,cardiovascular|ventricular disorders|congestiv...,"428.0, I50.9",Other
218,3920901,141470,True,13,cardiovascular|shock / hypotension|sepsis,"038.9, A41.9",Primary
...,...,...,...,...,...,...,...
2698981,46224569,3346897,False,193,cardiovascular|shock / hypotension|sepsis|seps...,"995.92, R65.20",Other
2698982,46269856,3346897,False,194,gastrointestinal|biliary disease|cholecystitis,,Other
2698983,46106248,3346897,False,193,cardiovascular|vascular disorders|swollen extr...,,Other
2698984,46129347,3346897,False,194,cardiovascular|shock / hypotension|hypotension,"458.9, I95.9",Other


In [309]:
len(eicu_data_list)
diagnosis.stay_id.nunique()

1668

1573

In [278]:
# Mean number of diagnosis time points
(diagnosis.groupby('stay_id')['diagnosisoffset'].nunique()).mean()

3.745073108709472

In [286]:
# eicu_data
diagnosis['diagnosisstring'].value_counts(normalize=True)

pulmonary|respiratory failure|acute respiratory failure                             0.026099
renal|disorder of kidney|acute renal failure                                        0.022300
pulmonary|respiratory failure|hypoxemia                                             0.020715
endocrine|glucose metabolism|diabetes mellitus                                      0.017164
endocrine|glucose metabolism|hyperglycemia                                          0.017164
                                                                                      ...   
surgery|cardiac surgery|pericardial problems                                        0.000019
neurologic|CNS mass lesions|brain tumor                                             0.000019
gastrointestinal|intestinal disease|viscus perforation                              0.000019
cardiovascular|vascular disorders|peripheral vascular ischemia|with claudication    0.000019
gastrointestinal|pancreatic disease|pancreatitis|relapsing            

In [290]:
# Find just infections
infection_count = diagnosis[diagnosis['diagnosisstring'].str.contains('infectious diseases')]['diagnosisstring'].str.split('|').explode('diagnosisstring').value_counts()

In [292]:
infection_count[:50]

infectious diseases                                                 5769
systemic/other infections                                           2976
chest/pulmonary infections                                          1316
pneumonia                                                           1218
sepsis                                                              1060
hematological effect of infection                                    728
GI infections                                                        715
signs and symptoms of sepsis (SIRS)                                  458
leukocytosis                                                         434
GU infections                                                        419
diarrhea due to infection                                            379
C. difficile colitis                                                 377
lower urinary tract infection                                        359
fever                                              

In [None]:
# Pneumonia and sespsis most common

In [318]:
# Sepsis
diagnosis[diagnosis['diagnosisstring'].str.contains('sepsis', case=False)]['stay_id'].nunique()
diagnosis[diagnosis['diagnosisstring'].str.contains('sepsis', case=False)]['stay_id'].nunique()/ len(eicu_data_list)

549

0.329136690647482

In [319]:
# Pneumonia
diagnosis[diagnosis['diagnosisstring'].str.contains('pneumonia', case=False)]['stay_id'].nunique()
diagnosis[diagnosis['diagnosisstring'].str.contains('pneumonia', case=False)]['stay_id'].nunique()/ len(eicu_data_list)

529

0.31714628297362113

In [321]:
# UTI
diagnosis[(diagnosis['diagnosisstring'].str.contains("urinary tract infection", case=False))| \
                                  (diagnosis['diagnosisstring'].str.contains("pyelonephritis", case=False))]['stay_id'].nunique()
diagnosis[(diagnosis['diagnosisstring'].str.contains("urinary tract infection", case=False))| \
                                  (diagnosis['diagnosisstring'].str.contains("pyelonephritis", case=False))]['stay_id'].nunique() / len(eicu_data_list)

170

0.10191846522781775

Organism

In [265]:
# Import
microLab = pd.read_csv(r"eicu-collaborative-research-database-2.0/microLab.csv")
microLab.rename(columns={'patientunitstayid': 'stay_id'}, inplace=True)
microLab = microLab[microLab['stay_id'].isin(eicu_data_list)]

In [266]:
microLab

Unnamed: 0,microlabid,stay_id,culturetakenoffset,culturesite,organism,antibiotic,sensitivitylevel
2602,1570419,3038044,3570,Nasopharynx,Other,,
2603,1554336,3038044,-207,"Blood, Venipuncture",Other,penicillin G,Resistant
2604,1570420,3038044,3570,Nasopharynx,Other,,
2605,1570267,3038044,-17,Nasopharynx,Other,,
2606,1571109,3038044,2403,"Blood, Venipuncture",no growth,,
...,...,...,...,...,...,...,...
5525,1555477,3115723,-1,Rectal Swab,Other,,
5526,1574046,3115723,-112,"Urine, Catheter Specimen",Other,Other,Resistant
5527,1574048,3115723,-112,"Urine, Catheter Specimen",Other,Other,Sensitive
5528,1574047,3115723,-112,"Urine, Catheter Specimen",Other,vancomycin,Sensitive


In [267]:
microLab.stay_id.nunique()

14

In [270]:
# eicu_data
microLab['organism'].value_counts() / len(microLab)

Other        0.869919
no growth    0.130081
Name: organism, dtype: float64