# Baseline Model Dev
---

## Pull in data

In [1]:
!ls ../../src

__init__.py   [34mmodels[m[m        train.py
[34m__pycache__[m[m   preprocess.py utils.py


In [2]:
import sys
sys.path.append('../../')

import importlib
import src.utils

In [7]:
# IF YOU EVER CHANGE .PY FILES, RERUN THIS CODE BLOCK
importlib.reload(src.utils)
from src.utils import get_mimic_data, filter_mimic_day1, MimicDataMI, MimicDataSepsis

In [4]:
df = get_mimic_data(loc='../../data/CHARTEVENTS_reduced_24_hour_blocks_plus_admissions_plus_patients_plus_scripts_plus_icds_plus_notes.csv')

Reading in MIMIC data at the HADMI_ID/HADMID_DAY-level


  df = pd.read_csv(loc)


Fetched data of shape: (628391, 235)


In [8]:
baseline_df = filter_mimic_day1(df)
baseline_df.head()

Filtering only on day 1 in the ICU
Baseline data shape: (58034, 235)


Unnamed: 0,BUN,HDL,INR,Inspired O2 Fraction,LDL,PEEP Set,PTT,RBCs,WBCs,anion gap,...,tacrolimus,trazodone,vancomycin,vasopressin,warfarin,zolpidem,HADM_ID,CKD,Infection,ct_angio
0,41.0,41.0,15.3,50.0,81.0,5.0,32.4,3.39,11.2,18.0,...,0.0,0.0,0.0,0.0,0.0,0.0,100001,0,1,0
5,49.0,41.0,15.3,50.0,81.0,5.0,32.4,3.39,13.4,10.0,...,0.0,0.0,1.0,0.0,0.0,0.0,100003,0,1,0
8,23.0,41.0,15.3,50.0,81.0,5.0,32.4,3.39,10.9,13.0,...,0.0,0.0,0.0,0.0,1.0,0.0,100006,0,1,1
14,12.0,41.0,15.3,50.0,81.0,5.0,32.4,4.01,12.3,13.0,...,0.0,0.0,0.0,0.0,0.0,0.0,100007,0,1,0
22,23.0,41.0,15.3,50.0,81.0,5.0,32.4,3.39,10.9,13.0,...,0.0,0.0,0.0,0.0,0.0,0.0,100010,0,0,1


In [9]:
sepsis_data = MimicDataSepsis(baseline_df)
mi_data = MimicDataMI(baseline_df)

Total sepsis events:
0    45973
1    12061
Name: Sepsis, dtype: int64
Total MI events:
0    56968
1     1066
Name: MI, dtype: int64


## Logistic Regression for Day 1

In [10]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, roc_auc_score

sepsis_data.split_train_test()

sepsis_lr = LogisticRegression(solver='liblinear', random_state=0)
sepsis_lr.fit(sepsis_data.get_train_feats(), sepsis_data.get_train_target())

pred_y_test_sepsis = sepsis_lr.predict(sepsis_data.get_test_feats())
test_acc_sepsis = accuracy_score(pred_y_test_sepsis, sepsis_data.get_test_target())
test_roc_sepsis = roc_auc_score(pred_y_test_sepsis, sepsis_data.get_test_target())

print(f'Accuracy: {test_acc_sepsis}')
print(f'AUROC: {test_roc_sepsis}')

Splitting data with 30.00% test split
Train data has shape: (40623, 241)
Test data shape: (17411, 241)
Stratified by target variable: Sepsis
8443/40623 events in train; 3618/17411 events in test
Dropping columns: ['HADM_ID', 'SUBJECT_ID', 'HADMID_DAY', 'DOB', 'ADMITTIME', 'hr_sepsis', 'respiratory rate_sepsis', 'wbc_sepsis', 'temperature f_sepsis', 'sepsis_points', 'Sepsis']
Shape of X features: (40623, 230)
Accuracy: 0.8947791625983573
AUROC: 0.8573308983436795




In [11]:
mi_data.split_train_test()

mi_lr = LogisticRegression(solver='liblinear', random_state=0)
mi_lr.fit(mi_data.get_train_feats(), mi_data.get_train_target())

pred_y_test_mi = mi_lr.predict(mi_data.get_test_feats())
test_acc_mi = accuracy_score(pred_y_test_mi, mi_data.get_test_target())
test_roc_mi = roc_auc_score(pred_y_test_mi, mi_data.get_test_target())

print(f'Accuracy: {test_acc_mi}')
print(f'AUROC: {test_roc_mi}')

Splitting data with 30.00% test split
Train data has shape: (40623, 236)
Test data shape: (17411, 236)
Stratified by target variable: MI
746/40623 events in train; 320/17411 events in test
Dropping columns: ['HADM_ID', 'SUBJECT_ID', 'HADMID_DAY', 'DOB', 'ADMITTIME', 'troponin', 'troponin_std', 'troponin_min', 'troponin_max', 'MI']
Shape of X features: (40623, 226)
Accuracy: 0.98144850956292
AUROC: 0.7239215665270532




## Multilayer Perceptron (MLP) for Day 1

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from tensorflow.keras.optimizers import SGD, Adam
from tensorflow.keras import metrics

N_EPOCHS = 10
BATCH_SIZE = 30

mi_mlp = Sequential()
num_features = mi_data.get_feats().shape[1]

mi_mlp = Sequential()
mi_mlp.add(Dense(units=32, activation='relu', input_dim=num_features, name='hidden_layer'))
mi_mlp.add(Dense(units=1, activation='sigmoid', name='output_layer')) 

opt = SGD(learning_rate=0.1)

METRICS = [
      metrics.BinaryAccuracy(name='accuracy'),
      #metrics.AUC(name='auc'),
]

mi_mlp.compile(optimizer=opt, loss='binary_crossentropy', metrics=METRICS)
mi_mlp.summary()

history = mi_mlp.fit(x=mi_data.get_train_feats(), 
         y=mi_data.get_train_target(),
         validation_data = (mi_data.get_test_feats(), mi_data.get_test_target()),
         batch_size=BATCH_SIZE,
         epochs=N_EPOCHS)

In [None]:
N_EPOCHS = 10
BATCH_SIZE = 30

sepsis_mlp = Sequential()
num_features = sepsis_data.get_feats().shape[1]

sepsis_mlp = Sequential()
sepsis_mlp.add(Dense(units=32, activation='relu', input_dim=num_features, name='hidden_layer'))
sepsis_mlp.add(Dense(units=1, activation='sigmoid', name='output_layer')) 

opt = SGD(learning_rate=0.1)

METRICS = [
      metrics.BinaryAccuracy(name='accuracy'),
      #metrics.AUC(name='auc'),
]

sepsis_mlp.compile(optimizer=opt, loss='binary_crossentropy', metrics=METRICS)
sepsis_mlp.summary()

history = sepsis_mlp.fit(x=sepsis_data.get_train_feats(), 
         y=sepsis_data.get_train_target(),
         validation_data = (sepsis_data.get_test_feats(), sepsis_data.get_test_target()),
         batch_size=BATCH_SIZE,
         epochs=N_EPOCHS)