In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.layers import Dense, Dropout
from keras import metrics, losses, callbacks, optimizers, regularizers
# import matplotlib.pyplot as plt
# from sklearn.metrics import roc_auc_score
# import copy
# from datetime import datetime

pd.set_option('max_rows', 500)
pd.set_option('display.max_columns', 300)
np.random.seed(666)
pd.set_option('display.max_rows', 500)
pd.set_option('display.width', 1000)
pd.set_option('display.float_format', '{:20,.3f}'.format)
pd.set_option('display.max_colwidth', None)

In [2]:
model_name = 'data0211_2_corr85_and_weight_removed_nn_default'
TARGET_COL = 'diabetes_mellitus'
cat_cols = ['ethnicity', 'gender', 'hospital_admit_source', 'icu_admit_source', 'icu_stay_type', 'icu_type']
cols_to_onehot = ['ethnicity', 'gender', 'icu_admit_source', 'icu_stay_type', 'icu_type']
vars_to_encode = ['icu_id','apache_3j_diagnosis','hospital_admit_source']
vars_to_exclude = ['hospital_id','encounter_id','apache_2_diagnosis']

def target_encode(var, dv, weight = 36):
    mean = dv.mean()
    true_weight = weight * max(1, mean/(1-mean))
    agg = dv.groupby(var,dropna=False).agg(['count','mean'])
    counts = agg['count']
    means = agg['mean']
    return (counts * means + true_weight) / (counts + true_weight/mean)

def target_encode_train_test(var_train, dv_train, var_test, weight = 36):
    encode_key = target_encode(var_train,dv_train,weight)
    return var_train.map(encode_key), var_test.map(encode_key).fillna(dv_train.mean())

# Data Prep

In [15]:
train = pd.read_csv('Data/train_capped_ratio_all.csv')
test = pd.read_csv('Data/test_capped_ratio_all.csv')

In [4]:
for col in train.columns[train.columns.str.endswith('_ratio')]:
    train[col], test[col] = np.log(train[col]), np.log(test[col])

In [16]:
# onehot low-cardinality categoricals
target = train[TARGET_COL].astype('bool')
train[cols_to_onehot] = train[cols_to_onehot].fillna('missing')
test[cols_to_onehot] = test[cols_to_onehot].fillna('missing')
train_onehot = pd.get_dummies(train.drop(TARGET_COL,axis=1), columns = cols_to_onehot)
test_onehot = pd.get_dummies(test, columns = cols_to_onehot)
# train_onehot = train_onehot.replace(np.inf, np.nan)
# test_onehot = test_onehot.replace(np.inf, np.nan)
# TODO: Binary for missingness of numericals?

In [17]:
trainX_onehot, validX_onehot, trainY, validY = train_test_split(
    train_onehot, target, test_size = .2, stratify = target, random_state=666)
trainX_onehot = trainX_onehot.reset_index()
validX_onehot = validX_onehot.reset_index()

In [18]:
# target encode selected variables
for col in vars_to_encode:
    trainX_onehot[col+'_encoded'], validX_onehot[col+'_encoded'] = target_encode_train_test(
        trainX_onehot[col],trainY,validX_onehot[col])

In [19]:
binary_vars = list(trainX_onehot.columns[trainX_onehot.nunique() == 2] & trainX_onehot.columns[trainX_onehot.isna().sum() == 0])

In [20]:
# normalize nonbinary variables to 0 mean 1 SD
trainX_to_scale = trainX_onehot.drop(binary_vars+vars_to_encode+vars_to_exclude,axis=1)
validX_to_scale = validX_onehot.drop(binary_vars+vars_to_encode+vars_to_exclude,axis=1)
trainX_scaler = StandardScaler().fit(trainX_to_scale)
trainX_scaled = pd.DataFrame(trainX_scaler.transform(trainX_to_scale), columns = trainX_to_scale.columns)
validX_scaled = pd.DataFrame(trainX_scaler.transform(validX_to_scale), columns = validX_to_scale.columns)
trainX_scaled = trainX_scaled.fillna(0)
validX_scaled = validX_scaled.fillna(0)
trainX_ready = pd.concat([trainX_scaled, trainX_onehot[binary_vars]],axis=1)#[binary_vars]
validX_ready = pd.concat([validX_scaled, validX_onehot[binary_vars]],axis=1)#[binary_vars]

In [21]:
num_cols = len(trainX_ready.columns)
base_units = np.floor(np.sqrt(num_cols))
base_batch_size = int(np.power(2,np.floor(np.log2(np.sqrt(len(trainX_ready))))))
tuning_num_trials = 60
early_stop = callbacks.EarlyStopping(
    monitor='val_loss'
    ,min_delta=0.001
    ,patience=9
    ,verbose=1
    ,mode='auto'
    ,baseline=None
    ,restore_best_weights=True
)
reduce_lr_on_plateau = callbacks.ReduceLROnPlateau(
    monitor='val_loss'
    ,factor=0.4
    ,patience=3
    ,verbose=1
    ,mode='auto'
    ,min_delta=0.001
    ,cooldown=0
    ,min_lr=0
)

In [22]:
baselineModel = Sequential()
baselineModel.add(Dense(
    units = base_units
    ,input_dim=num_cols
    ,activation='relu'
    ,use_bias=True
    ,kernel_initializer='he_uniform'
    ,bias_initializer='zeros'
    ,kernel_regularizer=None
    ,bias_regularizer=None
    ,activity_regularizer=None
    ,kernel_constraint=None
    ,bias_constraint=None
))
baselineModel.add(Dense(
    units=1
    ,activation='sigmoid'
))
baselineModel.compile(
    optimizer=optimizers.Adam()
    ,loss=losses.BinaryCrossentropy()
    ,metrics=['AUC']
    ,loss_weights=None
    ,weighted_metrics=None
    ,run_eagerly=None
)

In [23]:
baselineModel.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 18)                5940      
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 19        
Total params: 5,959
Trainable params: 5,959
Non-trainable params: 0
_________________________________________________________________


In [24]:
history = baselineModel.fit(
    x=trainX_ready,
    y=trainY,
    batch_size=base_batch_size,
    epochs=100,
    verbose=2,
    callbacks=[early_stop,reduce_lr_on_plateau],
    validation_data=(validX_ready,validY),
    shuffle=True,
    class_weight=None,
    sample_weight=None,
    initial_epoch=0,
    steps_per_epoch=None,
    validation_steps=None,
    validation_batch_size=None,
    validation_freq=1
)

Epoch 1/100
392/392 - 1s - loss: 0.4353 - auc: 0.7854 - val_loss: 0.4040 - val_auc: 0.8250
Epoch 2/100
392/392 - 1s - loss: 0.3981 - auc: 0.8294 - val_loss: 0.3961 - val_auc: 0.8326
Epoch 3/100
392/392 - 1s - loss: 0.3906 - auc: 0.8368 - val_loss: 0.3953 - val_auc: 0.8333
Epoch 4/100
392/392 - 1s - loss: 0.3868 - auc: 0.8405 - val_loss: 0.3924 - val_auc: 0.8362
Epoch 5/100
392/392 - 1s - loss: 0.3837 - auc: 0.8436 - val_loss: 0.3918 - val_auc: 0.8367
Epoch 6/100
392/392 - 1s - loss: 0.3816 - auc: 0.8457 - val_loss: 0.3910 - val_auc: 0.8380
Epoch 7/100
392/392 - 1s - loss: 0.3794 - auc: 0.8479 - val_loss: 0.3901 - val_auc: 0.8389
Epoch 8/100
392/392 - 1s - loss: 0.3779 - auc: 0.8493 - val_loss: 0.3905 - val_auc: 0.8378
Epoch 9/100

Epoch 00009: ReduceLROnPlateau reducing learning rate to 0.0004000000189989805.
392/392 - 1s - loss: 0.3765 - auc: 0.8508 - val_loss: 0.3903 - val_auc: 0.8386
Epoch 10/100
392/392 - 1s - loss: 0.3720 - auc: 0.8550 - val_loss: 0.3886 - val_auc: 0.8405
Epoch 11

In [None]:
model.weights

In [None]:
model.evaluate(trainX_ready, trainY)