### Imports and load the data

In [2]:
from showupforhealth.utils import perform_train_test_split, scale_df
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.model_selection import cross_val_score
import matplotlib.pyplot as plt

import pandas as pd
import numpy as np
from imblearn.over_sampling import SMOTE

from tensorflow.keras import Sequential, layers
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import Recall
import tensorflow as tf

import keras_tuner as kt

# DL model with UNBALANCED data

### Load and shuffle the data

In [3]:
data = pd.read_csv('/Users/alessio/code/janduplessis883/data-showup/data/output-data/full_train_data.csv')

### Split in X and y

In [4]:
X = data.drop(columns=['Appointment_status'])
y = data['Appointment_status']

### Baseline accuracy

In [5]:
print(f'The baseline accuracy if we assume that all patients will show-up is {y.value_counts()[1] / X.shape[0]}')

The baseline accuracy if we assume that all patients will show-up is 0.9618598006025406


### Split in Train and Test

In [6]:
X_train, X_test, y_train, y_test = perform_train_test_split(X,y)

✅ OUTPUT: X_train, X_test, y_train, y_test
Train Set:  X_train, y_train - (721212, 36), (721212,)
 Test Set:  X_test, y_test - - (180304, 36), (180304,)


### Scale X_train and X_test

In [7]:
def mm_scaler(X_train, X_test):
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train_scaled = scaler.transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    return X_train_scaled, X_test_scaled

X_train_scaled, X_test_scaled = mm_scaler(X_train, X_test)

### Split Train and Validation

In [8]:
X_train_val, X_val, y_train_val, y_val = perform_train_test_split(X_train, y_train)

✅ OUTPUT: X_train, X_test, y_train, y_test
Train Set:  X_train, y_train - (576969, 36), (576969,)
 Test Set:  X_test, y_test - - (144243, 36), (144243,)


### Model for UNBALANCED data

In [9]:
# metrics = [tf.keras.metrics.BinaryAccuracy(name='accuracy'),
#           tf.keras.metrics.Recall(name='recall'),
#           tf.keras.metrics.Precision(name='precision'),
#           tf.keras.metrics.AUC(name='auc')]

In [10]:
def model_builder(hp):
    model = Sequential()
    
    # hp
    hp_units_1 = hp.Int('units_1', min_value=32, max_value=512, step=32)
    hp_units_2 = hp.Int('units_2', min_value=32, max_value=512, step=32)
    hp_units_3 = hp.Int('units_3', min_value=32, max_value=512, step=32)
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-1, 1e-2, 1e-3, 1e-4])

    # Layers
    model.add(layers.Dense(units=hp_units_1, activation='relu', input_dim=X.shape[1]))
    model.add(layers.Dense(units=hp_units_2, activation='relu'))
    model.add(layers.Dense(units=hp_units_3, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    
    # Compiler
    adam = tf.keras.optimizers.Adam(learning_rate=hp_learning_rate)
    model.compile(optimizer=adam, loss='binary_crossentropy', metrics='accuracy')

    return model

In [11]:
tuner = kt.Hyperband(model_builder,
                     objective='val_accuracy',
                     max_epochs=10,
                     factor=3,
                     directory='my_dir',
                     project_name='intro3')

stop_early = EarlyStopping(monitor='val_loss', patience=5)
                     
tuner.search(X_train_val, y_train_val, 
             epochs=50, 
             validation_data=(X_val, y_val), 
             callbacks=[stop_early])

Trial 3 Complete [00h 02m 31s]
val_accuracy: 0.9619877338409424

Best val_accuracy So Far: 0.9619877338409424
Total elapsed time: 00h 05m 27s

Search: Running Trial #4

Value             |Best Value So Far |Hyperparameter
128               |96                |units_1
256               |32                |units_2
448               |256               |units_3
0.01              |0.1               |learning_rate
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
2                 |2                 |tuner/bracket
0                 |0                 |tuner/round

Epoch 1/2
Epoch 2/2
 3843/18031 [=====>........................] - ETA: 45s - loss: 0.1612 - accuracy: 0.9624

KeyboardInterrupt: 

In [None]:
model = tuner.hypermodel.build(best_hps)
history = model.fit(X_train_val, y_train_val, epochs=50, validation_data=(X_val, y_val))

In [None]:
model = Sequential()
es = EarlyStopping(patience=5, monitor='val_accuracy', restore_best_weights=True)

model.add(layers.Dense(512, activation='relu', input_dim=X.shape[1]))
model.add(layers.Dense(512, activation='relu'))
model.add(layers.Dense(256, activation='relu'))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1, activation='sigmoid'))

adam = tf.keras.optimizers.Adam(learning_rate=0.00001)

model.compile(loss='binary_crossentropy', optimizer=adam, metrics=metrics)

history = model.fit(X_train_val, y_train_val,
                    validation_data=(X_val, y_val),
                    batch_size=256,
                    epochs=10,
                    shuffle=True,
                    callbacks=[es],
                    verbose=2)

In [None]:
def plot_loss_precision_recall_curve(history):
    fig, ax = plt.subplots(2, 2, figsize=(20, 15))

        # --- ACCURACY 
    ax[1, 0].plot(history.history['accuracy'], color="#a10606")
    ax[1, 0].plot(history.history['val_accuracy'], color="#1b5743")
    ax[1, 0].set_title('Model accuracy', fontsize = 18)
    ax[1, 0].set_ylabel('Accuracy', fontsize = 14)
    ax[1, 0].legend(['Train', 'Val'], loc='upper right')
    ax[1, 0].grid(axis="x", linewidth=0.5)
    ax[1, 0].grid(axis="y", linewidth=0.5)
    
    # --- LOSS 
    ax[0, 0].plot(history.history['loss'], color="#a10606")
    ax[0, 0].plot(history.history['val_loss'], color="#1b5743")
    ax[0, 0].set_title('Model loss', fontsize = 18)
    ax[0, 0].set_ylabel('Loss', fontsize = 14)
    ax[0, 0].legend(['Train', 'Val'], loc='upper right')
    ax[0, 0].grid(axis="x", linewidth=0.5)
    ax[0, 0].grid(axis="y", linewidth=0.5)

#     --- RECALL
    ax[0, 1].plot(history.history['recall']) 
    ax[0, 1].plot(history.history['val_recall'])
    ax[0, 1].set_title('Model recall', fontsize = 18)
    ax[0, 1].set_ylabel('Recall', fontsize = 14) 
    ax[0, 1].legend(['Train', 'Val'], loc='lower right')
    ax[0, 1].grid(axis="x", linewidth=0.5)
    ax[0, 1].grid(axis="y", linewidth=0.5)

#     --- PRECISION
    ax[1, 0].plot(history.history['precision'])
    ax[1, 0].plot(history.history['val_precision'])
    ax[1, 0].set_title('Model precision', fontsize = 18)
    ax[1, 0].set_ylabel('Precision', fontsize = 14)
    ax[1, 0].legend(['Train', 'Val'], loc='lower right')
    ax[1, 0].grid(axis="x", linewidth=0.5)
    ax[1, 0].grid(axis="y", linewidth=0.5)
    
    # --- AUC
    ax[1, 1].plot(history.history['auc'])
    ax[1, 1].plot(history.history['val_auc'])
    ax[1, 1].set_title('Model AUC', fontsize = 18) 
    ax[1, 1].set_ylabel('AUC', fontsize = 14)
    ax[1, 1].legend(['Train', 'Val'], loc='lower right')
    ax[1, 1].grid(axis="x", linewidth=0.5)
    ax[1, 1].grid(axis="y", linewidth=0.5)

    # Set common labels  
    fig.text(0.5, 0.04, 'Epoch', ha='center', va='center', fontsize=14) 
    plt.show()

In [None]:
plot_loss_precision_recall_curve(history)