* ROC-AUC: [https://mmuratarat.github.io/2019-10-01/how-to-compute-AUC-plot-ROC-by-hand](https://mmuratarat.github.io/2019-10-01/how-to-compute-AUC-plot-ROC-by-hand)
* Precision, Recall, F1-Score: [https://machinelearningmastery.com/precision-recall-and-f-measure-for-imbalanced-classification/](https://machinelearningmastery.com/precision-recall-and-f-measure-for-imbalanced-classification/)

In [1]:
import os
import pandas as pd
from sklearn.preprocessing import StandardScaler
from keras.models import Model
from keras.layers import Input, Dense
from keras.callbacks import TensorBoard, ModelCheckpoint
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import tensorflow as tf
import numpy as np
from sklearn.metrics import roc_auc_score
from keras.callbacks import Callback

# Seed the NumPy and TensorFlow global random number generators with a fixed value.
np.random.seed(42)
tf.random.set_seed(42)

In [2]:
class RocAUCScore(Callback):
    """Keras callback that prints the ROC AUC on two datasets after every epoch.

    Args:
        training_data: ``(features, labels)`` pair; element 0 is fed to
            ``model.predict`` and element 1 is used as the ground truth.
        validation_data: ``(features, labels)`` pair, used the same way for
            the validation score.
    """

    def __init__(self, training_data, validation_data):
        self.x = training_data[0]
        self.y = training_data[1]
        self.x_val = validation_data[0]
        self.y_val = validation_data[1]
        super(RocAUCScore, self).__init__()

    def on_epoch_end(self, epoch, logs=None):
        """Predict on both stored datasets and print their ROC AUC scores.

        Args:
            epoch: index of the epoch that just finished (unused here).
            logs: metrics dict passed by Keras (unused here). Defaults to
                ``None`` rather than a shared mutable ``{}``.
        """
        y_pred = self.model.predict(self.x)
        roc = roc_auc_score(self.y, y_pred)
        y_pred_val = self.model.predict(self.x_val)
        roc_val = roc_auc_score(self.y_val, y_pred_val)
        print('\n  *** ROC AUC Score: %s - roc-auc_val: %s ***' % (str(roc), str(roc_val)))

In [3]:
# Relative paths of the CSV files for the train, validation and test splits
# (read with pd.read_csv in load_data()).
TRAIN_DATA = "./data/train/train_data.csv"
VAL_DATA = "./data/val/val_data.csv"
TEST_DATA = "./data/test/test_data.csv"

In [4]:
def load_data():
    """Read the train/val/test CSV files and return scaled features plus labels.

    Returns:
        dict with the keys ``train_y``, ``val_y``, ``test_y`` (the ``'y'``
        column popped from each file), ``train_X``, ``val_X``, ``test_X``
        (the remaining columns after standard scaling) and ``scaler`` (the
        ``StandardScaler`` fitted on the training features).
    """
    frames = {
        "train": pd.read_csv(TRAIN_DATA),
        "val": pd.read_csv(VAL_DATA),
        "test": pd.read_csv(TEST_DATA),
    }

    data = {}
    # Split the label column off each frame; what remains are the features.
    for split_name, frame in frames.items():
        data[split_name + "_y"] = frame.pop('y')

    # Fit the scaler on the training features only, then apply the same
    # transformation to the validation and test features.
    scaler = StandardScaler()
    data["train_X"] = scaler.fit_transform(frames["train"])
    data["val_X"] = scaler.transform(frames["val"])
    data["test_X"] = scaler.transform(frames["test"])

    # Hand the fitted scaler back so the same scaling can be reused later.
    data["scaler"] = scaler
    return data

In [5]:
def build_network(input_features=None):
    """Build and compile a fully connected binary classifier.

    Args:
        input_features: number of input features; used as the input shape
            ``(input_features,)``.

    Returns:
        A compiled Keras ``Model`` with five ReLU hidden layers
        (128, 64, 64, 32, 16 units) and a single sigmoid output unit,
        using the Adam optimizer, binary cross-entropy loss and the
        accuracy metric.
    """
    hidden_units = (128, 64, 64, 32, 16)

    inputs = Input(shape=(input_features,), name="input")

    # Stack the hidden layers in order; they are named hidden1 .. hidden5.
    layer_output = inputs
    for position, units in enumerate(hidden_units, start=1):
        layer_output = Dense(units, activation='relu', name="hidden%d" % position)(layer_output)

    prediction = Dense(1, activation='sigmoid', name="final")(layer_output)

    model = Model(inputs=inputs, outputs=prediction)
    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=["accuracy"])
    return model

In [6]:
def create_callbacks(data):
    """Create the callbacks passed to ``model.fit``.

    Args:
        data: dict providing ``"train_X"``, ``"train_y"``, ``"val_X"`` and
            ``"val_y"``; these are handed to the ROC AUC callback.

    Returns:
        list: ``[TensorBoard, RocAUCScore, ModelCheckpoint]`` callback instances.
    """
    # The legacy `batch_size` and `write_grads` arguments are intentionally not
    # passed: the TensorFlow 2 TensorBoard callback ignores them (with a
    # warning), and the values previously supplied were the legacy defaults.
    tensorboard_callback = TensorBoard(log_dir=os.path.join(os.getcwd(), "tb_log", "5h_adam_20epochs"),
                                       histogram_freq=1, write_graph=True)

    roc_auc_callback = RocAUCScore(training_data=(data["train_X"], data["train_y"]),
                                   validation_data=(data["val_X"], data["val_y"]))

    # Make sure the checkpoint directory exists before the first save.
    os.makedirs("./model_weights", exist_ok=True)
    checkpoint_callback = ModelCheckpoint(filepath="./model_weights/model-weights.{epoch:02d}-{val_accuracy:.6f}.hdf5", monitor='val_accuracy',
                                          verbose=1, save_best_only=True)

    return [tensorboard_callback, roc_auc_callback, checkpoint_callback]


In [7]:
def class_from_prob(x, operating_point=0.5):
    """Convert probabilities to hard 0/1 class labels without modifying the input.

    Args:
        x: array-like of probabilities.
        operating_point: decision threshold; values ``>= operating_point``
            become 1 and values ``< operating_point`` become 0.

    Returns:
        A new NumPy array with the same shape as ``x`` holding the class
        labels. ``x`` itself is left untouched, so the caller can still use
        the probabilities (e.g. for ROC AUC) afterwards.
    """
    labels = np.array(x, copy=True)
    # Compute both masks from the untouched values before writing anything,
    # so freshly written 1s are never re-compared against the threshold.
    is_positive = labels >= operating_point
    is_negative = labels < operating_point
    labels[is_positive] = 1
    labels[is_negative] = 0
    return labels

In [8]:
def main():
    """Load the data, train the network and print train/validation metrics.

    Prints the training set shape, the model summary, train and validation
    accuracy, the validation ROC AUC and the validation classification report.
    """
    data = load_data()
    callbacks = create_callbacks(data)
    print("Data Loaded...")
    print("Train Shape X:" + str(data["train_X"].shape)+ " y: "+str(data["train_y"].shape))

    input_features = data["train_X"].shape[1]
    model = build_network(input_features=input_features)
    print("Network Structure")
    # summary() prints the table itself and returns None, so it is not wrapped
    # in print() (which would emit an extra "None" line).
    model.summary()
    model.fit(x=data["train_X"], y=data["train_y"], batch_size=32, epochs=20, verbose=1,
              validation_data=(data["val_X"], data["val_y"]), callbacks=callbacks)

    # Threshold copies of the predictions so the probability arrays stay
    # intact; the ROC AUC below must be computed on probabilities, not on
    # hard 0/1 labels.
    y_prob_train = model.predict(data["train_X"])
    y_hat_train = class_from_prob(y_prob_train.copy())
    y_prob_val = model.predict(data["val_X"])
    y_hat_val = class_from_prob(y_prob_val.copy())

    print("Model Train Accuracy: " + str(accuracy_score(data["train_y"], y_hat_train)))
    print("Model Val Accuracy: " + str(accuracy_score(data["val_y"], y_hat_val)))
    print("Val ROC: " + str(roc_auc_score(data["val_y"], y_prob_val)))
    print("Val Classification Report")
    print(classification_report(data["val_y"], y_hat_val))

In [9]:
# Run the full pipeline: load data, train the model, print the evaluation metrics.
main()

Data Loaded...
Train Shape X:(9200, 178) y: (9200,)
Network Structure
Model: "model"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input (InputLayer)           [(None, 178)]             0         
_________________________________________________________________
hidden1 (Dense)              (None, 128)               22912     
_________________________________________________________________
hidden2 (Dense)              (None, 64)                8256      
_________________________________________________________________
hidden3 (Dense)              (None, 64)                4160      
_________________________________________________________________
hidden4 (Dense)              (None, 32)                2080      
_________________________________________________________________
hidden5 (Dense)              (None, 16)                528       
_________________________________________________________