In [1]:
import pandas as pd
print("pandas version: {}". format(pd.__version__))

import numpy as np
print("numpy version: {}". format(np.__version__))

import sklearn
from sklearn.preprocessing import StandardScaler
from sklearn.compose import ColumnTransformer
from sklearn.decomposition import PCA
from sklearn.model_selection import ShuffleSplit, StratifiedShuffleSplit, train_test_split, cross_val_score
from sklearn.metrics import precision_recall_curve, roc_curve, confusion_matrix, roc_auc_score
print("sklearn version: {}". format(sklearn.__version__))

import keras
from keras.models import Sequential
from keras.layers import Dense, Activation, Dropout, BatchNormalization
from keras.callbacks import ModelCheckpoint
print("keras version: {}". format(keras.__version__))

import tensorflow as tf
print("tensorflow version: {}". format(tf.__version__))

import optuna
print("optuna version: {}". format(optuna.__version__))

import mlflow
from mlflow.utils.mlflow_tags import MLFLOW_PARENT_RUN_ID
from mlflow.tracking import MlflowClient
print("mlflow version: {}". format(mlflow.__version__))

import matplotlib.pyplot as plt
import seaborn as sns

from datetime import datetime
import os
import warnings
warnings.simplefilter('ignore')

pandas version: 1.4.3
numpy version: 1.23.2
sklearn version: 1.0.2
keras version: 2.10.0
tensorflow version: 2.10.0


  from .autonotebook import tqdm as notebook_tqdm


optuna version: 3.0.1
mlflow version: 1.28.0


In [2]:
RANDOM_STATE=42
N_TRAILS=10 # run for x runs
TIMEOUT=600 # run for max 10 minutes (if the last run at 9 minutes runs for 1h, the active run is not killed)

In [3]:
client = MlflowClient()
try:
    experiment = client.create_experiment("Titanic")
except:
    experiment = client.get_experiment_by_name("Titanic").experiment_id

parent_run = client.create_run(experiment_id=experiment)

In [4]:
def load_data():
    # load prepared training and test dataset
    df_train = pd.read_pickle('../03_dataCleaningPreparation/df_train_prepared_reduced.pkl')
    df_test = pd.read_pickle('../03_dataCleaningPreparation/df_test_prepared_reduced.pkl')

    # split the training and test dataset to the input features (x_train, x_test) and the survival class (y_train)
    y_train = df_train['Survived']
    x_train = df_train.drop(['Survived'], axis=1)
    x_test = df_test

    x_train, x_validate, y_train, y_validate = train_test_split(x_train, y_train, test_size=0.3, stratify=y_train, random_state=RANDOM_STATE)

    return x_train, y_train, x_validate, y_validate, x_test

x_train, y_train, x_validate, y_validate, x_test = load_data()

In [5]:
def evaluate_model(x_train, y_train, y_validate, y_validate_pred, y_validate_scores, child_run):
    """
    evaluate the classification model with
    - classification report
    - precision-recall-curve
    - ROC curve
    """


    def plot_confusion_matrix(y_validate, y_validate_pred):
        group_names = ["True Neg", "False Pos", "False Neg", "True Pos"]
        group_counts = ["{0:0.0f}".format(value) for value in
                        confusion_matrix(y_validate, y_validate_pred).flatten()]
        group_percentages = ["{0:.2%}".format(value) for value in
                             confusion_matrix(y_validate, y_validate_pred).flatten()/np.sum(confusion_matrix(y_validate, y_validate_pred))]
        labels = [f"{v1}\n{v2}\n{v3}" for v1, v2, v3 in
                  zip(group_names,group_counts,group_percentages)]
        labels = np.asarray(labels).reshape(2,2)

        fig2, ax2 = plt.subplots()
        sns.heatmap(confusion_matrix(y_validate, y_validate_pred), annot=labels, fmt="", cmap='Blues')
        plt.xlabel('Predicted Label')
        plt.ylabel('True Label')
        client.log_figure(child_run.info.run_id, fig2, 'plot_confusion_matrix.png')
        plt.close()


    def plot_precision_recall_vs_threshold(y_validate, y_scores, child_run):
        precisions, recalls, thresholds = precision_recall_curve(y_validate, y_scores)

        # convert to f score
        fscore = (2 * precisions * recalls) / (precisions + recalls)
        # locate the index of the largest f score
        ix = np.argmax(fscore)
        
        client.log_metric(child_run.info.run_id, "f1_score", round(fscore[ix], 5))

        fig3, ax3 = plt.subplots()
        ax3.plot(thresholds, precisions[:-1], "b", label="Precision")
        ax3.plot(thresholds, recalls[:-1], "g", label="Recall")
        ax3.plot(thresholds, fscore[:-1], "r", label="F1 Score")
        ax3.axvline(x=thresholds[ix], color='red', linestyle='--')
        plt.axhline(y=precisions[ix], color='b', linestyle='--')
        plt.axhline(y=recalls[ix], color='g', linestyle='--')
        ax3.set_xlabel("Threshold")
        ax3.legend(loc="upper left")
        ax3.set_ylim([0,1])
        client.log_figure(child_run.info.run_id, fig3, 'plot_f1.png')
        plt.close()

        fig4, ax4 = plt.subplots()
        ax4.plot(recalls, precisions, marker='.', label='Logistic')
        ax4.scatter(recalls[ix], precisions[ix], 200, marker='o', color='red', label='Best')
        ax4.set_xlabel('Recall')
        ax4.set_ylabel('Precision')
        client.log_figure(child_run.info.run_id, fig4, 'plot_precision_recall.png')
        plt.close()
        

    def plot_roc_curve(y_validate, y_scores, child_run):
        fpr, tpr, thresholds = roc_curve(y_validate, y_scores)

        roc_auc = round(roc_auc_score(y_validate, y_scores), 3)
        
        optimal_idx = np.argmax(tpr - fpr)

        fig5, ax5 = plt.subplots()
        ax5.plot(fpr, tpr, linewidth=2)
        ax5.plot([0,1], [0,1], 'k--')
        ax5.axis([0,1,0,1])
        ax5.scatter(fpr[optimal_idx], tpr[optimal_idx], 200, marker='o', color='red', label='Best')
        ax5.set_xlabel('False Positive Rate')
        ax5.set_ylabel('True Positive Rate')
        client.log_figure(child_run.info.run_id, fig5, 'plot_roc_curve.png')
        plt.close()

        client.log_metric(child_run.info.run_id, "roc_auc", roc_auc)

        

    plot_confusion_matrix(y_validate, y_validate_pred)
    plot_precision_recall_vs_threshold(y_validate, y_validate_scores, child_run)
    plot_roc_curve(y_validate, y_validate_scores, child_run)

In [6]:
def create_model(trial):

    child_run = client.create_run(
        experiment_id=experiment,
        tags={
            MLFLOW_PARENT_RUN_ID: parent_run.info.run_id
        }
    )


    model = Sequential()
    model.add(keras.Input(shape=(x_train.shape[1],)))

    n_layers = trial.suggest_int('n_layers', 1, 4)
    client.log_param(child_run.info.run_id, "n_layers", n_layers-1)

    for layer in range(1,n_layers):
        n_units = trial.suggest_int('n_units_'+str(layer), 1, 20)
        client.log_param(child_run.info.run_id, 'n_units_'+str(layer), n_units)
        activation = trial.suggest_categorical('activation_'+str(layer), ['relu', 'tanh', 'elu'])
        client.log_param(child_run.info.run_id, 'activation_'+str(layer), activation)

        model.add(Dense(units=n_units))
        model.add(BatchNormalization())
        model.add(Activation(activation))
        model.add(Dropout(0.5))

    model.add(Dense(1, activation='sigmoid'))

    # model.summary()

    return model, child_run

In [7]:
class Objective:
    
    def __init__(self):
        self.best_model = None
        self._model = None

    
    def __call__(self, trial):
    
        model, child_run = create_model(trial)

        adam = keras.optimizers.Adam(lr=0.01, beta_1=0.9, beta_2=0.999, amsgrad=False)

        # compile the keras model
        model.compile(
            loss='binary_crossentropy',
            optimizer=adam,
            metrics=['accuracy']
            )


        # SETUP A EARLY STOPPING CALL and model check point API
        earlystopping = keras.callbacks.EarlyStopping(
            monitor='accuracy',
            patience=10,
            verbose=1,
            mode='min'
            )

        logdir = os.path.join('tensorboard', datetime.now().strftime('%Y-%m-%d_%H-%M-%S'))
        tensorboard_callback = keras.callbacks.TensorBoard(log_dir=logdir)

        checkpointer = ModelCheckpoint(
            filepath='bestvalue',
            moniter='val_loss',
            verbose=0,
            save_best_only=True
            )

        
        callback_list = [earlystopping]



        # fit the keras model on the dataset
        model.fit(
            x_train, y_train,
            epochs=150,
            batch_size=10,
            callbacks=callback_list,
            validation_data=(x_validate, y_validate)
        )

        # # predict the training outcome
        # y_validate_pred = model.predict(x_validate)

        # # predict probabilities
        # y_validate_proba = model.predict_proba(x_validate)
        # # keep probabilities for the positive outcome only
        # y_validate_scores = y_validate_proba[:, 1]


        y_validate_scores = model.predict(x_validate)
        y_validate_pred = tf.greater(y_validate_scores, .5)



        evaluate_model(x_train, y_train, y_validate, y_validate_pred, y_validate_scores, child_run)

        # evaluate the keras model
        _, accuracy = model.evaluate(x_validate, y_validate)
        client.log_metric(child_run.info.run_id, "cv_score", accuracy)

        return accuracy

    def callback(self, study, trial):
        if study.best_trial == trial:
            self.best_model = self._model


In [8]:
objective = Objective()

study = optuna.create_study(direction="maximize")

study.optimize(
  objective,
  n_trials=N_TRAILS,
  timeout=TIMEOUT,
  n_jobs=-1,
  callbacks=[objective.callback]
  )

print("Study statistics: ")
print("Number of finished trials: ", len(study.trials))

print("Best trial:")
print(study.best_value)
print(study.best_params)

client.log_metric(parent_run.info.run_id, "best_cv_score", round(study.best_value, 3))

for param in study.best_params:
  client.log_param(parent_run.info.run_id, param, study.best_params[param])


mlflow.end_run()

[32m[I 2022-09-19 21:10:59,952][0m A new study created in memory with name: no-name-a109b16f-6aee-4ece-93ea-5631c728f9de[0m


Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 1/150
Epoch 2/150
Epoch 2/150
Epoch 2/150
Epoch 2/150
Epoch 3/150
Epoch 3/150
Epoch 3/150
Epoch 3/150
Epoch 4/150
Epoch 4/150
Epoch 4/150
Epoch 4/150
Epoch 5/150
Epoch 5/150
Epoch 6/150
Epoch 6/150
10/63 [===>..........................] - ETA: 1s - loss: 0.5142 - accuracy: 0.7600Epoch 5/150
Epoch 5/150
Epoch 7/150
Epoch 6/150
Epoch 8/150
Epoch 7/150
 4/63 [>.............................] - ETA: 1s - loss: 0.5720 - accuracy: 0.7250Epoch 7/150
Epoch 9/150
Epoch 9/150
Epoch 8/150
 3/63 [>.............................] - ETA: 1s - loss: 0.5747 - accuracy: 0.7000Epoch 8/150
 9/63 [===>..........................] - ETA: 1s - loss: 0.5200 - accuracy: 0.7556Epoch 10/150
Epoch 10/150
Epoch 11/150
Epoch 11/150
Epoch 9/150
 1/63 [..............................] - ETA: 1s - loss: 0.3904 - accuracy: 0.9000Epoch 9/150
Epoch 11: early stopping
Epoch 11: early stopping
Epoch 10/150
Epoch 10/150
1/9 [==>...........................] - ETA: 0s - loss: 0.4452 - ac

[32m[I 2022-09-19 21:11:30,805][0m Trial 1 finished with value: 0.8208954930305481 and parameters: {'n_layers': 2, 'n_units_1': 10, 'activation_1': 'elu'}. Best is trial 1 with value: 0.8208954930305481.[0m


13/63 [=====>........................] - ETA: 0s - loss: 0.4813 - accuracy: 0.7769

[32m[I 2022-09-19 21:11:30,986][0m Trial 3 finished with value: 0.8171641826629639 and parameters: {'n_layers': 2, 'n_units_1': 16, 'activation_1': 'elu'}. Best is trial 1 with value: 0.8208954930305481.[0m


Epoch 11: early stopping
Epoch 11: early stopping
1/9 [==>...........................] - ETA: 3s - loss: 0.4919 - accuracy: 0.8125Epoch 2/150

[32m[I 2022-09-19 21:11:35,773][0m Trial 0 finished with value: 0.8171641826629639 and parameters: {'n_layers': 3, 'n_units_1': 10, 'activation_1': 'elu', 'n_units_2': 5, 'activation_2': 'elu'}. Best is trial 1 with value: 0.8208954930305481.[0m




[32m[I 2022-09-19 21:11:35,864][0m Trial 2 finished with value: 0.7985074520111084 and parameters: {'n_layers': 3, 'n_units_1': 10, 'activation_1': 'relu', 'n_units_2': 16, 'activation_2': 'tanh'}. Best is trial 1 with value: 0.8208954930305481.[0m


Epoch 3/150
Epoch 4/150
Epoch 2/150
Epoch 4/150
Epoch 3/150
Epoch 5/150
Epoch 4/150
Epoch 5/150
Epoch 6/150
Epoch 5/150
Epoch 6/150
 4/63 [>.............................] - ETA: 2s - loss: 0.4342 - accuracy: 0.8000Epoch 2/150
Epoch 6/150
Epoch 7/150
Epoch 7/150
Epoch 8/150
Epoch 3/150
Epoch 9/150
Epoch 9/150
Epoch 9/150
Epoch 4/150
Epoch 10/150
Epoch 11/150
Epoch 11/150
Epoch 11/150
Epoch 11: early stopping
Epoch 11: early stopping
Epoch 6/150
Epoch 11: early stopping

[32m[I 2022-09-19 21:11:56,885][0m Trial 4 finished with value: 0.8246268630027771 and parameters: {'n_layers': 2, 'n_units_1': 11, 'activation_1': 'tanh'}. Best is trial 4 with value: 0.8246268630027771.[0m


Epoch 1/150


[32m[I 2022-09-19 21:11:57,724][0m Trial 6 finished with value: 0.8059701323509216 and parameters: {'n_layers': 1}. Best is trial 4 with value: 0.8246268630027771.[0m


Epoch 7/150

[32m[I 2022-09-19 21:11:59,427][0m Trial 5 finished with value: 0.8134328126907349 and parameters: {'n_layers': 2, 'n_units_1': 17, 'activation_1': 'elu'}. Best is trial 4 with value: 0.8246268630027771.[0m


Epoch 8/150
Epoch 2/150
 1/63 [..............................] - ETA: 3:42 - loss: 0.8076 - accuracy: 0.3000Epoch 9/150
Epoch 3/150
Epoch 4/150
Epoch 10/150
Epoch 2/150
 6/63 [=>............................] - ETA: 2s - loss: 0.6537 - accuracy: 0.6333Epoch 5/150
Epoch 11/150
Epoch 3/150
Epoch 7/150
Epoch 11: early stopping
Epoch 8/150
Epoch 4/150


[32m[I 2022-09-19 21:12:10,330][0m Trial 7 finished with value: 0.7985074520111084 and parameters: {'n_layers': 4, 'n_units_1': 4, 'activation_1': 'elu', 'n_units_2': 20, 'activation_2': 'elu', 'n_units_3': 11, 'activation_3': 'tanh'}. Best is trial 4 with value: 0.8246268630027771.[0m


Epoch 9/150
Epoch 5/150
Epoch 11/150
 1/63 [..............................] - ETA: 1s - loss: 1.0009 - accuracy: 0.5000Epoch 6/150
Epoch 11: early stopping
Epoch 7/150

[32m[I 2022-09-19 21:12:16,278][0m Trial 8 finished with value: 0.8134328126907349 and parameters: {'n_layers': 1}. Best is trial 4 with value: 0.8246268630027771.[0m


Epoch 8/150
Epoch 9/150
Epoch 10/150
Epoch 11/150
Epoch 11: early stopping


[32m[I 2022-09-19 21:12:23,328][0m Trial 9 finished with value: 0.8171641826629639 and parameters: {'n_layers': 4, 'n_units_1': 14, 'activation_1': 'elu', 'n_units_2': 5, 'activation_2': 'elu', 'n_units_3': 19, 'activation_3': 'elu'}. Best is trial 4 with value: 0.8246268630027771.[0m


Study statistics: 
Number of finished trials:  10
Best trial:
0.8246268630027771
{'n_layers': 2, 'n_units_1': 11, 'activation_1': 'tanh'}
