In [None]:
# from google.colab import drive
# drive.mount('/content/gdrive', force_remount=True)
# !pip install -q kaggle
# !ls ~/.kaggle/kaggle.json
# !ls gdrive/MyDrive/colab_work/kaggle.json

# !mkdir -p ~/.kaggle
# !cp gdrive/MyDrive/colab_work/kaggle.json ~/.kaggle/
# !chmod 600 ~/.kaggle/kaggle.json


## Neural Networks Deep Learning Hyperparameters search on Titanic Disaster dataset

@misc{omalley2019kerastuner
,     title        = {KerasTuner}
,     author       = {O'Malley, Tom and Bursztein, Elie and Long, James and Chollet, Fran\c{c}ois and Jin, Haifeng and Invernizzi, Luca and others}
,     year         = {2019}
,     howpublished = {https://github.com/keras-team/keras-tuner}
}

{cite}`omalley2019kerastuner` — KerasTuner, O'Malley, Tom and Bursztein, Elie and Long, James and Chollet, François and Jin, Haifeng and Invernizzi, Luca and others, 2019,
https://github.com/keras-team/keras-tuner

### Jupyter Notebook [uploaded to GitHub](https://github.com/jayc279/kaggle_notebooks/blob/main/hyper_parameters_tuning_titanic-disaster_data.ipynb)
[Companion notebook: Titanic Disaster model trained with the hyperparameters captured from a run of the notebook above](https://github.com/jayc279/kaggle_notebooks/blob/main/titanic-ml-from-disaster-neural-networks.ipynb)

## import packages

In [None]:
import numpy as np
import pandas as pd
import math
import os
import gc
import re
import inspect
import matplotlib.pyplot as plt
import seaborn as sns

from math import floor
from math import ceil

import warnings
warnings.filterwarnings('ignore')
pd.set_option('display.max_columns', None)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import cross_val_score

In [None]:
import tensorflow as tf
import tensorflow.keras as keras

from keras.layers import Dense, Dropout, Flatten, BatchNormalization, Input
from keras.models import Sequential
from keras.optimizers import Adadelta, Adagrad, Adam, Adamax, Nadam, SGD, RMSprop, Ftrl
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier, KerasRegressor

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.layers import LeakyReLU, ReLU, ELU, PReLU
from keras.constraints import MaxNorm
from keras.initializers import glorot_uniform, he_uniform, glorot_normal, he_normal
from keras.initializers import uniform, normal, zero


### TPU - GPU setup

In [None]:
# Detect TPU
def tpu_setup():
    """Return a TPU distribution strategy, or the default strategy without a TPU.

    Returns:
        A `TPUStrategy` bound to the detected TPU when the cluster resolver
        succeeds, otherwise the result of `tf.distribute.get_strategy()`.
    """
    try:
        # The resolver lives in the `tf.distribute.cluster_resolver` module.
        tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
        print('detected: ', tpu.master())
    except ValueError as e:
        # Raised by the resolver when no TPU can be located.
        print('Error: no TPU: ', e)
        tpu = None

    if tpu:
        tf.config.experimental_connect_to_cluster(tpu)
        tf.tpu.experimental.initialize_tpu_system(tpu)
        strategy = tf.distribute.experimental.TPUStrategy(tpu)
    else:
        strategy = tf.distribute.get_strategy()

    return strategy


In [None]:
def gpu_setup():
    """Detect GPUs and return an appropriate distribution strategy.

    Returns:
        A `MirroredStrategy` when at least one GPU is visible and could be
        configured, otherwise the result of `tf.distribute.get_strategy()`.
        A strategy object is returned on every path.
    """
    gpus = tf.config.experimental.list_physical_devices('GPU')

    # No GPU: fall back to the default strategy instead of returning None.
    if not gpus:
        return tf.distribute.get_strategy()

    try:
        # Enable memory growth on every GPU before building the strategy.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        strategy = tf.distribute.MirroredStrategy()
        print(f'using: {len(gpus)}')
    except Exception as err:
        # Best effort: keep the notebook running, but report why the GPU
        # configuration was abandoned.
        print('GPU setup failed, falling back to default strategy: ', err)
        strategy = tf.distribute.get_strategy()

    return strategy


In [None]:
# Setting TPU or GPU strategy for this session
# strategy = tpu_setup()
# strategy = gpu_setup()
# print('Replicas in strategy: ', replicas)                 # object has no attribute 'num_replicas_in_sync'
# !nvidia-smi

In [None]:
# function to clear model and garbage collector
def clear_bags(model):
    """Drop this function's reference to `model`, then run the garbage collector.

    Only the name local to this function is deleted; references held by the
    caller are not touched.
    """
    del model
    gc.collect()


### load and clean Titanic Disaster dataset

Load the Titanic Disaster train and test data. You will see we have:  
for training: 891 rows and 12 columns  
for testing: 418 rows and 11 columns (does not include `Survived` column)

In [None]:
# Read the Kaggle Titanic train and test CSV files and show their shapes.
train_path = '/kaggle/input/titanic/train.csv'
test_path = '/kaggle/input/titanic/test.csv'
train_df, test_df = pd.read_csv(train_path), pd.read_csv(test_path)
print(train_df.shape, test_df.shape)

Extract the `Survived` column from the train dataset as the true-label series `y_train`, and keep `PassengerId` from the test dataset for the submission file  
Remove columns that do not add to predictions - columns that do not affect Survival outcomes  
Columns like: `PassengerId`, `Name`, `Ticket`, `Fare`  
Drop the same columns as above from the `test` dataset  
Drop `Survived` from the train dataset since we already created `y_train`

In [None]:
# Move the label column out of the frame, then discard the unused feature columns.
y_train = train_df.pop('Survived')
train_df.drop(columns=['PassengerId', 'Name', 'Ticket', 'Fare'], inplace=True)
train_df.shape, y_train.shape

In [None]:
# Keep the ids for the submission file, then drop the same columns as in train.
passengerId = test_df.pop('PassengerId')
test_df.drop(columns=['Name', 'Ticket', 'Fare'], inplace=True)
test_df.shape

### column `Age` clean-up

In [None]:
# Summary statistics of Age. The test statistics are only printed for comparison;
# the imputation bounds below come from the train set alone.
train_mean_age = train_df['Age'].mean(numeric_only=True)
test_mean_age = test_df['Age'].mean(numeric_only=True)
train_quant_age = train_df['Age'].quantile(0.75)
test_quant_age = test_df['Age'].quantile(0.75)
print(train_mean_age, train_quant_age)
print(test_mean_age, test_quant_age)

# ma is the smaller and mb the larger of (train mean, train 75th percentile).
ma, mb = sorted((train_mean_age, train_quant_age))

# Explicit integer bounds for the half-open draw [age_low, age_high); the upper
# bound is widened by one when both bounds truncate to the same integer, which
# would otherwise make the draw fail.
age_low = int(ma)
age_high = max(int(mb), age_low + 1)

# A seeded generator makes the imputed ages identical on every Restart & Run All.
AGE_IMPUTE_SEED = 42
age_rng = np.random.default_rng(AGE_IMPUTE_SEED)


def _impute_age(age):
    """Return a copy of `age` whose NaNs are replaced by random integer ages."""
    missing = age.isna()
    filled = age.copy()
    filled.loc[missing] = age_rng.integers(age_low, age_high, size=int(missing.sum()))
    return filled


train_df['Age'] = _impute_age(train_df['Age'])
test_df['Age'] = _impute_age(test_df['Age'])

### column `Cabin` clean-up

In [None]:
# Regression runs
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, f1_score

# Raw-string patterns: "\s" inside a normal string literal is an invalid escape
# sequence and triggers a warning on recent Python versions.
_CABIN_WHITESPACE = re.compile(r'\s+')
_CABIN_REPEATED_CHAR = re.compile(r'(.)\1+')


def _clean_cabin(cabin):
    """Strip whitespace from a cabin string and collapse runs of one repeated character."""
    return _CABIN_REPEATED_CHAR.sub(r'\1', _CABIN_WHITESPACE.sub('', cabin))


# Fill NaNs in both train and test data with the placeholder 'L0' (the author's
# label for the lowest deck level), then normalise every cabin string the same way.
train_df['Cabin'] = train_df['Cabin'].fillna('L0').map(_clean_cabin)
test_df['Cabin'] = test_df['Cabin'].fillna('L0').map(_clean_cabin)

# Fit the encoder on the cabin values of BOTH frames so that transforming the
# test set cannot hit a label the encoder has never seen.
X1 = list(train_df['Cabin'].unique())
X2 = list(test_df['Cabin'].unique())
X3 = X1 + X2

le = LabelEncoder()
le.fit(X3)

train_df['Cabin'] = le.transform(train_df['Cabin'])
test_df['Cabin'] = le.transform(test_df['Cabin'])

### one-hot encoding of categorical columns

In [None]:
# One-hot encode the remaining categorical columns of both frames.
train_df, test_df = (pd.get_dummies(frame) for frame in (train_df, test_df))

one last check to make sure columns in training and test datasets match   
if columns do not match we can raise exception or drop the offending columns and continue  
**Note** if the number of columns between train and test sets do not match, do not proceed

here the code drops mis-matched columns and continues with hyperparameter tuning

In [None]:
# Align the feature sets: a column survives only if both frames contain it.
train_only = [col for col in train_df.columns if col not in test_df.columns]
for col in train_only:
    print(f'column:-{col}- does not exist in test_df')
train_df.drop(columns=train_only, inplace=True)

# Evaluated after the train frame has been trimmed, as in the original order.
test_only = [col for col in test_df.columns if col not in train_df.columns]
for col in test_only:
    print(f'column:-{col}- does not exist in train_df')
test_df.drop(columns=test_only, inplace=True)

print(train_df.shape, test_df.shape, y_train.shape)

convert `True` to `1` and `False` to `0`   
This is a requirement to process data in Neural Networks

In [None]:
# Convert True/False to 1 and 0.
# An explicit cast of the boolean columns is used instead of
# `replace({True: 1, False: 0})`: replace relies on pandas silently downcasting
# the result to integers, which recent pandas versions deprecate.
train_bool_cols = train_df.select_dtypes(include='bool').columns
test_bool_cols = test_df.select_dtypes(include='bool').columns
train_df = train_df.astype({col: 'int64' for col in train_bool_cols})
test_df = test_df.astype({col: 'int64' for col in test_bool_cols})

scale data using SkLearn StandardScaler    
Please use other scalers if you think it might improve the model

we only scale two columns in both train and test datasets  
1. Age
2. Cabin

In [None]:
# Standardise Age: the scaler is fitted on train only and re-used for test.
sc = StandardScaler()
age_scaled_train = sc.fit_transform(train_df[['Age']])
age_scaled_test = sc.transform(test_df[['Age']])

train_df['Age_scaled'] = age_scaled_train
test_df['Age_scaled'] = age_scaled_test

# The raw column is no longer needed once its scaled copy exists.
for frame in (train_df, test_df):
    frame.drop(columns=['Age'], inplace=True)

In [None]:
# Sanity check: shapes of features, labels and test set after Age was replaced by Age_scaled.
train_df.shape, y_train.shape, test_df.shape

In [None]:
# Standardise the label-encoded Cabin column as well.
# Note: this re-fits the same `sc` object, so it no longer holds the Age statistics.
cabin_scaled_train = sc.fit_transform(train_df[['Cabin']])
cabin_scaled_test = sc.transform(test_df[['Cabin']])

train_df['Cabin_scaled'] = cabin_scaled_train
test_df['Cabin_scaled'] = cabin_scaled_test

for frame in (train_df, test_df):
    frame.drop(columns=['Cabin'], inplace=True)

In [None]:
## check shapes of train, labels and test - train and test must have the same number of columns
train_df.shape, y_train.shape, test_df.shape

Split the training set into training and validation datasets.  
Ideally we would not have to carve the validation dataset out of the training set.

print shape to make sure:  
1. number of columns match between train and test datasets
2. number of rows match between train and validation datasets


In [None]:
# Hold out the last 30% of the rows as a validation set.
# With shuffle=False the rows keep their order, so random_state has no effect here.
split_kwargs = dict(test_size=0.3, random_state=42, shuffle=False)
train_X, val_X, train_y, val_y = train_test_split(train_df, y_train, **split_kwargs)

In [None]:
# Shapes of the train/validation features followed by the train/validation labels.
print(train_X.shape, val_X.shape, train_y.shape, val_y.shape)

<hr><br>
<hr>

### Neural Networks - Deep Learning Hyperparameter Tuning

In [None]:
# Seed TensorFlow's random number generator.
seed = 7
tf.random.set_seed(seed)

# Collects the hyper-parameter values used / found in this run.
hyper_params = {}

Declare the defaults used to set up the Deep Learning Neural Network whose hyper-parameters will be tuned

In [None]:
# Baseline settings used before any tuning takes place.
def_batch, def_epoch, def_num_layers, def_dropout = 32, 100, 4, 0.2

# Default layer width: number of features times the default batch size.
def_neurons = math.ceil(train_X.shape[1] * def_batch)

defaults_report = {
    'def_neurons:': def_neurons,
    'def_num_layers:': def_num_layers,
    'def_batch:': def_batch,
    'def_epoch:': def_epoch,
}
print(
    *(f"{label:<15}{value:>10}" for label, value in defaults_report.items()),
    sep='\n'
)


### **Create Deep Learning Neural Network model to tune hyper-parameters**

In [None]:
# Starting values for every tunable setting. None means "let create_hyper_model
# choose its own fallback"; the trailing comments show values the author tried.
best_learning_rate = None        # 0.001
best_momentum = None             # 0.1
best_init_mode = None            # 'he_normal'
best_activation = 'relu'
best_dropout = None              # 0.2
best_weight_constraint = None    # 4.0
best_neurons = def_neurons       # 256
best_optimizer = 'Adam'
best_epoch_num = def_epoch
best_batch_size = def_batch
best_num_hidden_layers = 3

# Mirror the same starting values in the hyper_params dictionary.
hyper_params.update({
    'learning_rate': best_learning_rate,
    'momentum': best_momentum,
    'init_mode': best_init_mode,
    'activation': best_activation,
    'dropout': best_dropout,
    'weight_constraint': best_weight_constraint,
    'neurons': best_neurons,
    'optimizer': best_optimizer,
    'epochs': best_epoch_num,
    'batch_size': best_batch_size,
    'num_hidden_layers': best_num_hidden_layers,
})

# Frame whose column count defines the network's input shape
# (the model only needs the number of features).
inputdf = train_df


In [None]:
def create_hyper_model(lr=best_learning_rate,
                       momentum=best_momentum,
                       init_mode=best_init_mode,
                       activation=best_activation,
                       dropout=best_dropout,
                       weight_constraint=best_weight_constraint,
                       neurons=best_neurons,
                       optimizer=best_optimizer,
                       num_hidden_layers=best_num_hidden_layers,
                      inputdf=inputdf):
    """Build and compile a Keras `Sequential` binary classifier.

    Arguments passed as ``None`` fall back to a default chosen inside the
    function. The keyword defaults in the signature are bound once, when this
    cell is executed; later changes to the ``best_*`` globals do not alter them.

    Args:
        lr: learning rate assigned to the optimizer (``None`` -> 0.001).
        momentum: BatchNormalization momentum (``None`` -> the Keras default).
            A BatchNormalization layer is only added when the value exceeds 0.5.
        init_mode: kernel initializer name (``None`` -> 'glorot_normal').
        activation: hidden-layer activation (``None`` -> the `Dense` default).
        dropout: dropout rate (``None`` -> 0.2). A Dropout layer is only added
            when the value exceeds 0.1.
        weight_constraint: MaxNorm limit of the hidden kernels (``None`` -> 4.0).
        neurons: base width of the network. ``None`` derives it from the number
            of features and the global `best_batch_size`.
        optimizer: identifier resolved through `keras.optimizers.get`.
        num_hidden_layers: number of hidden Dense layers, at least 1.
        inputdf: object whose ``shape[1]`` is the number of input features.

    Returns:
        The compiled model (binary cross-entropy loss, accuracy metric).

    Raises:
        ValueError: if `num_hidden_layers` is smaller than 1.
    """
    if num_hidden_layers < 1:
        raise ValueError(
            f'num_hidden_layers must be >= 1, got {num_hidden_layers!r}'
        )

    # clear model session and recreate
    keras.backend.clear_session()
    dense_params = inspect.signature(Dense).parameters
    batch_norm_params = inspect.signature(BatchNormalization).parameters

    ###################################################
    # fallbacks for hyper-search parameters left as None
    ###################################################
    if init_mode is None:
        # The Dense signature default cannot be taken from its string form
        # (quotes included -> "Unknown initializer"), so a fixed name is used.
        init_mode = 'glorot_normal'

    if activation is None:
        # Use the real default object; its string form would be the text 'None'.
        activation = dense_params['activation'].default

    if weight_constraint is None:
        weight_constraint = 4.0

    if lr is None:    # learning_rate - optimizer
        lr = 0.001

    if momentum is None:
        momentum = batch_norm_params['momentum'].default

    if dropout is None:
        dropout = 0.2

    # Honour the caller's `neurons`; only derive a width when none was given.
    # (Previously the argument was always overwritten, so searching over
    # `neurons` built identical models.)
    if neurons is None:
        neurons = inputdf.shape[1] * (best_batch_size / 2)
    neurons = math.ceil(neurons)

    ###################################################
    # function to return a list of layer widths that tapers linearly
    # from nn_num down to nn_num // n_layers over n_layers entries
    ###################################################
    def return_num_layers(nn_num=neurons, n_layers=num_hidden_layers):
        layers = []

        divn = 1 if n_layers <= 1 else (n_layers - 1)
        # first layer contains all neurons
        first_lyr_units = nn_num
        # last layer (before output)
        last_lyr_units = nn_num // n_layers

        incr_by = (last_lyr_units - first_lyr_units) / divn
        num_neurons = first_lyr_units

        for _ in range(n_layers):
            layers.append(math.ceil(num_neurons))
            num_neurons = num_neurons + incr_by

        return layers

    num_lyrs = return_num_layers(neurons, num_hidden_layers)
    n_features = inputdf.shape[1]

    ###################################################
    # create model
    ###################################################
    model = Sequential()

    model.add(Dense(num_lyrs[0] * 2, input_shape=(n_features,),
                    kernel_initializer=init_mode, activation=activation,
                    kernel_constraint=MaxNorm(weight_constraint)))

    if momentum > 0.5:
        model.add(BatchNormalization(momentum=momentum))

    # Middle layers (none when num_hidden_layers <= 2). `units` must be a
    # positive integer, so the scaled width is truncated and floored at 1.
    for i in range(1, num_hidden_layers - 1):
        units = max(1, int(num_lyrs[i - 1] * (i / num_hidden_layers)))
        model.add(Dense(units,
                        kernel_initializer=init_mode, activation=activation,
                        kernel_constraint=MaxNorm(weight_constraint)))

    if dropout > 0.1:
        model.add(Dropout(dropout))

    if len(num_lyrs) > 1:
        model.add(Dense(num_lyrs[-1], kernel_initializer=init_mode, activation=activation,
                        kernel_constraint=MaxNorm(weight_constraint)))

    model.add(Dense(1, kernel_initializer=init_mode, activation='sigmoid'))
    ###################################################

    # Compile model
    opt = keras.optimizers.get(optimizer)   # opt.__dir__ to know what can be set
    opt.learning_rate = lr
    model.compile(loss='binary_crossentropy', optimizer=opt, metrics=['accuracy'])
    return model

### create model (default settings) before hyper-parameter tuning to check current layers and parameters

In [None]:
# Build the model with the signature defaults only and list its layers and parameter counts.
cvmodel = create_hyper_model()
cvmodel.summary()

### **Tune hyperparameters**

If you have the resources you can tune as many hyperparameters as you wish.  
For this notebook only a few were tuned, each with a limited set of candidate values.

In [None]:
clear_bags(cvmodel)

# ---- candidate values for every tunable hyper-parameter ----
num_hidden_layers = [2, 3, 4, 5]
batch_size = [16, 32, 64]
epochs = [30, 50, 70, 90]
neurons = [128, 256, 512]

# further optimizers that could be tried: 'Adadelta', 'Adamax', 'Nadam'
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adam']

learn_rate = [0.001, 0.01, 0.1]
momentum = [0.0, 0.2, 0.4, 0.6]

# further initializers that could be tried: 'uniform', 'lecun_uniform', 'normal', 'zero'
init_mode = ['glorot_normal', 'glorot_uniform', 'he_normal', 'he_uniform']

# further activations that could be tried: 'linear', 'selu', 'elu', 'exponential', 'gelu', 'mish'
activation = ['relu', 'tanh', 'leaky_relu']

weight_constraint = [0.1, 1.0, 2.0]
dropout_rate = [0.3, 0.5, 0.6, 0.7]

# ---- the subset that is actually searched ----
# optimizer, lr (learn_rate), momentum, init_mode, activation and
# weight_constraint are defined above but left out of this run.
param_grid = {
    'batch_size': batch_size,
    'neurons': neurons,
    'num_hidden_layers': num_hidden_layers,
    'dropout': dropout_rate,
    'epochs': epochs,
}

# The wrapper receives the same candidate lists as its initial parameters,
# plus the frame that defines the input shape.
cvmodel = KerasClassifier(build_fn=create_hyper_model,
                          inputdf=train_df,
                          verbose=0,
                          **param_grid)


In [None]:
# Exhaustive search over param_grid with 3-fold cross-validation, run in a single job.
grid = GridSearchCV(
    estimator=cvmodel,
    param_grid=param_grid,
    cv=3,
    n_jobs=1,
)
grid.fit(train_df, y_train)

In [None]:
# Best result first, then the mean/std test score of every parameter combination.
print(f'best_fit:{grid.best_score_} best_params:{grid.best_params_}')
means = grid.cv_results_['mean_test_score']
stds = grid.cv_results_['std_test_score']
params = grid.cv_results_['params']

for idx, param in enumerate(params):
    print(f'mean:{means[idx]} - std:{stds[idx]} - param:{param}')

In [None]:
# grid.best_params_.keys()

In [None]:
def list_params():
    """Copy the best GridSearchCV values into the `best_*` globals and `hyper_params`.

    For each tunable hyper-parameter found in `grid.best_params_` the matching
    module-level `best_*` variable is overwritten; hyper-parameters that were not
    searched keep their current value. All values are then stored in
    `hyper_params` and printed.
    """
    # The names below are rebound in this function, so they must be declared
    # global; otherwise the assignments would only create function locals.
    global best_optimizer, best_neurons, best_num_hidden_layers, best_weight_constraint
    global best_dropout, best_activation, best_init_mode, best_learning_rate
    global best_momentum, best_epoch_num, best_batch_size

    tuned = grid.best_params_

    # capture the best value for each hyper-parameter if available in the gridCV run
    best_optimizer = tuned.get('optimizer', best_optimizer)
    best_epoch_num = tuned.get('epochs', best_epoch_num)
    best_batch_size = tuned.get('batch_size', best_batch_size)
    best_learning_rate = tuned.get('lr', best_learning_rate)
    best_momentum = tuned.get('momentum', best_momentum)
    best_init_mode = tuned.get('init_mode', best_init_mode)
    best_activation = tuned.get('activation', best_activation)
    best_dropout = tuned.get('dropout', best_dropout)
    best_neurons = tuned.get('neurons', best_neurons)
    best_weight_constraint = tuned.get('weight_constraint', best_weight_constraint)
    best_num_hidden_layers = tuned.get('num_hidden_layers', best_num_hidden_layers)

    ## add to hyper_params dictionary
    hyper_params['optimizer'] = best_optimizer
    hyper_params['dropout'] = best_dropout
    hyper_params['neurons'] = best_neurons
    hyper_params['weight_constraint'] = best_weight_constraint
    hyper_params['activation'] = best_activation
    hyper_params['init_mode'] = best_init_mode
    hyper_params['learning_rate'] = best_learning_rate
    hyper_params['momentum'] = best_momentum
    hyper_params['epochs'] = best_epoch_num
    hyper_params['batch_size'] = best_batch_size
    # Store the tuned value, not the list of candidates that was searched.
    hyper_params['num_hidden_layers'] = best_num_hidden_layers

    ## print best values
    print('best_optimizer: ', best_optimizer)
    print('best_neurons: ', best_neurons)
    print('best_num_hidden_layers: ', best_num_hidden_layers)
    print('best_weight_constraint: ', best_weight_constraint)
    print('best_dropout: ', best_dropout)
    print('best_activation: ', best_activation)
    print('best_init_mode: ', best_init_mode)
    print('best_learning_rate: ', best_learning_rate)
    print('best_momentum: ', best_momentum)
    print('best_epoch_num: ', best_epoch_num)
    print('best_batch_size: ', best_batch_size)


list_params()

### **Create Final Model incorporating all Tuned hyper-parameters**

In [None]:
# Shapes of features, labels and test set, plus the final feature names.
train_df.shape, y_train.shape, test_df.shape, train_df.columns

In [None]:
# Shuffled 70/30 split used for the final model.
# NOTE: this rebinds y_train to the training part of the labels, so the cell
# is not safe to run twice without re-creating y_train first.
X_train, X_val, y_train, y_val = train_test_split(
    train_df,
    y_train,
    test_size=0.3,
    random_state=111,
)

In [None]:
# Shapes of the final train/validation features and labels, followed by the test set.
print(X_train.shape, X_val.shape, y_train.shape, y_val.shape, test_df.shape)

### list of hyper-parameters fine-tuned for the Titanic Disaster dataset

In [None]:
# Show every hyper-parameter recorded for this run.
for key, value in hyper_params.items():
    print(f'key: {key:20s} value: {value}')

### build and execute final model

Use the hyperparameters captured during GridSearchCV.  
You can also change the values manually before building and executing the model, e.g. to improve accuracy.

In [None]:
clear_bags(cvmodel)

# Number of hidden layers for the final model: the value selected by the grid
# search, or the current best_num_hidden_layers when it was not searched.
# (The list `num_hidden_layers` holds the candidates; its length is not a
# meaningful layer count.)
final_num_hidden_layers = grid.best_params_.get('num_hidden_layers',
                                                best_num_hidden_layers)

final_model = create_hyper_model(lr=best_learning_rate,
                                 momentum=best_momentum,
                                 init_mode=best_init_mode,
                                 activation=best_activation,
                                 dropout=best_dropout,
                                 weight_constraint=best_weight_constraint,
                                 neurons=best_neurons,
                                 optimizer=best_optimizer,
                                 num_hidden_layers=final_num_hidden_layers,
                                 inputdf=X_train,
                                )
final_model.summary()

### Keras callbacks - Learning Rate, and Early Stopping

**[Early Stopping](https://keras.io/api/callbacks/early_stopping/)** - stop training when a monitored metric has stopped improving.  
The quantity to be monitored needs to be available in the `logs` dict.

In [None]:
# early stopping callback on 'val_accuracy'
callback = keras.callbacks.EarlyStopping(monitor='val_accuracy',  # what to monitor
                                         min_delta=0.03,      # smallest change that counts as an improvement
                                         patience=35,          # num of epochs without improvement before stopping
                                         mode='max',          # 'max': stop once the monitored value stops increasing
                                         verbose=1,           # verbose=0 don't print output
                                         restore_best_weights=False,   # False: keep the weights of the last epoch
                                         )

In [None]:
# Learning-rate schedule: a tiny linear warm-up (+1e-7 per epoch) during the
# first ten epochs, followed by exponential decay (factor exp(-0.004) per epoch).
#
# learning rate scheduling
LEARN_RATE = 0.001

def lr_scheduler(epoch):
    """Return the learning rate for the given 0-based epoch index.

    The value depends only on `epoch` and the constant `LEARN_RATE`; no global
    state is modified, so repeated fits or re-run cells get the same schedule.
    For epochs visited in order 0, 1, 2, ... this matches the rates the earlier
    accumulating implementation produced on its first pass.

    Args:
        epoch: 0-based epoch index passed in by the Keras callback.

    Returns:
        float: the learning rate to use for that epoch.
    """
    warmup_epochs = 10
    if epoch < warmup_epochs:
        return LEARN_RATE + 1e-7 * (epoch + 1)

    # Rate reached at the end of the warm-up, then decayed once per later epoch.
    peak_rate = LEARN_RATE + 1e-7 * warmup_epochs
    return peak_rate * math.exp(-0.004 * (epoch - warmup_epochs + 1))

# Callback that asks lr_scheduler for the learning rate of every epoch and logs it.
lr_callback = keras.callbacks.LearningRateScheduler(lr_scheduler, verbose=True)

In [None]:
# Train with the tuned epoch count and batch size, validating after each epoch.
fit_options = dict(
    epochs=best_epoch_num,
    batch_size=best_batch_size,
    validation_data=(X_val, y_val),
    callbacks=[callback, lr_callback],
    verbose=1,
)
history = final_model.fit(X_train, y_train, **fit_options)

# Training may have stopped early because of the early-stopping callback.
# Loss and accuracy on the validation set:
model_eval = final_model.evaluate(X_val, y_val)
print(model_eval)

In [None]:
# Predicted probabilities for the validation set.
val_pred_model = final_model.predict(X_val)

# Threshold at 0.5: anything below becomes class 0, everything else class 1.
val_predicted = np.where(val_pred_model.ravel() < 0.5, 0, 1).tolist()

print('\nNeural Nets - report on Confusion matrix, Classification report, Accuracy and F1 scores')
print(confusion_matrix(y_val, val_predicted))
print(classification_report(y_val, val_predicted))

val_accuracy = accuracy_score(y_val, val_predicted)
val_f1 = f1_score(y_val, val_predicted)
print(f'Accuracy Score: {val_accuracy:.2f}')
print(f'F1 Score: {val_f1:.2f}')

### loss and accuracy plots

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

# One row per epoch, one column per logged quantity.
hist_frame = pd.DataFrame(data=history.history)

# Explicit figure/axes so each panel can carry its own title and axis labels.
fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 4.5))

sns.lineplot(data=hist_frame[['loss', 'val_loss']], ax=loss_ax)
loss_ax.set(title='Training vs. validation loss', xlabel='epoch', ylabel='loss')

sns.lineplot(data=hist_frame[['accuracy', 'val_accuracy']], ax=acc_ax)
acc_ax.set(title='Training vs. validation accuracy', xlabel='epoch', ylabel='accuracy')

fig.tight_layout()

### predictions test data

In [None]:
# Predicted survival probabilities for the test set (thresholded when the submission is built).
test_model_predictions = final_model.predict(test_df)

### create submission.csv file

In [None]:
# Turn the test-set probabilities into 0/1 labels and write the Kaggle submission file.
survived_labels = [0 if pred < 0.5 else 1 for pred in test_model_predictions]

test_model_data = pd.DataFrame({
    'PassengerId': passengerId,
    'Survived': survived_labels,
})
test_model_data.to_csv('submission.csv', index = False)


In [None]:
!head submission.csv

<u>For more information on hyper-parameter search, also check out:</u><br>
[Grid Search Hyperparameters for Deep Learning Models - Jason Brownlee](https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/)<br>
[Tuning the Hyperparameters and Layers of NN Deep Learning - Rendyk](https://www.analyticsvidhya.com/blog/2021/05/tuning-the-hyperparameters-and-layers-of-neural-network-deep-learning/)

[JBL]:https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras

[RDK]:https://www.analyticsvidhya.com/blog/2021/05/tuning-the-hyperparameters-and-layers-of-neural-network-deep-learning