In [47]:
import numpy as np
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import sys
from loguru import logger
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras import layers
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.dummy import DummyClassifier
from tensorflow.python.keras.layers import Dense, Flatten, Input, Dropout
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.callbacks import EarlyStopping, TensorBoard

import keras_tuner as kt
from keras_tuner.tuners import RandomSearch, Hyperband
from keras_tuner.engine.hyperparameters import HyperParameters as hp
from tensorflow.keras.models import Sequential

sys.path.append('..')

from definitions import get_project_root
from src.data.make_dataset import create_train_test_validation
from src.visualization.visualize import plot_results

root = get_project_root()


In [48]:
tf.random.set_seed(42)

In [49]:
## Create train, validation and test sets
x_train, x_valid, x_test, y_train, y_valid, y_test = create_train_test_validation()
x_train.shape, y_train.shape, x_valid.shape, y_valid.shape, x_test.shape, y_test.shape

2022-02-12 13:04:28.445 | INFO     | src.data.make_dataset:create_train_test_validation:73 - found file labeled_data.csv, procceed with creating train, test and validation sets


((61711, 23), (61711, 1), (13225, 23), (13225, 1), (13224, 23), (13224, 1))

In [77]:
def model_builder(hp):
    model = Sequential()

    model.add(Dense(23,activation='relu', input_shape=(23,)))

    for i in range(hp.Int("n_layers",1,6)):
        model.add(Dense(hp.Int(f"layer_{i}",5,500,step=50),activation='relu'))

    model.add(Dropout(hp.Float("dropout",0.05,0.4)))
    model.add(Dense(15,activation='softmax'))

    model.compile(optimizer="adam", loss='sparse_categorical_crossentropy',metrics=['accuracy'])

    return model
    

<h2> randomsearch tuner

In [78]:
log_dir = "logs_kt_random"
tensorboard = TensorBoard(log_dir=log_dir)

tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=15,
    executions_per_trial=3,
    directory=log_dir)

tuner.search_space_summary()

tuner.search(x=x_train,y=y_train,epochs=10,batch_size=64,callbacks=[tensorboard],validation_data=(x_valid,y_valid))

Trial 15 Complete [00h 02m 27s]
val_loss: 1.3695966402689617

Best val_loss So Far: 1.3497042258580525
Total elapsed time: 00h 34m 56s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


In [79]:
best_randomsearch_params = tuner.get_best_hyperparameters(num_trials=1)[0]

In [80]:
randomsearch_model = model_builder(best_randomsearch_params)
history_random = randomsearch_model.fit(x_train, y_train, epochs=50,validation_data=(x_valid,y_valid),callbacks=[EarlyStopping(patience=5,restore_best_weights=True)])

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


In [81]:
randomsearch_result = randomsearch_model.evaluate(x_test,y_test)
randomsearch_result



[1.3500087261199951, 0.5536902546882629]

<h2> Randomsearch does not give a better result than the base model. Let's try Hyperband

<h3> We can reuse the model_builder function

In [84]:
log_dir = "kt_hb_acc2"

tuner_hb = Hyperband(
    model_builder,
    objective="val_loss",
    max_epochs=25,
    factor=3,
    hyperband_iterations=1,
    seed=42,
    directory=log_dir
)

tuner_hb.search(x=x_train,y=y_train,epochs=12,batch_size=64,callbacks=[tensorboard],validation_data=(x_valid,y_valid))

Trial 30 Complete [00h 01m 08s]
val_loss: 1.3541325330734253

Best val_loss So Far: 1.3442531824111938
Total elapsed time: 00h 16m 39s
INFO:tensorflow:Oracle triggered exit


INFO:tensorflow:Oracle triggered exit


In [89]:
best_hyperband_params = tuner_hb.get_best_hyperparameters(num_trials=1)[0]
hyperband_model = model_builder(best_randomsearch_params)
history_hb = hyperband_model.fit(x_train, y_train, epochs=50,validation_data=(x_valid,y_valid),callbacks=[EarlyStopping(patience=5,restore_best_weights=True)])

TypeError: fit() got an unexpected keyword argument 'maximize'

In [86]:
hyperband_result = hyperband_model.evaluate(x_test,y_test)




In [88]:
file_model = root / 'src' / 'models' / 'winning_hypermodel.model'
randomsearch_model.save(file_model)



INFO:tensorflow:Assets written to: c:\Users\huube\OneDrive\Master of Informatics\Machine Learning\Eindopdracht\src\models\winning_hypermodel.model\assets


INFO:tensorflow:Assets written to: c:\Users\huube\OneDrive\Master of Informatics\Machine Learning\Eindopdracht\src\models\winning_hypermodel.model\assets


<h2> Hyperband is the winning tuner at this moment. We will continue to evaluate the hyperband tuned model. 