In [47]:
import numpy as np
from pathlib import Path
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
import sys, os
from loguru import logger
import tensorflow as tf
from tensorflow import keras
from tensorflow.python.keras import layers
from sklearn.preprocessing import LabelEncoder, OrdinalEncoder
from sklearn.dummy import DummyClassifier
from tensorflow.python.keras.layers import Dense, Flatten, Input, Dropout
from tensorflow.python.keras.models import Sequential
from tensorflow.python.keras.callbacks import EarlyStopping, TensorBoard
from ray.tune.integration.keras import TuneReportCallback
from ray.tune.schedulers import AsyncHyperBandScheduler
from ray.tune import JupyterNotebookReporter
from ray import tune
import keras_tuner as kt
from kerastuner.tuners import RandomSearch, Hyperband
from kerastuner.engine.hyperparameters import HyperParameters as hp
from tensorflow.keras.models import Sequential

# Make the parent directory importable so the project-local modules below resolve.
# NOTE(review): '..' is resolved against the kernel's working directory —
# presumably the notebook lives one level below the project root; confirm.
sys.path.append('..')

from definitions import get_project_root
from src.data.make_dataset import create_train_test_validation
from src.visualization.visualize import plot_results

# Value returned by the project's own root helper; not referenced again in the visible cells.
root = get_project_root()


In [48]:
# Seed every RNG the training stack may draw from, not only TensorFlow's.
# Previously just tf.random was seeded, leaving the Python and NumPy
# generators unseeded between runs.
import random

SEED = 42
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

In [49]:
# Build the train / validation / test splits with the project helper.
(
    x_train,
    x_valid,
    x_test,
    y_train,
    y_valid,
    y_test,
) = create_train_test_validation()

# Show the shape of each feature/label pair as a quick sanity check.
tuple(
    part.shape
    for part in (x_train, y_train, x_valid, y_valid, x_test, y_test)
)

2022-02-12 13:04:28.445 | INFO     | src.data.make_dataset:create_train_test_validation:73 - found file labeled_data.csv, procceed with creating train, test and validation sets


((61711, 23), (61711, 1), (13225, 23), (13225, 1), (13224, 23), (13224, 1))

In [50]:
def model_builder(hp):
    """Build and compile a dense classifier for one hyperparameter sample.

    Architecture: a fixed 23-unit ReLU input layer over 23 features, then a
    tunable number of ReLU hidden layers, one dropout layer, and a 15-way
    softmax output.

    Hyperparameters registered on ``hp`` (in this order):
        n_layers  -- int in [1, 10], number of hidden layers
        layer_{i} -- int in [1, 200], width of hidden layer ``i``
        dropout   -- float in [0.05, 0.4], rate of the single dropout layer

    Args:
        hp: hyperparameter container supplied by the tuner; only its ``Int``
            and ``Float`` methods are used here.

    Returns:
        The model, compiled with the Adam optimizer, sparse categorical
        cross-entropy loss and an accuracy metric.
    """
    net = Sequential()

    # Fixed first layer; its width matches the 23 input features.
    net.add(Dense(23, activation='relu', input_shape=(23,)))

    # Depth is sampled once, then each hidden layer gets its own width.
    depth = hp.Int("n_layers", min_value=1, max_value=10)
    for idx in range(depth):
        width = hp.Int(f"layer_{idx}", min_value=1, max_value=200)
        net.add(Dense(width, activation='relu'))

    # A single dropout layer sits between the hidden stack and the output.
    drop_rate = hp.Float("dropout", min_value=0.05, max_value=0.4)
    net.add(Dropout(drop_rate))
    net.add(Dense(15, activation='softmax'))

    net.compile(
        optimizer="adam",
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net
    

<h2>Random search tuner</h2>

In [35]:
# Random-search tuning over the search space declared in `model_builder`.
log_dir = "logs_kt_random"
tensorboard = TensorBoard(log_dir=log_dir)

# NOTE: Keras Tuner reloads a finished search it finds under `directory`
# ("Reloading Oracle from existing project ..."). Pass overwrite=True or
# delete the directory to force a fresh search.
tuner = RandomSearch(
    model_builder,
    objective='val_loss',
    max_trials=15,          # number of hyperparameter combinations to sample
    executions_per_trial=3, # models trained per combination
    seed=42,                # reproducible sampling, matching the Hyperband tuner
    directory=log_dir)

tuner.search_space_summary()

tuner.search(
    x=x_train,
    y=y_train,
    epochs=10,
    batch_size=64,
    callbacks=[tensorboard],
    validation_data=(x_valid, y_valid),
)

INFO:tensorflow:Reloading Oracle from existing project logs_kt3\untitled_project\oracle.json
INFO:tensorflow:Reloading Tuner from logs_kt3\untitled_project\tuner0.json
Search space summary
Default search space size: 11
n_layers (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 10, 'step': 1, 'sampling': None}
layer_0 (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 200, 'step': 1, 'sampling': None}
dropout (Float)
{'default': 0.05, 'conditions': [], 'min_value': 0.05, 'max_value': 0.4, 'step': None, 'sampling': None}
layer_1 (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 200, 'step': 1, 'sampling': None}
layer_2 (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 200, 'step': 1, 'sampling': None}
layer_3 (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 200, 'step': 1, 'sampling': None}
layer_4 (Int)
{'default': None, 'conditions': [], 'min_value': 1, 'max_value': 200, 'step': 1, 

In [36]:
# Ask the tuner for its single best-ranked hyperparameter set and keep it.
ranked_randomsearch_params = tuner.get_best_hyperparameters(num_trials=1)
best_randomsearch_params = ranked_randomsearch_params[0]

In [38]:
# Rebuild a fresh model from the best random-search hyperparameters and
# train it for the full 50 epochs, validating on the validation split.
randomsearch_model = model_builder(best_randomsearch_params)
history = randomsearch_model.fit(
    x_train,
    y_train,
    epochs=50,
    validation_data=(x_valid, y_valid),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [45]:
# Score the retrained random-search model on the test split.
# The model was compiled with one metric, so the result is [loss, accuracy].
eval_result = randomsearch_model.evaluate(x=x_test, y=y_test)
eval_result



[1.406857967376709, 0.5481699705123901]

<h2>Random search does not give a better result than the base model. Let's try Hyperband.</h2>

<h3>We can reuse the <code>model_builder</code> function.</h3>

In [53]:
# Hyperband tuning over the same search space (reuses `model_builder`).
log_dir = "kt_hb"

# Dedicated TensorBoard callback for this tuner. Reusing the callback created
# for the random search would write Hyperband's logs into that search's
# directory and make this cell depend on the earlier one having run.
tensorboard_hb = TensorBoard(log_dir=log_dir)

tuner_hb = Hyperband(
    model_builder,
    objective="val_loss",
    max_epochs=25,           # upper bound on epochs for any single trial
    factor=3,                # reduction factor between successive rounds
    hyperband_iterations=1,  # run the full Hyperband schedule once
    seed=42,
    directory=log_dir
)

# No `epochs` argument: Hyperband assigns the epoch budget of every trial
# itself (bounded by `max_epochs`), so a value passed here is overridden.
tuner_hb.search(
    x=x_train,
    y=y_train,
    batch_size=64,
    callbacks=[tensorboard_hb],
    validation_data=(x_valid, y_valid),
)

Trial 30 Complete [00h 01m 09s]
val_loss: 1.3710397481918335

Best val_loss So Far: 1.3495172262191772
Total elapsed time: 00h 12m 37s
INFO:tensorflow:Oracle triggered exit


In [55]:
# Keep the Hyperband winner under its own name. Storing it in
# `best_randomsearch_params` overwrote the random-search result, so re-running
# the random-search retraining cell would silently train the Hyperband model.
best_hyperband_params = tuner_hb.get_best_hyperparameters(num_trials=1)[0]

# Rebuild a fresh model from the best Hyperband hyperparameters and train it
# for the full 50 epochs, validating on the validation split.
hyperband_model = model_builder(best_hyperband_params)
history_hb = hyperband_model.fit(
    x_train,
    y_train,
    epochs=50,
    validation_data=(x_valid, y_valid),
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [None]:
# Score the retrained Hyperband model on the test split.
# The model was compiled with one metric, so the result is [loss, accuracy].
eval_result = hyperband_model.evaluate(x=x_test, y=y_test)
eval_result