In [27]:
# Enable IPython's autoreload extension in mode 2 so edits to imported project modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [28]:
#import json
import math
import matplotlib.pyplot as plt
import numpy as np
import os
import pandas as pd
import pickle
#import seaborn as sns

import keras_tuner
import tensorflow as tf
import tensorflow.keras.layers as tfl

from sklearn.metrics import confusion_matrix, classification_report

In [29]:
# Raise pandas' display limits so wide / long objects are shown with less truncation.
for option_name, option_value in (
    ("max_seq_items", 2000),
    ("max_columns", 500),
    ("max_rows", 800),
    ("max_colwidth", 200),
    ("width", 800),
):
    # Equivalent to `pd.options.display.<option_name> = <option_value>`.
    setattr(pd.options.display, option_name, option_value)

In [30]:
# Project root: the directory that contains `models/`, `artifacts/` and `logs/`.
# Set the NAM_NAT_APP_DIR environment variable to run the notebook on another
# machine; the fallback is the original author's local checkout path.
base_url = os.environ.get("NAM_NAT_APP_DIR", '/Users/francesco/REPOS/nam-nat-CNN-clf/app')

# Work from the project root so the project-local `models` package can be
# imported and the relative `logs/...` paths used below resolve inside the project.
os.chdir(base_url)

In [31]:
from models.hypermodel import hyperband_tuner

### Load artifacts

In [32]:
# Read the pre-split gender arrays (train + validation) from the NPZ archive.
gender_data_path = os.path.join(base_url, "artifacts/training_data", "gender_data.npz")
with np.load(gender_data_path, allow_pickle=True) as f:
    # Pull every array out while the archive is still open.
    x_train = f["x_train"]
    y_train = f["y_train"]
    x_val = f["x_val"]
    y_val = f["y_val"]

# Quick sanity check: shape of each input split.
for split_inputs in (x_train, x_val):
    print(split_inputs.shape)

(3990, 16)
(705, 16)


In [33]:
# Restore the tokenizer saved next to the training data.
# NOTE: unpickling can execute arbitrary code, so only load tokenizer files
# that were produced by this project.
tokenizer_path = os.path.join(base_url, "artifacts/training_data", "gender_tokenizer.pkl")
with open(tokenizer_path, "rb") as f:
    tokenizer = pickle.load(f)

### Build model

In [83]:
# Params derived from the loaded artifacts; all four are handed to the tuner factory.
embedding_size = len(tokenizer.word_index)            # number of entries in the tokenizer vocabulary
metrics = [tf.keras.metrics.CategoricalAccuracy()]
softmax_units = y_train.shape[1]                      # width of y_train (presumably one-hot classes)
input_shape = x_train.shape[1]                        # length of each input row in x_train

# Echo every parameter in the order it was defined.
for param_label, param_value in (
    ("embedding_size = ", embedding_size),
    ("metrics = ", metrics),
    ("softmax_units = ", softmax_units),
    ("input_shape = ", input_shape),
):
    print(param_label, param_value)

embedding_size =  33
metrics =  [<keras.metrics.metrics.CategoricalAccuracy object at 0x7fdafa0df6d0>]
softmax_units =  3
input_shape =  16


In [84]:
# Batch data
train_dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train)).batch(64)
val_dataset = tf.data.Dataset.from_tensor_slices((x_val, y_val)).batch(64)

In [113]:
# Create the tuner through the project's `hyperband_tuner` factory.
# Arguments are passed positionally, in the order the factory is called with here.
project_name = "tuning_gender"
tuner = hyperband_tuner(
    input_shape,
    metrics,
    softmax_units,
    embedding_size,
    project_name,
)

In [114]:
# Print the tuner's hyperparameter search space; extended=True also shows each hyperparameter's full configuration.
tuner.search_space_summary(extended=True)

Search space summary
Default search space size: 5
dropout_1 (Float)
{'default': 0.3, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': None}
dense_units_1 (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64, 128, 256], 'ordered': True}
dropout_2 (Float)
{'default': 0.5, 'conditions': [], 'min_value': 0.0, 'max_value': 0.5, 'step': 0.1, 'sampling': None}
dense_units_2 (Choice)
{'default': 32, 'conditions': [], 'values': [32, 64, 128, 256], 'ordered': True}
lr (Float)
{'default': 0.0001, 'conditions': [], 'min_value': 0.0001, 'max_value': 0.01, 'step': None, 'sampling': 'log'}


### Tuning

In [115]:
# TensorBoard log directories (relative to the current working directory):
# one for the hyperparameter search and one presumably for a later fit.
hpt_dir, fit_dir = "logs/tb_gen_hpt", "logs/tb_gen_fit"
print(hpt_dir)

# One TensorBoard callback per log directory; histogram_freq=1 is set on both.
tensorboard_callback_htp = tf.keras.callbacks.TensorBoard(
    histogram_freq=1,
    log_dir=hpt_dir,
)
tensorboard_callback_fit = tf.keras.callbacks.TensorBoard(
    histogram_freq=1,
    log_dir=fit_dir,
)

logs/tb_gen_hpt


In [116]:
# Load the TensorBoard notebook extension (provides the %tensorboard magic).
%load_ext tensorboard

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard


In [117]:
# Hyperband budget estimate.
# Per the Keras Tuner documentation, one Hyperband iteration runs approximately
#     max_epochs * log(max_epochs, factor) ** 2
# *cumulative epochs* summed over all trials. The figure is therefore an epoch
# budget, not a trial count, and it was previously mislabeled as "trials".
sim_max_epochs = 20
sim_iterations = 1
sim_factor = 2
est_cumulative_epochs = sim_iterations * sim_max_epochs * math.log(sim_max_epochs, sim_factor) ** 2
# Backward-compatible alias for the earlier (misleading) variable name.
est_trials = est_cumulative_epochs
print("Estimated cumulative epochs: ", est_cumulative_epochs)

Estimated trials:  373.5812491475342


In [118]:
# Run the hyperparameter search on the batched training set, scoring on the validation set.
# Each trial logs to TensorBoard and stops early after 10 epochs without val_loss improvement.
early_stopping_callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)
search_callbacks = [tensorboard_callback_htp, early_stopping_callback]

tuner.search(train_dataset, validation_data=val_dataset, callbacks=search_callbacks)

Trial 92 Complete [00h 00m 25s]
val_loss: 0.7715394496917725

Best val_loss So Far: 0.7641831636428833
Total elapsed time: 00h 12m 22s
INFO:tensorflow:Oracle triggered exit


In [119]:
# Show the three best trials found by the search (hyperparameters and score).
tuner.results_summary(3)

Results summary
Results in logs/gen_hpt/tuning_gender
Showing 3 best trials
<keras_tuner.engine.objective.Objective object at 0x7fdae70d5e50>
Trial summary
Hyperparameters:
dropout_1: 0.2
dense_units_1: 64
dropout_2: 0.0
dense_units_2: 64
lr: 0.0061221776185432925
tuner/epochs: 10
tuner/initial_epoch: 0
tuner/bracket: 1
tuner/round: 0
Score: 0.7641831636428833
Trial summary
Hyperparameters:
dropout_1: 0.30000000000000004
dense_units_1: 64
dropout_2: 0.4
dense_units_2: 128
lr: 0.0014597926340128918
tuner/epochs: 20
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.768103301525116
Trial summary
Hyperparameters:
dropout_1: 0.4
dense_units_1: 256
dropout_2: 0.1
dense_units_2: 256
lr: 0.004737761473125344
tuner/epochs: 20
tuner/initial_epoch: 0
tuner/bracket: 0
tuner/round: 0
Score: 0.7715394496917725


In [120]:
# Take the top-ranked hyperparameter set from the search and build a fresh,
# untrained model from it via the tuner's hypermodel.
best_hps = tuner.get_best_hyperparameters()[0]
best_model = tuner.hypermodel.build(best_hps)

# `Model.summary()` prints the table itself and returns None, so it must not be
# wrapped in print() (that would append a stray "None" line to the output).
best_model.summary()

Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 16, 33)            1122      
                                                                 
 conv1d_5 (Conv1D)           (None, 12, 16)            2656      
                                                                 
 dropout_2 (Dropout)         (None, 12, 16)            0         
                                                                 
 conv1d_6 (Conv1D)           (None, 8, 16)             1296      
                                                                 
 conv1d_7 (Conv1D)           (None, 6, 16)             784       
                                                                 
 conv1d_8 (Conv1D)           (None, 4, 16)             784       
                                                                 
 conv1d_9 (Conv1D)           (None, 2, 16)            