In [None]:
# Env using Python 3.10.14
# pip install notebook==5.7.5

# the keras libraries
# pip install tensorflow Version: 2.17.0
import tensorflow as tf

from tensorflow.python.keras import models, layers

# Version: 3.4.1
from tensorflow import keras

# pip install keras-tuner
import keras_tuner as kt

In [None]:
# other libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# use for splitting the test and train data
from sklearn.model_selection import train_test_split

# for the creation of the cartesian product grid
from itertools import product

# for the timing of each test
import time

In [323]:
import math
import json

# Basic conv net to achieve statistical significance

In [77]:
class Data_Handling:
    '''
    requirements:
        math
        numpy
        train_test_split from sklearn.model_selection
        
    '''
    def __init__(self, output_size=5, batch_size=128, ):
        self.__shape = 0
        self.__output_size = output_size
        self.__length = 0
        
        
    def load_data(self, label_path, data_path, split=0.2):
        # load the images and labels
        self.__labels = np.load(label_path)
        self.__data = np.load(data_path)
        self.__length = self.__labels.shape[0]
        self.__shape = self.__data[1].shape
        

        print("Loaded files of size:")
        print(f"Images: {self.__data.shape}\nLabels: {self.__labels.shape}")
        
    def split_data(self, split=0.2):
        # split and shuffle the data and labels
        self.__X_train, self.__X_test, self.__y_train, self.__y_test = train_test_split(
            self.__data, self.__labels, test_size=split)
        
    
    def run_tuner(timer, tuner, max_epochs=50, batch_size=64, callbacks=[])
        SPLIT = 0.2
        timer.start(test_name)

        tuner.search(
            self.__X_train, self.__y_train,
            batch_size=batch_size,
            epochs=max_epochs,
            validation_split=SPLIT,
            callbacks=callbacks,
        )
        # get and view the best performing hyper parameter set
        best_hps = tuner.get_best_hyperparameters()[0]

        test_duration = timer.stop(best_hps.values)
        best_hps.values

    @property
    def shape(self, ):
        return self.__shape
    
    @property
    def output_size(self, ):
        return self.__output_size
    
    @property
    def X_train(self, ):
        return self.__X_train
    
    @property
    def y_train(self, ):
        return self.__y_train
    
    @property
    def X_test(self, ):
        return self.__X_test
    
    @property
    def y_test(self, ):
        return self.__y_test
    
    @property
    def length(self, ):
        return self.__length

In [None]:
dh = Data_Handling()

label_path = 'mitdb_labels_reduced.npy'
data_path = 'mitdb_data_reduced.npy'

dh.load_data(label_path=label_path, data_path=data_path)

In [85]:
dh.split_data(split=0.2)
print(dh.X_train.shape, dh.X_test.shape)

(11352, 281, 362, 1) (2838, 281, 362, 1)


In [98]:
# must prefix keras. unlike in the book
def build_basic_model(shape, output_size, callbacks):
    
    inputs = keras.Input(shape=shape)   
    x = keras.layers.Conv2D(filters=2, kernel_size=3, activation="relu")(inputs) 
    x = keras.layers.MaxPooling2D(pool_size=2)(x)
    x = keras.layers.Flatten()(x)
    
    outputs = keras.layers.Dense(output_size, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)

    # compile the model
    model.compile(optimizer="rmsprop",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model

In [102]:
# set the params
max_epochs = 50
vaidation_split = 0.2
batch_size = 256

# create the callbacks
monitor = 'val_loss'
checkpoint_path = 'checkpoint_path.keras'

callbacks = [
    keras.callbacks.EarlyStopping(
        monitor=monitor, 
        patience=3
    ),
    
    keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_path, 
        monitor=monitor, 
        save_best_only=True
    )
]

In [106]:
# build fresh model
basic_model = build_basic_model(dh.shape, dh.output_size, callbacks)

# fit to the taining data
basic_model.fit(
    dh.X_train, 
    dh.y_train, 
    epochs=max_epochs, 
    validation_split=vaidation_split, 
    batch_size=batch_size, 
    callbacks=[callbacks]
)

Epoch 1/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m22s[0m 599ms/step - accuracy: 0.3284 - loss: 3542.2876 - val_accuracy: 0.6984 - val_loss: 289.1055
Epoch 2/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 587ms/step - accuracy: 0.7509 - loss: 164.1139 - val_accuracy: 0.9005 - val_loss: 28.0742
Epoch 3/50
[1m36/36[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 592ms/step - accuracy: 0.8614 - loss: 70.9357 - val_accuracy: 0.9229 - val_loss: 14.7922


<keras.src.callbacks.history.History at 0x320f0b790>

In [109]:
# using the epoch as per the early stopping callback above
val_loss_history = basic_model.history.history['val_loss']
# get the index of the lowest recorded loss function (+ 1 to account for 0 idx)
best_epoch = np.argmin(val_loss_history) + 1


# build and train a fresh model for evaluation
basic_test_model = build_basic_model(dh.shape, dh.output_size, callbacks)
# fit model on the entire training set by removing the validation_split param
basic_model_history = basic_test_model.fit(
    dh.X_train, 
    dh.y_train, 
    epochs=best_epoch, 
    batch_size=batch_size, 
)

Epoch 1/3
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m25s[0m 538ms/step - accuracy: 0.3114 - loss: 8101.0479
Epoch 2/3
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 535ms/step - accuracy: 0.4480 - loss: 1806.6357
Epoch 3/3
[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m24s[0m 537ms/step - accuracy: 0.7604 - loss: 292.8893


In [23]:
# evaluate performance on the unseen test data to see whether the basic model can beat 
# a the statistical significance calculated in the workbook [WORKBOOK]
basic_model.evaluate(X_test, y_test, batch_size=batch_size)

[1m12/12[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 194ms/step - accuracy: 0.9228 - loss: 0.4049


[0.43242478370666504, 0.9221282601356506]

In [None]:
pred = basic_model.predict(X_test) 

In [None]:
p = np.argmax(pred, axis = 1)[:5] 
r = y_test[:5]
print(p,r)

In [None]:
model.evaluate(X_test, y_test, batch_size=batch_size)

In [None]:
model.history.history

## Grid search using keras library functinon

In [197]:
class HyperModel(kt.HyperModel):
    
    def __init__(self, num_classes, shape, filter_step=16, lr_step=0.1 ):

            self.__num_classes = num_classes
            self.__shape = shape
            self.__filter_step = filter_step
            self.__lr_step = lr_step
            
    def build(self, hp):

        filters_1 = hp.Int(name="filters_1", min_value=16, max_value=32, step=self.__filter_step) 
        filters_2 = hp.Int(name="filters_2", min_value=filters_1, max_value=64, step=self.__filter_step)
        # allows a zero setting 
        rate_1 = hp.Float(name="rate_1", min_value=0, max_value=0.5, step=self.__lr_step)
        
        inputs = keras.Input(shape=self.__shape)
        x = keras.layers.Conv2D(filters=filters_1, kernel_size=3, activation="relu")(inputs) 
        x = keras.layers.MaxPooling2D(pool_size=2)(x)
        x = keras.layers.Dropout(rate=rate_1)(x)
        x = keras.layers.Conv2D(filters=filters_2, kernel_size=3, activation="relu")(x) 
        x = keras.layers.MaxPooling2D(pool_size=2)(x)

        x = keras.layers.Flatten()(x)
        
        outputs = keras.layers.Dense(self.__num_classes, activation="softmax")(x)
        model = keras.Model(inputs=inputs, outputs=outputs)
        
        learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
        
        model.compile(
            optimizer=keras.optimizers.RMSprop(learning_rate=learning_rate),
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"])

        return model

In [None]:
class Tuner_Timer:
    
    def __init__(self):
        self.__start = None
        self.__end = None
        self.__duration = None
#         self.__results = {}
#         self.__tuners = []
#         self.__common_params = common_params
#         self.__callbacks = []
        
    def add_tuner(self, tuner):
        self.__tuners.append(tuner)
        
    def start(self):
        '''
        start the timer 
        '''
        self.__start = time.time()
        
    def stop(self):
        '''
        stop the timer and format the duration
        '''
        self.__end = time.time()
        self.__duration = self.__end - self.__start
        duration_string = time.strftime('%H:%M:%S', time.gmtime(self.__duration))

        return duration_string

In [445]:
def save_best_result(results_dict, results, tuner):

    if tuner.project_name in results_dict.keys():
        
        results_dict[tuner.project_name].update(results)
#         print(results)
    else:
        results_dict[tuner.project_name] = results
        
    #save the current result to file
    f = open(f"{tuner.directory}/results.json", "w")

    json.dump(results, f, indent = 6)

    f.close()

In [438]:
def run_tuner(timer, tuner, batch_size, max_epochs, callbacks):
        '''
        run a given tuner saving the best parameter configuration to a timer object
        along with the total duration of the optimizer's run

        params:
            timer (Tuner_Timer) the timer used for recording results
            tuner (keras.Tuner) the current tuner under test 
            batch_size (int) batch for training 
            max_epochs (int) the maximum number of epochs to run if not stopped by early stopping
            callbacks (keras.callbacks) for early stopping
        '''
        # start timer
        timer.start()
        # the tuner will save results to the directory specified in the tuner constructor
        tuner.search(
            dh.X_train[:1000], dh.y_train[:1000],
            batch_size=batch_size,
            epochs=max_epochs,
            validation_split=0.2,
            callbacks=callbacks,
        )

        # get the best performing hyper parameter set
        best_hps = tuner.get_best_hyperparameters()[0].values
        # stop timing and get the duration
        test_duration = timer.stop()
        
        return {'duration':test_duration, 'best_params':best_hps}
#         return {tuner.tuner_id:{'duration':'00:00:10', 'best_params':{'1':100, '2':200}}}

In [439]:
hypermodel = HyperModel(num_classes=dh.output_size, shape=dh.shape, filter_step=16)

directory = "start-11-08"

common_params = {
    'hypermodel': hypermodel, 
    'objective': "val_accuracy", 
    'executions_per_trial':1,
    'directory':directory,
    'tuner_id':tuner_id,
    'overwrite':False,
}

callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=2),
]

In [447]:
# RANDOM
tuners.append(kt.RandomSearch(project_name='random', max_trials=5, **common_params))

best_result = run_tuner(timer, tuners[0], batch_size, max_epochs, callbacks)
# add best params to result dict and save current dict to file
save_best_result(best_results, best_result, tuners[0])

Trial 5 Complete [00h 00m 16s]
val_accuracy: 0.5099999904632568

Best val_accuracy So Far: 0.6899999976158142
Total elapsed time: 00h 02m 17s


In [None]:
# HYPERBAND
tuners.append(kt.Hyperband(project_name='hyperband', factor=3, hyperband_iterations=1, **common_params))

In [432]:
# BAYES
tuners.append(kt.BayesianOptimization(project_name='bayes', **common_params))

# GRID
tuners.append(kt.GridSearch(project_name='grid', **common_params))

In [446]:
batch_size = 128
max_epochs = 2

timer = Tuner_Timer()
best_results = {}
tuners = []

In [None]:
# run all optimizers
for tuner in tuners:
    
    # run the optimizer
    best_result = run_tuner(timer, tuner, batch_size, max_epochs, callbacks)
    # add best params to result dict and save current dict to file
    save_best_result(best_results, best_result, tuner)
    
    loss, accuracy, best_epoch = retrain_and_evaluate(tuners[0])
    
    best_results[tuners[0].project_name]['loss'] = loss
    best_results[tuners[0].project_name]['accuracy'] = accuracy
    best_results[tuners[0].project_name]['best_epoch'] = best_epoch
    
    print(f'Loss: {loss}\tAccuracy: {accuracy}')
    

In [474]:
best_results

{'random': {'duration': '00:02:17',
  'best_params': {'filters_1': 16,
   'filters_2': 32,
   'rate_1': 0.2,
   'learning_rate': 0.01}}}

In [466]:
tuners[0].results_summary(1)

Results summary
Results in start-11-08/random
Showing 1 best trials
Objective(name="val_accuracy", direction="max")

Trial 0 summary
Hyperparameters:
filters_1: 16
filters_2: 32
rate_1: 0.2
learning_rate: 0.01
Score: 0.6899999976158142


In [478]:
def retrain_and_evaluate(tuner):
    # get the object hp to rebuild a fresh model
    best_hps = tuners[0].get_best_hyperparameters()[0]
    # build a fresh model for retraining in order to find the point of overfitting
    model = hypermodel.build(best_hps)
    print('Finding best epoch')
    model.fit(dh.X_train[:1000], dh.y_train[:1000],
                batch_size=batch_size,
                epochs=max_epochs,
                validation_split=0.2,
                callbacks=callbacks,)

    # find best epoch since there seems no way to find this in the tuner
    best_epoch = np.argmin(model.history.history['val_loss'])
    # rebuild fresh model
    print(f'\nRetraining to best epoch: {best_epoch}')
    model = hypermodel.build(best_hps)
    # reterain on the entire set for the best epoch
    model.fit(dh.X_train[:1000], dh.y_train[:1000],
                batch_size=batch_size,
                epochs=best_epoch,)
    
    print('\nEvaluating model')
    loss, accuracy = model.evaluate(dh.X_test[:1000], dh.y_test[:1000])
    return loss, accuracy, best_epoch

In [479]:
loss, accuracy, best_epoch = retrain_and_evaluate(tuners[0])
best_results[tuners[0].project_name]['loss'] = loss
best_results[tuners[0].project_name]['accuracy'] = accuracy
best_results[tuners[0].project_name]['best_epoch'] = best_epoch
print(f'Loss: {loss}\tAccuracy: {accuracy}')

Finding best epoch
Epoch 1/2
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.2469 - loss: 29561.8145 - val_accuracy: 0.5100 - val_loss: 84.5610
Epoch 2/2
[1m7/7[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 1s/step - accuracy: 0.2824 - loss: 239.8521 - val_accuracy: 0.5100 - val_loss: 2.3600

Retraining to best epoch: 1
[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 1s/step - accuracy: 0.2930 - loss: 98111.7891 

Evaluating model
[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 78ms/step - accuracy: 0.5018 - loss: 1.5688
Loss: 1.5684082508087158	Accuracy: 0.4959999918937683


In [480]:
best_results

{'random': {'duration': '00:02:17',
  'best_params': {'filters_1': 16,
   'filters_2': 32,
   'rate_1': 0.2,
   'learning_rate': 0.01},
  'loss': 1.5684082508087158,
  'accuracy': 0.4959999918937683,
  'best_epoch': 1}}

In [None]:
def retrain_and_evaluate(params):
    '''
    rebuild a fresh model and retrain on the entire dataset
    
    # model =  build(params)
    # model.fit( ... )
    # model.evaluate( ... )
    return:
        evaluation metric
    '''

[cite]  
The Hyperband tuning algorithm uses adaptive resource allocation and early-stopping to quickly converge on a high-performing model. This is done using a sports championship style bracket. The algorithm trains a large number of models for a few epochs and carries forward only the top-performing half of models to the next round. Hyperband determines the number of models to train in a bracket by computing 1 + logfactor(max_epochs) and rounding it up to the nearest integer.

## Redundant code

In [182]:
d = {
     0:
      {'hyperband':
       {'r1':100,'r2':150},
      'bayes':
       {'r1':100,'r2':150}
      },
      1:
      {'hyperband':
       {'r1':100,'r2':150},
      'bayes':
       {'r1':100,'r2':150}
      }
     }
    
d[2] = {"hyperband":{}}
d[2]['hyperband'] = {'r1':999,'r2':999}
d

{0: {'hyperband': {'r1': 100, 'r2': 150}, 'bayes': {'r1': 100, 'r2': 150}},
 1: {'hyperband': {'r1': 100, 'r2': 150}, 'bayes': {'r1': 100, 'r2': 150}},
 2: {'hyperband': {'r1': 999, 'r2': 999}}}

### Results
Error "BiasGrad requires tensor size <= int32 max" with batch 256  
https://stackoverflow.com/questions/60414562/how-to-solve-the-biasgrad-requires-tensor-size-int32-max-invalidargumenterr