In [1]:
# Env using Python 3.10.14
# pip install notebook==5.7.5

# the keras libraries
# pip install tensorflow Version: 2.17.0
import tensorflow as tf

from tensorflow.python.keras import models, layers

# Version: 3.4.1
from tensorflow import keras

# pip install keras-tuner
import keras_tuner as kt




In [2]:
# other libraries
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt

# use for splitting the test and train data
from sklearn.model_selection import train_test_split

# for the creation of the cartesian product grid
from itertools import product

# for the timing of each test
import time

In [3]:
import datetime
import json
import math

# Basic conv net to achieve statistical significance

In [4]:
class Data_Handling:
    '''
    Load the image and label arrays from disk and hold the train/test split.

    requirements:
        numpy
        train_test_split from sklearn.model_selection

    params:
        output_size (int) value exposed through the `output_size` property
        batch_size (int) accepted for backward compatibility; not read by this class
    '''
    def __init__(self, output_size=5, batch_size=128, ):
        self.__shape = 0
        self.__output_size = output_size
        self.__length = 0

    def load_data(self, label_path, data_path, split=0.2):
        '''
        load the label and image arrays saved in numpy format

        params:
            label_path (str) path of the .npy file holding the labels
            data_path (str) path of the .npy file holding the images
            split (float) unused here; kept so existing calls keep working
        '''
        # load the images and labels
        self.__labels = np.load(label_path)
        self.__data = np.load(data_path)
        self.__length = self.__labels.shape[0]
        # take the per-sample shape from the array shape itself; indexing a
        # sample (data[1]) fails when the file holds a single sample
        self.__shape = self.__data.shape[1:]

        print("Loaded files of size:")
        print(f"Images: {self.__data.shape}\nLabels: {self.__labels.shape}")

    def split_data(self, split=0.2, random_state=None):
        '''
        shuffle and split the loaded data into train and test parts

        params:
            split (float) fraction of the samples held out for testing
            random_state (int or None) seed forwarded to train_test_split;
                pass an int to get the same split on every run
        '''
        self.__X_train, self.__X_test, self.__y_train, self.__y_test = train_test_split(
            self.__data, self.__labels, test_size=split, random_state=random_state)

    def run_tuner(self, timer, tuner, max_epochs=50, batch_size=64, callbacks=None):
        '''
        run a tuner search on the training split and time it

        params:
            timer object with start() and stop() methods; stop() returns the duration
            tuner object with search() and get_best_hyperparameters() methods
            max_epochs (int) epochs passed to the search
            batch_size (int) batch size passed to the search
            callbacks (list or None) callbacks passed to the search

        returns:
            dict with the keys 'duration' and 'best_params'
        '''
        SPLIT = 0.2
        timer.start()

        tuner.search(
            self.__X_train, self.__y_train,
            batch_size=batch_size,
            epochs=max_epochs,
            validation_split=SPLIT,
            # a None default avoids sharing one mutable list between calls
            callbacks=callbacks if callbacks is not None else [],
        )
        # get the best performing hyper parameter set
        best_hps = tuner.get_best_hyperparameters()[0]

        test_duration = timer.stop()
        return {'duration': test_duration, 'best_params': best_hps.values}

    @property
    def shape(self, ):
        return self.__shape

    @property
    def output_size(self, ):
        return self.__output_size

    @property
    def X_train(self, ):
        return self.__X_train

    @property
    def y_train(self, ):
        return self.__y_train

    @property
    def X_test(self, ):
        return self.__X_test

    @property
    def y_test(self, ):
        return self.__y_test

    @property
    def length(self, ):
        return self.__length

In [5]:
# locations of the saved image and label arrays
data_path = 'mitdb_data_reduced.npy'
label_path = 'mitdb_labels_reduced.npy'

# create the handler and read both files into it
dh = Data_Handling()
dh.load_data(data_path=data_path, label_path=label_path)

Loaded files of size:
Images: (14190, 281, 362, 1)
Labels: (14190,)


In [6]:
# hold out a fifth of the samples for testing, then show both shapes
dh.split_data(split=0.2)
print(*(part.shape for part in (dh.X_train, dh.X_test)))

(11352, 281, 362, 1) (2838, 281, 362, 1)


In [None]:
# must prefix keras. unlike in the book
def build_basic_model(shape, output_size, callbacks=None):
    '''
    build and compile a minimal conv net: one Conv2D layer, one max-pooling
    layer and a softmax output layer

    params:
        shape (tuple) shape of a single input sample
        output_size (int) number of units in the softmax output layer
        callbacks unused by this function; kept (now optional) so existing
            calls that pass it keep working. Callbacks belong to fit().

    returns:
        the compiled keras model
    '''
    inputs = keras.Input(shape=shape)
    x = keras.layers.Conv2D(filters=2, kernel_size=3, activation="relu")(inputs)
    x = keras.layers.MaxPooling2D(pool_size=2)(x)
    x = keras.layers.Flatten()(x)

    outputs = keras.layers.Dense(output_size, activation="softmax")(x)
    model = keras.Model(inputs=inputs, outputs=outputs)

    # compile the model
    model.compile(optimizer="rmsprop",
                  loss="sparse_categorical_crossentropy",
                  metrics=["accuracy"])

    return model

In [7]:
# training settings for the basic model
# (the misspelled name `vaidation_split` is kept because the fit cell reads it)
max_epochs = 50
vaidation_split = 0.2
batch_size = 256

# both callbacks watch the same metric
monitor = 'val_loss'
checkpoint_path = 'checkpoint_path.keras'

# early stopping with a patience of 3 epochs on the monitored metric
early_stopping = keras.callbacks.EarlyStopping(monitor=monitor, patience=3)

# checkpoint written to `checkpoint_path`, keeping only the best model
best_checkpoint = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    monitor=monitor,
    save_best_only=True,
)

callbacks = [early_stopping, best_checkpoint]

In [None]:
# build fresh model
basic_model = build_basic_model(dh.shape, dh.output_size, callbacks)

# fit to the training data
# `callbacks` is already a list, so it is passed as-is rather than wrapped
# in a second list
basic_model.fit(
    dh.X_train,
    dh.y_train,
    epochs=max_epochs,
    validation_split=vaidation_split,
    batch_size=batch_size,
    callbacks=callbacks
)

In [None]:
# validation loss recorded per epoch during the early-stopped run above
val_loss_history = basic_model.history.history['val_loss']
# position of the lowest validation loss, plus 1 because positions start at 0
best_epoch = np.argmin(val_loss_history) + 1

# fresh model for evaluation: no validation_split is given, so the whole
# training set is used, and training runs for exactly `best_epoch` epochs
basic_test_model = build_basic_model(dh.shape, dh.output_size, callbacks)
basic_model_history = basic_test_model.fit(
    dh.X_train, dh.y_train, batch_size=batch_size, epochs=best_epoch)

In [None]:
# evaluate performance on the unseen test data to see whether the basic model can beat
# the statistical significance calculated in the workbook [WORKBOOK]
# the test split lives on the data handler, and the model trained for
# `best_epoch` epochs on the full training set is the one to evaluate
basic_test_model.evaluate(dh.X_test, dh.y_test, batch_size=batch_size)

In [None]:
# class probabilities for the test split, read from the data handler
pred = basic_test_model.predict(dh.X_test)

In [None]:
# compare the first five predicted classes with the first five true labels
p = np.argmax(pred, axis = 1)[:5]
r = dh.y_test[:5]
print(p,r)

In [None]:
# NOTE(review): this cell referred to an undefined `model` and undefined
# `X_test` / `y_test`; it now uses the retrained basic model and the handler's
# test split. Confirm this is the intended model.
basic_test_model.evaluate(dh.X_test, dh.y_test, batch_size=batch_size)

In [None]:
# NOTE(review): this cell referred to an undefined `model`; it now shows the
# history returned by the retraining fit. Confirm this is the intended history.
basic_model_history.history

## Grid search using keras library function

In [8]:
class HyperModel(kt.HyperModel):
    '''
    Hypermodel for a conv net with two Conv2D/MaxPooling2D stages and a
    softmax output layer.

    Hyperparameters declared in build():
        filters_1 (int) filters in the first conv layer
        filters_2 (int) filters in the second conv layer
        rate_1 (float) dropout rate applied after the first pooling layer
        learning_rate (choice) learning rate of the RMSprop optimizer

    params:
        num_classes (int) number of units in the softmax output layer
        shape (tuple) shape of a single input sample
        filter_step (int) step between candidate filter counts
        lr_step (float) step between candidate dropout rates (despite the
            name it is not used for the learning rate)
    '''

    def __init__(self, num_classes, shape, filter_step=16, lr_step=0.1 ):
        # run the base-class initialiser so the attributes it sets exist
        super().__init__()

        self.__num_classes = num_classes
        self.__shape = shape
        self.__filter_step = filter_step
        self.__lr_step = lr_step

    def build(self, hp):
        '''
        declare the hyperparameters on `hp` and return the compiled model
        '''
        filters_1 = hp.Int(name="filters_1", min_value=16, max_value=32, step=self.__filter_step)
        # NOTE(review): the lower bound depends on the sampled filters_1;
        # confirm how keras-tuner treats a range that differs between trials
        filters_2 = hp.Int(name="filters_2", min_value=filters_1, max_value=64, step=self.__filter_step)
        # allows a zero setting
        rate_1 = hp.Float(name="rate_1", min_value=0, max_value=0.5, step=self.__lr_step)

        inputs = keras.Input(shape=self.__shape)
        x = keras.layers.Conv2D(filters=filters_1, kernel_size=3, activation="relu")(inputs)
        x = keras.layers.MaxPooling2D(pool_size=2)(x)
        x = keras.layers.Dropout(rate=rate_1)(x)
        x = keras.layers.Conv2D(filters=filters_2, kernel_size=3, activation="relu")(x)
        x = keras.layers.MaxPooling2D(pool_size=2)(x)

        x = keras.layers.Flatten()(x)

        outputs = keras.layers.Dense(self.__num_classes, activation="softmax")(x)
        model = keras.Model(inputs=inputs, outputs=outputs)

        learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

        model.compile(
            optimizer=keras.optimizers.RMSprop(learning_rate=learning_rate),
            loss="sparse_categorical_crossentropy",
            metrics=["accuracy"])

        return model

In [9]:
class Tuner_Timer:
    '''
    Simple stopwatch used to time a tuner search.

    Call start() before the work and stop() after it; stop() returns the
    elapsed time formatted as 'HH:MM:SS'.
    '''

    def __init__(self):
        self.__start = None
        self.__end = None
        self.__duration = None
        # tuners registered through add_tuner()
        self.__tuners = []

    def add_tuner(self, tuner):
        '''
        register a tuner with this timer
        '''
        self.__tuners.append(tuner)

    def start(self):
        '''
        start the timer
        '''
        # a monotonic clock is not affected by system clock adjustments
        self.__start = time.monotonic()

    def stop(self):
        '''
        stop the timer and format the duration

        returns:
            (str) elapsed time as 'HH:MM:SS'; the hours field keeps counting
            past 23 instead of wrapping around after one day

        raises:
            RuntimeError if start() has not been called
        '''
        if self.__start is None:
            raise RuntimeError('Tuner_Timer.stop() called before start()')

        self.__end = time.monotonic()
        self.__duration = self.__end - self.__start

        # format by hand: strftime with gmtime resets the hours after 24h
        minutes, seconds = divmod(int(self.__duration), 60)
        hours, minutes = divmod(minutes, 60)
        duration_string = f'{hours:02d}:{minutes:02d}:{seconds:02d}'

        return duration_string

In [10]:
def save_best_result(results_dict, results, tuner):
    '''
    write `results` as JSON to results.json inside the tuner's directory

    params:
        results_dict unused; kept so existing calls keep working
        results (dict) JSON-serializable results to save
        tuner object whose `directory` attribute names the output folder
    '''
    # the context manager closes the file even if json.dump raises
    with open(f"{tuner.directory}/results.json", "w") as f:
        json.dump(results, f, indent = 6)

In [11]:
def run_tuner(timer, tuner, batch_size, max_epochs, callbacks,
              X_train=None, y_train=None, validation_split=0.2):
    '''
    run a given tuner and return its best parameter configuration along
    with the total duration of the search

    params:
        timer (Tuner_Timer) the timer used for measuring the duration
        tuner (keras.Tuner) the current tuner under test
        batch_size (int) batch for training
        max_epochs (int) the maximum number of epochs to run if not stopped by early stopping
        callbacks (keras.callbacks) for early stopping
        X_train training samples; defaults to the global `dh.X_train`
        y_train training labels; defaults to the global `dh.y_train`
        validation_split (float) fraction of the training data used for validation

    returns:
        dict with the keys 'duration' (str) and 'best_params' (dict)
    '''
    # fall back to the notebook's data handler when no data is passed in
    if X_train is None:
        X_train = dh.X_train
    if y_train is None:
        y_train = dh.y_train

    # start timer
    print(f'Running optimizer {tuner.project_name}')
    timer.start()
    # the tuner will save results to the directory specified in the tuner constructor
    tuner.search(
        X_train, y_train,
        batch_size=batch_size,
        epochs=max_epochs,
        validation_split=validation_split,
        callbacks=callbacks,
    )

    # get the best performing hyper parameter set
    best_hps = tuner.get_best_hyperparameters()[0].values
    # stop timing and get the duration
    test_duration = timer.stop()

    return {'duration':test_duration, 'best_params':best_hps}

In [12]:
def retrain_and_evaluate(tuner, search_subset=1000, eval_subset=1000):
    '''
    rebuild the best model found by `tuner`, find its best epoch count,
    retrain it for that many epochs and evaluate it on the test data

    Reads the notebook globals `hypermodel`, `dh`, `batch_size`,
    `max_epochs` and `callbacks`.

    params:
        tuner (keras.Tuner) the tuner whose best hyperparameters are used
        search_subset (int or None) number of leading training samples used
            when looking for the best epoch; None uses all of them
        eval_subset (int or None) number of leading test samples used for
            the evaluation; None uses all of them

    returns:
        (loss, accuracy, best_epoch) where best_epoch is the number of
        epochs the final model was trained for
    '''
    # use the tuner that was passed in, not always the first one in the list
    best_hps = tuner.get_best_hyperparameters()[0]
    # build a fresh model for retraining in order to find the point of overfitting
    model = hypermodel.build(best_hps)
    print('Finding best epoch')
    model.fit(dh.X_train[:search_subset], dh.y_train[:search_subset],
                batch_size=batch_size,
                epochs=max_epochs,
                validation_split=0.2,
                callbacks=callbacks,)

    # find best epoch since there seems no way to find this in the tuner
    # argmin gives a 0-based position, so add 1 to turn it into an epoch count
    # (without it a best first epoch would mean training for 0 epochs)
    best_epoch = int(np.argmin(model.history.history['val_loss'])) + 1
    # rebuild fresh model
    print(f'\nRetraining to best epoch: {best_epoch}')
    model = hypermodel.build(best_hps)
    # retrain on the entire set for the best epoch
    model.fit(dh.X_train, dh.y_train,
                batch_size=batch_size,
                epochs=best_epoch,)

    print('\nEvaluating model')
    loss, accuracy = model.evaluate(dh.X_test[:eval_subset], dh.y_test[:eval_subset])
    return loss, accuracy, best_epoch

In [40]:
hypermodel = HyperModel(num_classes=dh.output_size, shape=dh.shape, filter_step=16)
# month-day stamp (MM-DD) used to name the results directory
now = datetime.datetime.now().strftime('%m-%d')

directory = f"start-{now}"
print(directory)

# keyword arguments passed to every tuner constructor
common_params = {
    'hypermodel': hypermodel,
    'objective': "val_accuracy",
    'executions_per_trial':1,
    'directory':directory,
    'tuner_id':'test_03',
    'overwrite':False,
}

# callbacks used while searching and retraining
callbacks = [
    keras.callbacks.EarlyStopping(monitor="val_loss", patience=2),
]

start-08-22


In [41]:
# training settings used by every tuner run
max_epochs = 30
batch_size = 128

# fresh containers for this experiment
tuners = []
best_results = {}
timer = Tuner_Timer()

In [42]:
# build the whole list in one assignment so that re-running this cell
# does not append a second copy of every tuner
tuners = [
    # RANDOM
    kt.RandomSearch(project_name='random', max_trials=40, **common_params),

    # HYPERBAND
    # NOTE(review): no max_epochs is passed here, so the tuner's own default
    # applies — confirm whether it should match the notebook's `max_epochs`
    kt.Hyperband(project_name='hyperband', factor=3, hyperband_iterations=1, **common_params),

    # BAYES
    kt.BayesianOptimization(project_name='bayes', **common_params),

    # GRID
    kt.GridSearch(project_name='grid', **common_params),
]

In [43]:

# run all optimizers
for tuner in tuners:

    # run the optimizer
    best_result = run_tuner(timer, tuner, batch_size, max_epochs, callbacks)

    # create the entry for this tuner if needed, then merge in the new result
    best_results.setdefault(tuner.project_name, {}).update(best_result)

    # retrain and evaluate with the tuner that has just been searched
    loss, accuracy, best_epoch = retrain_and_evaluate(tuner)

    best_results[tuner.project_name]['loss'] = loss
    best_results[tuner.project_name]['accuracy'] = accuracy
    #convert from int64 to make serializable
    best_results[tuner.project_name]['best_epoch'] = int(best_epoch)

    # save the current results to file; the context manager closes the file
    # even if serialization fails
    with open(f"{tuner.directory}/results.json", "w") as f:
        json.dump(best_results, f, indent = 6)

    print(f'Loss: {loss}\tAccuracy: {accuracy}')
    

Trial 26 Complete [00h 06m 59s]
val_accuracy: 0.5741963982582092

Best val_accuracy So Far: 0.9625715613365173
Total elapsed time: 08h 32m 17s

Search: Running Trial #27

Value             |Best Value So Far |Hyperparameter
16                |32                |filters_1
32                |32                |filters_2
0                 |0                 |rate_1
0.0001            |0.0001            |learning_rate

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30

IOPub message rate exceeded.
The notebook server will temporarily stop sending output
to the client in order to avoid crashing it.
To change this limit, set the config variable
`--NotebookApp.iopub_msg_rate_limit`.

Current values:
NotebookApp.iopub_msg_rate_limit=1000.0 (msgs/sec)
NotebookApp.rate_limit_window=3.0 (secs)



In [None]:
# display the results collected for each tuner, keyed by project name
best_results

In [None]:
# print the results summary of the first tuner in the list
# NOTE(review): the argument 1 presumably limits the summary to a single
# trial — confirm against the keras-tuner documentation
tuners[0].results_summary(1)

In [27]:
import datetime

# current local date shown as YYYY-MM-DD
now = datetime.datetime.now()
now.date().isoformat()

'2024-08-19'

[cite]  
The Hyperband tuning algorithm uses adaptive resource allocation and early-stopping to quickly converge on a high-performing model. This is done using a sports championship style bracket. The algorithm trains a large number of models for a few epochs and carries forward only the top-performing half of models to the next round. Hyperband determines the number of models to train in a bracket by computing $1 + \log_{\text{factor}}(\text{max\_epochs})$ and rounding it up to the nearest integer.

## Redundant code

In [None]:
# two identical entries (keys 0 and 1), each holding the same placeholder
# values for the 'hyperband' and 'bayes' tuners
d = {
    run: {name: {'r1': 100, 'r2': 150} for name in ('hyperband', 'bayes')}
    for run in range(2)
}

# third entry with only a 'hyperband' result
d[2] = {'hyperband': {'r1': 999, 'r2': 999}}
d

### Results
Training failed with the error "BiasGrad requires tensor size <= int32 max" when using a batch size of 256.  
https://stackoverflow.com/questions/60414562/how-to-solve-the-biasgrad-requires-tensor-size-int32-max-invalidargumenterr