# CWT hyperparameters tuning

Use GPU T4 x 2  
When using GP100, there are XLA errors.  

5 channels (LT, RT, LP, RP, C).

Implementing tf.keras.metrics.KLDivergence().



In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
import keras
from keras import layers, regularizers
import keras_tuner as kt
import sys
import os

# Sets off SettingWithCopyWarning.
pd.set_option('mode.chained_assignment', None)


# ----------------------------------------
# Flags for working on my different machines.
# flag_kaggle = True
# flag_FW = True
flag_LN = True

try:
    if flag_kaggle:
        sys.path.insert(0, '/kaggle/input/hms-lib')
        base_dir = '/kaggle/input/hms-harmful-brain-activity-classification'
        devset_dir = '/kaggle/input/hms-cwt-scalograms-single-numpy-v1'
        output_dir = ''
except:
    pass 

try:
    if flag_FW:
        sys.path.insert(0, '../lib')
        base_dir = '../../kaggle_data/hms'
        devset_dir = '../data'
        output_dir = 'results/'
except:
    pass 

try:
    if flag_LN:
        sys.path.insert(0, '../lib')
        base_dir = '../../data/hms'
        devset_dir = '../data'
        output_dir = 'results/'
except:
    pass 
# ----------------------------------------

from KLmetric import score

path_train = [
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part1.npy',
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part2.npy',
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part3.npy',
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part4.npy']
path_val = f'{devset_dir}/05_single_cwt_v3_10s_reduced_part5.npy'
path_train_items = [
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part1_items.npy',
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part2_items.npy',
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part3_items.npy',
    f'{devset_dir}/05_single_cwt_v3_10s_reduced_part4_items.npy']
path_val_items = f'{devset_dir}/05_single_cwt_v3_10s_reduced_part5_items.npy'

2024-04-01 21:49:48.635036: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Data generators

In [3]:
#
# Data generator for training.
#
# training set splitted in 4 parts
# coefficients of cwt's arrays
# 5 channels (LP, RP, LT, RP, C)
#

class DataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, path_to_items, path_to_data, batch_size=32, n_classes=2, shuffle=True):
        ''' Initialization
        item: [eeg_id, eeg_sub_id, idx in sgrams (1st index), target,
        seizure_vote, lpd_vote, gpd_vote, lrda_vote,
        grda_vote, other_vote]
        '''
        self.n_channels = 5
        self.path_to_data = path_to_data
        self.j = -1
        self.items = {}
        self.items[0] = np.load(path_to_items[0])
        self.items[1] = np.load(path_to_items[1])
        self.items[2] = np.load(path_to_items[2])
        self.items[3] = np.load(path_to_items[3])
        self.batch_size = batch_size
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(self.len / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        # Generate data
        X, y = self.__data_generation(indexes)
        return X, y

    def get_dim(self):
        'Dimensions for the input layer.'
        return (self.dim[0], self.dim[1], self.n_channels)

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.j = self.j + 1
        if self.j == 4:
            self.j = 0
        self.data = np.load(self.path_to_data[self.j])
        self.len = self.data.shape[0]
        self.dim = (self.data.shape[1], self.data.shape[2])
        self.indexes = np.arange(self.len)
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        true_size = len(indexes)
        X = np.empty((true_size, *self.dim, self.n_channels))
        # y = np.empty((true_size, self.n_classes), dtype=float)
        y = np.empty((true_size), dtype=float)

        # Generate data
        for i, idx in enumerate(indexes):
            item = self.items[self.j][idx]
            # print(item)  # Uncomment for testing.
            X[i,:,:,:] = self.data[idx, :, :, :]
            # Store solution
            y[i] = item[3]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)


#
# Data generator for validating.
#
# 4 parts
# coefficients of cwt's arrays
# 5 channels (LP, RP, LT, RP, C)
#

class ValDataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, path_to_items, path_to_data, batch_size=32, n_classes=2, shuffle=True):
        ''' Initialization
        item: [eeg_id, eeg_sub_id, idx in sgrams (1st index), target,
        seizure_vote, lpd_vote, gpd_vote, lrda_vote,
        grda_vote, other_vote]
        '''
        self.n_channels = 5
        # self.n_freqs = 40

        self.data = np.load(path_to_data)
        self.items = np.load(path_to_items)
        self.dim = (self.data.shape[1], self.data.shape[2])
        self.batch_size = batch_size
        self.len = self.items.shape[0]
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(self.len / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Generate data
        X, y = self.__data_generation(indexes)

        return X, y

    def get_dim(self):
        'Dimensions for the input layer.'
        return (self.dim[0], self.dim[1], self.n_channels)

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(self.len)
        if self.shuffle == True:
            np.random.shuffle(self.indexes)

    def __data_generation(self, indexes):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        true_size = len(indexes)
        X = np.empty((true_size, *self.dim, self.n_channels))
        # y = np.empty((true_size, self.n_classes), dtype=float)
        y = np.empty((true_size), dtype=float)

        # Generate data
        for i, idx in enumerate(indexes):
            item = self.items[idx]
            # print(item)  # Uncomment for testing.
            X[i,:,:,:] = self.data[idx, :, :, :]
            # Store solution
            y[i] = item[3]

        return X, keras.utils.to_categorical(y, num_classes=self.n_classes)


#
# Test Data generator for predicting
# 

class TestDataGenerator(keras.utils.Sequence):
    'Generates data for Keras'
    def __init__(self, path_to_items, path_to_data, batch_size=32, n_classes=2, shuffle=False):
        ''' Initialization
        item: [eeg_id, eeg_sub_id, idx in sgrams (1st index), target,
        seizure_vote, lpd_vote, gpd_vote, lrda_vote,
        grda_vote, other_vote]
        '''
        self.n_channels = 5
        self.data = np.load(path_to_data)
        self.items = np.load(path_to_items)
        self.dim = (self.data.shape[1], self.data.shape[2])
        self.batch_size = batch_size
        self.len = self.data.shape[0]
        self.n_classes = n_classes
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        return int(np.ceil(self.len / self.batch_size))

    def __getitem__(self, index):
        'Generate one batch of data'
        # Generate indexes of the batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Generate data
        X = self.__data_generation(indexes)

        return X

    def get_dim(self):
        'Dimensions for the input layer.'
        return (self.dim[0], self.dim[1], self.n_channels)

    def on_epoch_end(self):
        'Updates indexes after each epoch'
        self.indexes = np.arange(self.len)
        # pass 
        
    def __data_generation(self, indexes):
        'Generates data containing batch_size samples' # X : (n_samples, *dim, n_channels)
        # Initialization
        true_size = len(indexes)
        X = np.empty((true_size, *self.dim, self.n_channels))

        # Generate data
        for i, idx in enumerate(indexes):
            # item = self.items[idx]
            # print(item)  # Uncomment for testing.
            X[i,:,:,:] = self.data[idx, :, :, :]

        return X


In [4]:
# Parameters
params = {
    'batch_size': 32,
    'n_classes': 2,
    'shuffle': True
    }

training_generator = DataGenerator(path_train_items, path_train , **params)
validation_generator = ValDataGenerator(path_val_items, path_val, **params)


## HP tuning

In [6]:
input_shape = training_generator.get_dim()
print('input shape: ',input_shape)
num_classes = 2

def model_builder(hp):
    model = keras.Sequential()
    model.add(keras.Input(shape=input_shape))

    for i in range(hp.Int("num_layers", 1, 3)):
        model.add(layers.Conv2D(
        filters=hp.Int(f"units_{i}", min_value=32, max_value=64, step=32),
        kernel_size = hp.Choice(f"kernel_{i}", [3,5]),
        data_format="channels_last",
        activation='relu', padding="same"))
        if hp.Boolean(f"dropout_{i}"):
            model.add(layers.Dropout(rate=0.25))
        if hp.Boolean(f"pooling_{i}"):
            model.add(layers.MaxPooling2D((3, 3)))

    if hp.Boolean(f"normalization"):
        model.add(layers.BatchNormalization())
    
    model.add(layers.Flatten())
    model.add(layers.Dense(
            units=hp.Int(f"units_{i}", min_value=32, max_value=512, step=32),
            activation=hp.Choice("activation", ["relu", "tanh"]),
            )
        )

    model.add(layers.Dense(num_classes, activation='softmax'))

    # learning_rate = hp.Float("lr", min_value=1e-4, max_value=1e-2, sampling="log")
    learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    momentum = hp.Choice('momentum', values=[0.2, 0.1, 0.01, 0.005])

    opt = keras.optimizers.SGD(
        learning_rate=learning_rate,
        momentum=momentum,
        )
    
    model.compile(
        optimizer=opt,
        # loss=tf.keras.losses.KLDivergence(),
        loss='categorical_crossentropy',
        # metrics=[tf.keras.metrics.KLDivergence()])
        metrics=[tf.keras.metrics.AUC()])    

    return model


input shape:  (49, 400, 5)


In [7]:
tuner = kt.Hyperband(model_builder,
                     # objective=kt.Objective("val_kullback_leibler_divergence", direction="min"),
                     objective=kt.Objective("val_auc", direction="max"),  # In Kaggle.
                     max_epochs=30,
                     factor=3,
                     directory=output_dir,
                     project_name='cwt_v2',
                     overwrite=True)


In [6]:
# tuner = kt.RandomSearch(
#     hypermodel=model_builder,
#     # objective="val_accuracy",
#     objective=kt.Objective("val_mean_absolute_error", direction="min"),
#     max_trials=3,
#     executions_per_trial=2,
#     overwrite=True,
#     directory="my_dir",
#     project_name="helloworld",
# )


In [7]:
# tuner.search_space_summary()

In [8]:
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_auc', patience=5)


In [9]:
tuner.search(training_generator, epochs=10, validation_data=validation_generator, callbacks=[stop_early])

Trial 1 Complete [00h 00m 17s]
val_auc: 0.8197604417800903

Best val_auc So Far: 0.8197604417800903
Total elapsed time: 00h 00m 17s

Search: Running Trial #2

Value             |Best Value So Far |Hyperparameter
3                 |2                 |num_layers
64                |32                |units_0
5                 |3                 |kernel_0
False             |False             |dropout_0
True              |False             |pooling_0
True              |True              |normalization
tanh              |relu              |activation
0.001             |0.0001            |learning_rate
0.1               |0.2               |momentum
32                |32                |units_1
5                 |3                 |kernel_1
True              |False             |dropout_1
True              |False             |pooling_1
2                 |2                 |tuner/epochs
0                 |0                 |tuner/initial_epoch
3                 |3                 |tuner/bracket


KeyboardInterrupt: 

In [10]:
models = tuner.get_best_models()
best_model = models[0]
best_model.summary()


In [11]:
tuner.results_summary(num_trials=1)

Results summary
Results in ./cwt_v1
Showing 1 best trials
Objective(name="val_kl_divergence", direction="min")

Trial 0050 summary
Hyperparameters:
num_layers: 2
units_0: 64
kernel_0: 3
dropout_0: False
pooling_0: True
normalization: True
activation: tanh
learning_rate: 0.01
momentum: 0.005
units_1: 64
kernel_1: 5
dropout_1: False
pooling_1: False
units_2: 64
kernel_2: 5
dropout_2: False
pooling_2: False
tuner/epochs: 30
tuner/initial_epoch: 10
tuner/bracket: 3
tuner/round: 3
tuner/trial_id: 0046
Score: 0.5162684321403503


## Retrain the model

In [12]:
checkpoint_filepath = f'{output_dir}checkpoint-13-1.model.keras'
model_checkpoint_callback = keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    monitor='val_auc',
    mode='max',
    save_weights_only=False,
    save_best_only=True)

best_hps=tuner.get_best_hyperparameters()[0]

model = tuner.hypermodel.build(best_hps)

history = model.fit(training_generator, epochs=5,
          validation_data=validation_generator,
          callbacks=[model_checkpoint_callback])


Epoch 1/5
[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 35ms/step - kl_divergence: 1.4914 - loss: 1.4914 - val_kl_divergence: 1.1206 - val_loss: 1.1202
Epoch 2/5
[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - kl_divergence: 1.0690 - loss: 1.0690 - val_kl_divergence: 0.9316 - val_loss: 0.9375
Epoch 3/5
[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 30ms/step - kl_divergence: 0.8802 - loss: 0.8802 - val_kl_divergence: 0.8261 - val_loss: 0.8321
Epoch 4/5
[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 29ms/step - kl_divergence: 0.6723 - loss: 0.6723 - val_kl_divergence: 0.6916 - val_loss: 0.6916
Epoch 5/5
[1m381/381[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 30ms/step - kl_divergence: 0.4611 - loss: 0.4610 - val_kl_divergence: 0.6016 - val_loss: 0.6014
