# Keras Hyperparameter tuning

In this notebook, we searched for the best hyperparameters for the dense layers using the Hyperband tuning algorithm implemented in KerasTuner. However, we did not use the resulting hyperparameters, as they did not perform as expected during the fine-tuning phase.

We also made a mistake by not exploring the HeUniform initialization strategy for the ReLU dense layers. Instead, we only explored Glorot Uniform, which is better suited to sigmoid activation layers.

The tuner used here is KerasTuner's Hyperband; we followed the reference implementation documented at https://keras.io/api/keras_tuner/tuners/hyperband/.

## Tuning Dense Layers of ConvNext

In [None]:
from tensorflow.keras import regularizers, initializers

SEED = 42

def model_builder(hp):
    """Build and compile one candidate model for the Hyperband search.

    The graph is: input -> 96x96 bicubic resize -> ``data_augmentation`` ->
    ``model_cnn`` (presumably the ConvNext backbone; it is defined outside
    this cell) -> pooling -> 1 to 3 dense blocks -> single sigmoid unit.

    Hyperparameters registered on ``hp`` (in this order): ``dropout``,
    ``pooling``, ``reg_type``, ``reg_strength``, ``init_distribution``,
    ``n_layers``, ``units_<i>`` for each dense block, ``learning_rate`` and
    ``optimizer``.

    Args:
        hp: hyperparameter container supplied by the tuner; queried through
            ``hp.Float``, ``hp.Choice`` and ``hp.Int``.

    Returns:
        A ``keras.Model`` compiled with binary cross-entropy loss and the
        ``binary_accuracy`` metric.
    """
    inputs = tf.keras.Input(shape=IMG_SHAPE)

    x = tf.keras.layers.Resizing(96, 96, interpolation='bicubic', name='resizing')(inputs)

    # Augmentation runs inside the model, ahead of the convolutional base.
    x = data_augmentation(x)
    x = model_cnn(x)

    # A single dropout rate is shared by every Dropout layer in the head.
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.05)

    # Choice between Flatten and global average pooling.
    # Only the Flatten variant is followed by an extra Dropout.
    if hp.Choice('pooling', ['flatten', 'global']) == 'flatten':
        x = tf.keras.layers.Flatten()(x)
        x = tf.keras.layers.Dropout(hp_dropout)(x)
    else:
        x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Kernel regularisation: type and strength.
    hp_reg_type = hp.Choice('reg_type', ['l1', 'l2'])
    hp_reg_strength = hp.Float('reg_strength', min_value=0.0, max_value=0.1, step=0.02)

    # Kernel initialisation distribution.
    hp_init_distribution = hp.Choice('init_distribution', ['glorot_uniform', 'glorot_normal'])

    # Resolve the two choices once; fresh instances are created per layer below.
    make_regularizer = regularizers.l1 if hp_reg_type == 'l1' else regularizers.l2
    if hp_init_distribution == 'glorot_uniform':
        make_initializer = initializers.GlorotUniform
    else:
        make_initializer = initializers.GlorotNormal

    for i in range(hp.Int('n_layers', 1, 3)):
        # Dense -> BatchNorm -> ReLU -> Dropout, with 128..1024 units per block.
        x = layers.Dense(
            units=hp.Int(f'units_{i}', min_value=128, max_value=1024, step=128),
            kernel_regularizer=make_regularizer(hp_reg_strength),
            kernel_initializer=make_initializer(seed=SEED),
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation('relu')(x)
        x = layers.Dropout(hp_dropout)(x)

    # The last layer is always a single sigmoid unit. It gets its own
    # initializer instance instead of relying on a variable leaked by the loop.
    outputs = tf.keras.layers.Dense(
        1, activation='sigmoid', kernel_initializer=make_initializer(seed=SEED)
    )(x)

    model = keras.Model(inputs, outputs)

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    hp_optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop'])

    # Instantiate the optimizer so the sampled learning rate is actually used;
    # passing only the optimizer name to compile() would ignore it.
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'rmsprop': tf.keras.optimizers.RMSprop,
    }
    optimizer = optimizer_classes[hp_optimizer](learning_rate=hp_learning_rate)

    model.compile(loss=keras.losses.BinaryCrossentropy(),
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])

    return model

# Abort a trial once validation accuracy has not improved for 10 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_binary_accuracy',
                                              patience=10)

# A dedicated project_name keeps this search's trials in their own sub-folder
# of "Tuner Results". Without it every tuner in the notebook uses the default
# project folder, and a later search silently resumes the earlier one's state.
tuner = kt.Hyperband(model_builder,
                     objective='val_binary_accuracy',
                     directory="Tuner Results",
                     project_name="convnext_dense")

# NOTE(review): Hyperband schedules the number of epochs per trial itself
# (bounded by its max_epochs argument), so `epochs=20` here may be overridden.
# Confirm, and pass max_epochs to the tuner if 20 is the intended cap.
tuner.search(training_set, epochs=20,
             validation_data=validation_set,
             class_weight=class_weights_dict,
             callbacks=[stop_early])

# Get the best model
models = tuner.get_best_models(num_models=1)
model = models[0]

Best hyperparameters:
- Activation: leaky_relu
- Dropout: 0.4
- Pooling: global
- Regularization Type: l1
- Regularization Strength: 0.0
- Initialization Distribution: glorot_uniform
- Number of Layers: 3
- Learning Rate: 0.0001
- Optimizer: adam

Units in the layers:
- Units in layer 1: 128
- Units in layer 2: 1024
- Units in layer 3: 768

## Tuning Dense Layers of EfficientNet

In [None]:
from tensorflow.keras import regularizers, initializers

SEED = 42

def model_builder(hp):
    """Build and compile one candidate model for the Hyperband search.

    The graph is: input -> 96x96 bicubic resize -> ``data_augmentation`` ->
    ``model_cnn`` (presumably the EfficientNet backbone; it is defined outside
    this cell) -> pooling -> 1 to 5 dense blocks -> single sigmoid unit.

    Hyperparameters registered on ``hp`` (in this order): ``activation``,
    ``dropout``, ``pooling``, ``reg_type``, ``reg_strength``,
    ``init_distribution``, ``n_layers``, ``units_<i>`` for each dense block,
    ``learning_rate`` and ``optimizer``.

    Args:
        hp: hyperparameter container supplied by the tuner; queried through
            ``hp.Float``, ``hp.Choice`` and ``hp.Int``.

    Returns:
        A ``keras.Model`` compiled with binary cross-entropy loss and the
        ``binary_accuracy`` metric.
    """
    inputs = tf.keras.Input(shape=IMG_SHAPE)
    x = tf.keras.layers.Resizing(96, 96, interpolation='bicubic', name='resizing')(inputs)

    # Augmentation runs inside the model, ahead of the convolutional base.
    x = data_augmentation(x)
    x = model_cnn(x)

    # Activation and dropout rate are shared by every dense block in the head.
    hp_activation = hp.Choice('activation', values=['relu', 'swish', 'gelu', 'leaky_relu'])
    hp_dropout = hp.Float('dropout', min_value=0.2, max_value=0.5, step=0.05)

    # Only the Flatten variant is followed by an extra Dropout.
    if hp.Choice('pooling', ['flatten', 'global']) == 'flatten':
        x = tf.keras.layers.Flatten()(x)
        x = tf.keras.layers.Dropout(hp_dropout)(x)
    else:
        x = tf.keras.layers.GlobalAveragePooling2D()(x)

    # Kernel regularisation: type and strength.
    hp_reg_type = hp.Choice('reg_type', ['l1', 'l2'])
    hp_reg_strength = hp.Float('reg_strength', min_value=0.0, max_value=0.1, step=0.01)

    # Kernel initialisation distribution.
    hp_init_distribution = hp.Choice(
        'init_distribution', ['uniform', 'normal', 'glorot_uniform', 'glorot_normal']
    )

    # Resolve the two choices once; fresh instances are created per layer below.
    make_regularizer = regularizers.l1 if hp_reg_type == 'l1' else regularizers.l2
    initializer_classes = {
        'uniform': initializers.RandomUniform,
        'normal': initializers.RandomNormal,
        'glorot_uniform': initializers.GlorotUniform,
    }
    # Any other value (i.e. 'glorot_normal') falls back to GlorotNormal.
    make_initializer = initializer_classes.get(hp_init_distribution, initializers.GlorotNormal)

    for i in range(hp.Int('n_layers', 1, 5)):
        # Dense -> BatchNorm -> tuned activation -> Dropout.
        x = layers.Dense(
            units=hp.Int(f'units_{i}', min_value=128, max_value=1024, step=128),
            kernel_regularizer=make_regularizer(hp_reg_strength),
            kernel_initializer=make_initializer(seed=SEED),
        )(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation(hp_activation)(x)
        x = layers.Dropout(hp_dropout)(x)

    # The output layer gets its own initializer instance instead of relying on
    # a variable leaked by the loop above.
    outputs = tf.keras.layers.Dense(
        1, activation='sigmoid', kernel_initializer=make_initializer(seed=SEED)
    )(x)

    model = keras.Model(inputs, outputs)

    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])
    hp_optimizer = hp.Choice('optimizer', values=['adam', 'rmsprop', 'adagrad'])

    # Instantiate the optimizer so the sampled learning rate is actually used;
    # passing only the optimizer name to compile() would ignore it.
    optimizer_classes = {
        'adam': tf.keras.optimizers.Adam,
        'rmsprop': tf.keras.optimizers.RMSprop,
        'adagrad': tf.keras.optimizers.Adagrad,
    }
    optimizer = optimizer_classes[hp_optimizer](learning_rate=hp_learning_rate)

    model.compile(loss=keras.losses.BinaryCrossentropy(),
                  optimizer=optimizer,
                  metrics=['binary_accuracy'])

    return model

# Abort a trial once validation accuracy has not improved for 10 epochs.
stop_early = tf.keras.callbacks.EarlyStopping(monitor='val_binary_accuracy', patience=10)

# A dedicated project_name keeps this search's trials in their own sub-folder
# of "Tuner Results". Without it every tuner in the notebook uses the default
# project folder, and this search would silently resume an earlier one's state.
tuner = kt.Hyperband(model_builder,
                     objective='val_binary_accuracy',
                     directory="Tuner Results",
                     project_name="efficientnet_dense")

# NOTE(review): Hyperband schedules the number of epochs per trial itself
# (bounded by its max_epochs argument), so `epochs=20` here may be overridden.
# Confirm, and pass max_epochs to the tuner if 20 is the intended cap.
tuner.search(training_set, epochs=20, validation_data=validation_set,
             class_weight=class_weights_dict, callbacks=[stop_early])

# Get the best model
models = tuner.get_best_models(num_models=1)
model = models[0]



Best hyperparameters:
- Activation: gelu
- Dropout: 0.35
- Pooling: global
- Regularization Type: l1
- Regularization Strength: 0.0
- Initialization Distribution: glorot_normal
- Number of Layers: 1
- Learning Rate: 0.001
- Optimizer: rmsprop

Units in the layers:
- Units in layer 1: 768
- Units in layer 2: 0
- Units in layer 3: 0

