## This code performs hyperparameter optimization for each of the CNN models trained on the various datasets

### The hyperparameters optimized with random search were:

- Adam learning rate $\alpha$
- Batch size

The random search starts with a coarse tuning of $\alpha$, followed by successively finer tuning rounds, until the $\alpha$ that results in the lowest validation loss is found.

In [None]:
import sys
import gc
import os, shutil
import tempfile
from os import listdir
from random import randint

import numpy as np
import pandas as pd

import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.pyplot import imshow
import seaborn as sns
from PIL import Image

from keras import backend as K

import tensorflow as tf
tf.compat.v1.disable_eager_execution()
from tensorflow.keras import models
from tensorflow.keras.preprocessing import image
from tensorflow.keras import mixed_precision, regularizers
from tensorflow.keras.metrics import top_k_categorical_accuracy
from tensorflow.keras.layers import Input, Add, Dropout, Dense, Activation, ZeroPadding2D, BatchNormalization, Flatten, Conv2D, AveragePooling2D, MaxPooling2D, GlobalMaxPooling2D
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.initializers import random_uniform, glorot_uniform, constant, identity, he_normal
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, CSVLogger
from tensorflow.keras.losses import CategoricalCrossentropy
from tensorflow.keras.applications import InceptionV3, Xception, MobileNetV3Large,EfficientNetB0,EfficientNetV2B0
from resnet import resnet18

from sklearn.metrics import classification_report,confusion_matrix, matthews_corrcoef
from sklearn.utils.class_weight import compute_class_weight

In [None]:
# Set matplotlib's default colormap to gray.
plt.set_cmap('gray')
# Use the fully qualified option name: the bare 'precision' pattern is
# ambiguous on pandas versions that also define 'styler.format.precision',
# which makes set_option raise OptionError.
pd.set_option('display.precision', 3)

In [None]:
# Turn on memory growth for every physical GPU that TensorFlow lists.
gpu_devices = tf.config.list_physical_devices("GPU")
for device in gpu_devices:
    tf.config.experimental.set_memory_growth(device, True)

### Initial hyperparameter settings and use of *mixed precision* (NVIDIA CUDA/cuDNN)

In [None]:
# Make 'mixed_float16' the global Keras mixed-precision policy.
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_global_policy(policy)

# Early-stopping callback that tracks validation accuracy.
# NOTE(review): none of the fit() calls visible in this notebook pass it via
# `callbacks=`; confirm whether it is meant to be used.
es = EarlyStopping(
    monitor='val_accuracy',
    mode='max',
    min_delta=0.005,
    patience=6,
    restore_best_weights=True,
)

### Data Generator Utility

In [None]:
def create_datagen(train_path, val_path, test_path, target_size = (256,256), batch_size = 16, efficient = False):
    """Build directory-backed generators for the train, validation and test splits.

    All three generators come from one ``ImageDataGenerator`` whose only
    transformation is pixel rescaling, and all yield RGB images with
    categorical labels.

    Args:
        train_path: Directory handed to ``flow_from_directory`` for training.
        val_path: Directory handed to ``flow_from_directory`` for validation.
        test_path: Directory handed to ``flow_from_directory`` for testing.
        target_size: Image size forwarded to ``flow_from_directory``.
        batch_size: Batch size used by all three generators.
        efficient: When true, pixels are multiplied by 1.0 (left unscaled);
            otherwise they are multiplied by 1/255.
            NOTE(review): presumably meant for models with built-in input
            scaling such as EfficientNet - confirm.

    Returns:
        ``(train_generator, validation_generator, test_generator)``. Only the
        training generator shuffles its samples.
    """
    pixel_scale = 1. if efficient else 1./255
    datagen = image.ImageDataGenerator(rescale = pixel_scale)

    def flow(directory, shuffle):
        # Shared flow_from_directory settings; only the directory and the
        # shuffle flag differ between the three splits.
        return datagen.flow_from_directory(
            directory,
            target_size=target_size,
            batch_size=batch_size,
            color_mode="rgb",
            class_mode='categorical',
            shuffle=shuffle,
        )

    # Tuple elements are evaluated left to right: train, validation, test.
    return flow(train_path, True), flow(val_path, False), flow(test_path, False)

In [None]:
# He-normal initializer shared by the hidden Dense layer of each model head below.
initializer = tf.keras.initializers.HeNormal()

def add_regularization(model, regularizer = regularizers.l2(0.01)):
    """Return a copy of ``model`` with ``regularizer`` on every kernel.

    Setting ``kernel_regularizer`` on an existing layer only changes the layer
    configuration, so the model is rebuilt from its JSON configuration and the
    original weights are loaded back into the rebuilt model.

    Args:
        model: Keras model whose layers should be regularized.
        regularizer: ``tf.keras.regularizers.Regularizer`` instance assigned to
            each layer that has a ``kernel_regularizer`` attribute.

    Returns:
        The rebuilt model carrying the regularizer. Callers must use this
        return value; the object passed in is not the regularized model.
        If ``regularizer`` is not a ``Regularizer`` a message is printed and
        ``model`` is returned unchanged.
    """
    if not isinstance(regularizer, regularizers.Regularizer):
        print("Regularizer must be a subclass of tf.keras.regularizers.Regularizer")
        return model

    for layer in model.layers:
        if hasattr(layer, 'kernel_regularizer'):
            layer.kernel_regularizer = regularizer

    # Serialize the configuration after the attributes were changed so the
    # regularizer is part of the rebuilt model.
    model_json = model.to_json()

    # A private temporary directory avoids clashes between concurrent runs
    # that would otherwise share one fixed file name, and removes the weight
    # file once it has been reloaded.
    with tempfile.TemporaryDirectory() as tmp_dir:
        tmp_weights_path = os.path.join(tmp_dir, 'tmp_weights.h5')
        model.save_weights(tmp_weights_path)

        model = models.model_from_json(model_json)
        model.load_weights(tmp_weights_path, by_name=True)
    return model

### Utility for Model Creation

In [None]:
def make_Inception():
    """Build the InceptionV3-based 4-class classifier.

    The backbone is InceptionV3 with ImageNet weights, no top and global max
    pooling on 256x256x3 inputs. The head is Dropout(0.4) -> Dense(32, relu)
    -> BatchNormalization -> Dropout(0.4) -> Dense(4, softmax).

    Returns:
        The model rebuilt by ``add_regularization``, i.e. with the default
        regularizer attached to its kernels.
    """
    # `classes` and `classifier_activation` only apply when include_top=True,
    # so they are not passed here.
    base = InceptionV3(
        include_top = False,
        weights = "imagenet",
        input_shape = (256, 256, 3),
        pooling = "max",
    )
    out = Dropout(0.4)(base.output)
    out = Dense(32, activation='relu', kernel_initializer=initializer)(out)
    out = BatchNormalization()(out)
    out = Dropout(0.4)(out)
    out = Dense(4, activation='softmax', kernel_initializer="glorot_uniform")(out)

    model = Model(inputs = base.input,outputs=out)

    # add_regularization returns a rebuilt model; discarding its result would
    # hand back the un-regularized network.
    return add_regularization(model)

In [None]:
def make_Xception():
    """Build the Xception-based 4-class classifier.

    The backbone is Xception with ImageNet weights, no top and global max
    pooling on 256x256x3 inputs. The head is Dropout(0.5) -> Dense(32, relu)
    -> BatchNormalization -> Dropout(0.5) -> Dense(4, softmax).

    Returns:
        The model rebuilt by ``add_regularization``, i.e. with the default
        regularizer attached to its kernels.
    """
    base = Xception(
        include_top = False,
        weights = "imagenet",
        input_shape = (256, 256, 3),
        pooling = "max"
    )
    out = Dropout(0.5)(base.output)
    out = Dense(32, activation='relu', kernel_initializer=initializer)(out)
    out = BatchNormalization()(out)
    out = Dropout(0.5)(out)
    out = Dense(4, activation='softmax', kernel_initializer="glorot_uniform")(out)

    model = Model(inputs = base.input,outputs=out)

    # add_regularization returns a rebuilt model; discarding its result would
    # hand back the un-regularized network.
    return add_regularization(model)

In [None]:
def make_mobileNet():
    """Build the MobileNetV3Large-based 4-class classifier.

    The backbone is MobileNetV3Large with ImageNet weights, no top and global
    max pooling on 256x256x3 inputs. The head is Dropout(0.5) ->
    Dense(32, relu) -> BatchNormalization -> Dropout(0.5) -> Dense(4, softmax).
    Unlike the other builders in this notebook, no regularization step is
    applied here.

    Returns:
        The assembled Keras ``Model``.
    """
    backbone = MobileNetV3Large(
        input_shape= (256, 256, 3),
        alpha=1.0,
        include_top = False,
        weights= "imagenet",
        classes = 4,
        pooling = "max",
        dropout_rate = 0.3,
    )

    # Head layers, created in the order they are applied.
    head_layers = [
        Dropout(0.5),
        Dense(32, activation='relu', kernel_initializer=initializer),
        BatchNormalization(),
        Dropout(0.5),
        Dense(4, activation='softmax', kernel_initializer="glorot_uniform"),
    ]

    features = backbone.output
    for head_layer in head_layers:
        features = head_layer(features)

    return Model(inputs = backbone.input, outputs = features)

In [None]:
def make_EfficientB0():
    """Build the EfficientNetB0-based 4-class classifier.

    The backbone is EfficientNetB0 with ImageNet weights, no top and global
    max pooling on 256x256x3 inputs. The head is Dropout(0.5) ->
    Dense(64, relu) -> BatchNormalization -> Dropout(0.5) -> Dense(4, softmax).

    Returns:
        The model rebuilt by ``add_regularization``, i.e. with the default
        regularizer attached to its kernels.
    """
    # `classes` only applies when include_top=True, so it is not passed here.
    base = EfficientNetB0(
        include_top = False,
        weights = "imagenet",
        input_shape = (256, 256, 3),
        pooling = "max",
    )
    out = Dropout(0.5)(base.output)
    out = Dense(64, activation='relu', kernel_initializer=initializer)(out)
    out = BatchNormalization()(out)
    out = Dropout(0.5)(out)
    out = Dense(4, activation='softmax', kernel_initializer="glorot_uniform")(out)

    model = Model(inputs = base.input,outputs=out)

    # add_regularization returns a rebuilt model; discarding its result would
    # hand back the un-regularized network.
    return add_regularization(model)

### Define dataset paths and generator object

In [None]:
# Directories of the train / validation / test splits, in that order.
train_path, val_path, test_path = 'train_paths', 'val_paths', 'test_paths'

In [None]:
# Generators used for the sample preview in the next cell. `batch` is only
# defined later, inside the random-search loop, so an explicit batch size is
# used here to keep a fresh top-to-bottom run from raising NameError.
preview_batch_size = 16
train_generator, validation_generator, test_generator = create_datagen(
    train_path, val_path, test_path,
    batch_size = preview_batch_size, efficient = False,
)

In [None]:
# Pull one training batch and show every image with its label vector as title.
x_batch, y_batch = next(train_generator)

plt.figure(figsize=(16, 32))
# Grid of 8 rows x 4 columns, i.e. room for up to 32 images.
for position, (sample, label) in enumerate(zip(x_batch, y_batch), start=1):
    plt.subplot(8, 4, position)
    plt.title(str(label))
    plt.imshow(sample)

### Optimization for Model HQ,HPS,RSS - Inc, Xcp, Mbl, Eff - Random Search

In [None]:
# Round 1 of the random search (coarse): sample (learning rate, batch size)
# pairs at random, train for a few epochs and keep the pair with the lowest
# validation loss.
#
# best_hyperparam is (best val_loss, learning rate, batch size). Starting from
# +inf guarantees that the first finite validation loss becomes the incumbent,
# instead of leaving a placeholder learning rate / batch size of 0 in place
# when every trial scores above an arbitrary threshold.
best_hyperparam = (float('inf'), None, None)
epochs = 8

lr_trial = [1e-2, 1e-3, 1e-4, 5e-5]
batch_trial = [8, 16, 32]

trial = 10

#Round 1 Random Search (Coarse)
for _ in range(trial):
    # Index bounds follow the candidate lists, so editing the lists cannot
    # silently exclude values or raise IndexError.
    lr = lr_trial[randint(0, len(lr_trial) - 1)]
    batch = batch_trial[randint(0, len(batch_trial) - 1)]
    print(lr, batch)
    # The generators are rebuilt for every trial because the batch size changes.
    train_generator, validation_generator, test_generator = create_datagen(train_path, val_path, test_path,
                                                                       batch_size = batch,efficient = True)
    model = make_EfficientB0()
    model.compile(loss = CategoricalCrossentropy(from_logits=False, label_smoothing = 0.2, axis=-1), 
                                optimizer = Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, 
                                            epsilon=None, amsgrad=False), 
                               metrics = ['accuracy'])
    hist = model.fit(
            train_generator,
            epochs = epochs,
            validation_data=validation_generator,
            verbose = 2
        )
    # nanmin ignores epochs whose validation loss is NaN.
    val_loss = np.nanmin(hist.history['val_loss'])
    print(f"Minimum val loss is {val_loss}")
    if val_loss < best_hyperparam[0]:
        best_hyperparam = (val_loss, lr, batch)
        print(best_hyperparam)
    # Drop the model and reset the Keras session before the next trial.
    del model
    gc.collect()
    tf.keras.backend.clear_session()

In [None]:
# Round 2 of the random search (fine): keep the best batch size found so far
# and try half and double the best learning rate.
print("START RANDOM SEARCH ROUND 2 (FINE)")

epochs = 5
# Pin `batch` to the batch size the generators actually use. Previously it
# still held the value sampled in the last round-1 trial, so an improvement
# found here was recorded with the wrong batch size.
batch = best_hyperparam[2]
# efficient=True matches the input pipeline used for this model in round 1,
# so validation losses stay comparable across rounds.
train_generator, validation_generator, test_generator = create_datagen(train_path, val_path, test_path,
                                                                       batch_size = batch,efficient = True)

#Round 2 Random Search (Fine)
# The candidates are fixed before the loop, so an improvement found for the
# first candidate does not shift the second one.
lr_trial_2 = [ best_hyperparam[1] / 2, best_hyperparam[1] * 2 ]
for lr in lr_trial_2:
    model = make_EfficientB0()
    model.compile(loss = CategoricalCrossentropy(from_logits=False, label_smoothing = 0.2, axis=-1), 
                                optimizer = Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, 
                                            epsilon=None, amsgrad=False), 
                               metrics = ['accuracy'])
    hist = model.fit(
            train_generator,
            epochs = epochs,
            validation_data=validation_generator,
            verbose = 2
        )
    # nanmin ignores epochs whose validation loss is NaN.
    val_loss = np.nanmin(hist.history['val_loss'])
    print(f"Minimum val loss is {val_loss}")
    if val_loss < best_hyperparam[0]:
        best_hyperparam = (val_loss, lr, batch)
        print(best_hyperparam)
    # Drop the model and reset the Keras session before the next trial.
    del model
    gc.collect()
    tf.keras.backend.clear_session()

# Round 3 of the random search (very fine): try 1/3, 2/3, 4/3 and 5/3 of the
# best learning rate so far. The train/validation generators created for
# round 2 are reused unchanged.
print("START RANDOM SEARCH ROUND 3 (VERY FINE)")
#Round 3 Random Search (Very Fine)
lr_trial_3 = [ best_hyperparam[1] / 3, best_hyperparam[1] * 2/3 ,  
              best_hyperparam[1] * 4/3, best_hyperparam[1] * 5/3]
for lr in lr_trial_3:
    # Tune the same architecture as rounds 1 and 2; a different model's
    # validation loss is not comparable with the incumbent in best_hyperparam.
    model = make_EfficientB0()
    model.compile(loss = CategoricalCrossentropy(from_logits=False, label_smoothing = 0.2, axis=-1), 
                                optimizer = Adam(learning_rate=lr, beta_1=0.9, beta_2=0.999, 
                                            epsilon=None, amsgrad=False), 
                               metrics = ['accuracy'])
    hist = model.fit(
            train_generator,
            epochs = epochs,
            validation_data=validation_generator,
            verbose = 2
        )
    # nanmin ignores epochs whose validation loss is NaN.
    val_loss = np.nanmin(hist.history['val_loss'])
    print(f"Minimum val loss is {val_loss}")
    if val_loss < best_hyperparam[0]:
        # The batch size is not searched in this round: carry the incumbent's
        # value forward rather than the stale loop variable from round 1.
        best_hyperparam = (val_loss, lr, best_hyperparam[2])
        print(best_hyperparam)
    # Drop the model and reset the Keras session before the next trial.
    del model
    gc.collect()
    tf.keras.backend.clear_session()

In [None]:
# Show the final (val_loss, learning rate, batch size) tuple.
best_hyperparam