# Random serach to find the best model

## Load dataset

In [1]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

image_size = 128 # image size are small to make sure random search doesn't take so long!
train_datagen = ImageDataGenerator(rescale=1/255)

train_generator = train_datagen.flow_from_directory(
       './data/train/',  
       target_size=(image_size, image_size), 
       batch_size=16,
       class_mode='categorical')


Found 905 images belonging to 5 classes.


In [2]:
class_names = train_generator.class_indices
print("Class names in the dataset:", class_names)

Class names in the dataset: {'day': 0, 'fog': 1, 'night': 2, 'rain': 3, 'snow': 4}


In [2]:
test_datagen  = ImageDataGenerator( rescale = 1.0/255. )

validation_generator =  test_datagen.flow_from_directory("./data/test/",
                                                        batch_size=16,
                                                        class_mode  = 'categorical',
                                                        target_size = (image_size, image_size))

Found 145 images belonging to 5 classes.


## Create the model

Our model has 3-4 CNN layers and 2-3 fully connected layers. Based on selected hyperparameters, number of layers, filter size, number of kernels, etc. would be different.

In [3]:
import tensorflow as tf
from tensorflow.keras.optimizers import RMSprop


def create_model(n1=16, n2=16, n3=16, n4=0, n5=128, n6=0, s1=3, s2=3, s3=3, s4=3, 
    pooling_size=2, activation='relu', learning_rate=1e-4):

  model = tf.keras.models.Sequential([
      tf.keras.layers.Input(shape=(image_size, image_size, 3)), 
      tf.keras.layers.Conv2D(n1, (s1, s1), activation=activation),
      tf.keras.layers.MaxPooling2D(pooling_size, pooling_size),
      tf.keras.layers.Conv2D(n2, (s2, s2), activation=activation),
      tf.keras.layers.MaxPooling2D(pooling_size, pooling_size),
      tf.keras.layers.Conv2D(n3, (s3, s3), activation=activation),
      tf.keras.layers.MaxPooling2D(pooling_size, pooling_size)
  ])

  if n4:
    model.add(tf.keras.layers.Conv2D(n4, (s4, s4), activation=activation))
    model.add(tf.keras.layers.MaxPooling2D(pooling_size, pooling_size))

  model.add(tf.keras.layers.Flatten())
  model.add(tf.keras.layers.Dense(n5, activation=activation))
  if n6:
    model.add(tf.keras.layers.Dense(n6, activation=activation))
  model.add(tf.keras.layers.Dense(5, activation='softmax'))
      
  


  model.compile(loss='categorical_crossentropy',
                optimizer=RMSprop(learning_rate=learning_rate),
                metrics=['accuracy'])
  return model

In [4]:
model = create_model(s1=7, s2=7, s3=7, n4=16, s4=7, n6=128, pooling_size=2)
        
model.summary()

## Random search

### Get features and labels from data generators

In [5]:
import numpy as np

# Initialize empty lists to hold data
X_data = []
y_data = []

num_batches = len(train_generator)

for _ in range(num_batches):
    # Get the next batch
    X_batch, y_batch = next(train_generator)
    # Append the batch data to the lists
    X_data.append(X_batch)
    y_data.append(y_batch)


X = np.concatenate(X_data, axis=0)  
y = np.concatenate(y_data, axis=0)

# check data shape
print(X.shape)  
print(y.shape)  

(905, 128, 128, 3)
(905, 5)


### create possible hyper-parameters dictionary

In [6]:
from itertools import product

# Define the hyperparameter grid
hyperparameter_grid = {
    'n1': [64, 128, 256],
    'n2': [64, 128, 256],
    'n3': [64, 128, 256],
    'n4': [0, 64, 128, 256],
    'n5': [128, 256, 512],
    'n6': [128, 256, 512],
    's1': [3, 5, 7],
    's2': [3, 5, 7],
    's3': [3, 5, 7],
    's4': [3, 5, 7],
    'pooling_size': [2],
    'activation': ['relu', 'tanh'],
    'learning_rate': [1e-4]
}

# Create a list of all hyperparameter names
hyperparameter_names = list(hyperparameter_grid.keys())

# Generate all combinations of hyperparameters
hyperparameter_combinations = list(product(*hyperparameter_grid.values()))

In [7]:
# total number of possible scenarios
len(hyperparameter_combinations)

157464

### start random search and save the results

In [11]:
from sklearn.model_selection import KFold
import random

best_score = 0
best_params = None
ITR_NUMBER = 1 # here you can change the number of random scenarios you want to evaluate 


n_splits = 3
kf = KFold(n_splits=n_splits)

for idx in range(ITR_NUMBER):

    random_idx = random.randint(0, len(hyperparameter_combinations)-1)
    while random_idx in selected_idx:
        random_idx = random.randint(0, len(hyperparameter_combinations)-1)

    selected_idx.append(random_idx)

    combination = hyperparameter_combinations[random_idx]

    print(f"ITERATION {idx+1} OUT OF {ITR_NUMBER} - selected index: {random_idx}")
    params = dict(zip(hyperparameter_names, combination))
    
    fold_val_acc = []
    fold_acc = []
    fold_val_loss = []
    fold_loss = []
    print(f"Params: {params}")
    for train_index, val_index in kf.split(X):
        # Split data into training and validation sets for the current fold
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        
        # Create and compile the model with the current set of parameters
        model = create_model(**params)
        
        # Fit the model
        history = model.fit(
            X_train, y_train,
            epochs=20,  
            validation_data=(X_val, y_val),
            verbose=0 
        )
        
        # Evaluate the model on the validation set and store the score
        val_accuracy = max(history.history['val_accuracy'])
        fold_val_acc.append(val_accuracy)
        acc = max(history.history['accuracy'])
        fold_acc.append(acc)
        val_loss = min(history.history['val_loss'])
        fold_val_loss.append(val_loss)
        loss = min(history.history['loss'])
        fold_loss.append(loss)

    fold_acc_avg = np.mean(fold_acc)
    fold_val_acc_avg = np.mean(fold_val_acc)
    fold_loss_avg = np.mean(fold_loss)
    fold_val_loss_avg = np.mean(fold_val_loss)
    print(f"Average Accuracy: {fold_acc_avg}")
    print(f"Average Val Accuracy: {fold_val_acc_avg}")
    print(f"Average loss: {fold_loss_avg}")
    print(f"Average Val loss: {fold_val_loss_avg}")

    total_acc.append(fold_acc_avg)
    total_val_acc.append(fold_val_acc_avg)
    total_loss.append(fold_loss_avg)
    total_val_loss.append(fold_val_loss_avg)    
    
    # Update the best score and parameters if the current model is better
    if fold_val_acc_avg > best_score:
        best_score = fold_val_acc_avg
        best_params = params

print(f"Best Average Validation Accuracy: {best_score}")
print(f"Best Hyperparameters: {best_params}")

ITERATION 1 OUT OF 10 - selected index: 11946
Params: {'n1': 64, 'n2': 64, 'n3': 256, 'n4': 0, 'n5': 128, 'n6': 256, 's1': 7, 's2': 3, 's3': 7, 's4': 3, 'pooling_size': 2, 'activation': 'relu', 'learning_rate': 0.0001}
Average Accuracy: 0.7552579243977865
Average Val Accuracy: 0.692841370900472
Average loss: 0.5843812624613444
Average Val loss: 0.7528943220774332
ITERATION 2 OUT OF 10 - selected index: 21855
Params: {'n1': 64, 'n2': 128, 'n3': 64, 'n4': 128, 'n5': 512, 'n6': 512, 's1': 7, 's2': 7, 's3': 3, 's4': 5, 'pooling_size': 2, 'activation': 'tanh', 'learning_rate': 0.0001}
Average Accuracy: 0.9867394169171652
Average Val Accuracy: 0.6828965942064921
Average loss: 0.07660306245088577
Average Val loss: 0.8242895205815634
ITERATION 3 OUT OF 10 - selected index: 73578
Params: {'n1': 128, 'n2': 128, 'n3': 64, 'n4': 128, 'n5': 256, 'n6': 256, 's1': 3, 's2': 5, 's3': 7, 's4': 3, 'pooling_size': 2, 'activation': 'relu', 'learning_rate': 0.0001}
Average Accuracy: 0.7121621568997701
Avera

In [12]:
import pickle
data_to_save = {
    "hyperparameter_combinations": hyperparameter_combinations,
    "total_acc": total_acc,
    "total_val_acc": total_val_acc,
    "total_loss": total_loss,
    "total_val_loss": total_val_loss,
    'selected_random_idx': selected_idx
}
# Saving the data to a file using pickle
with open("model_training_results.pkl", "wb") as file:
    pickle.dump(data_to_save, file)

### find the best model

In [13]:
max_val_acc_value = max(total_val_acc)  # Find the max value in the list
max_index = total_val_acc.index(max_val_acc_value)  # Find the index of the max value

print(f"best parameters: {hyperparameter_combinations[max_index]}")
print(f"best accuracy: {total_acc[max_index]}")
print(f"best validation accuracy: {total_val_acc[max_index]}")
print(f"best loss: {total_loss[max_index]}")
print(f"best validation loss: {total_val_loss[max_index]}")

best parameters: (64, 64, 64, 0, 128, 128, 3, 7, 5, 7, 2, 'relu', 0.0001)
best accuracy: 0.8563702702522278
best validation accuracy: 0.7182386716206869
best loss: 0.3962838153044383
best validation loss: 0.7078284422556559


## Evalute manually selected models

In [21]:
manually_selected_parameters = [
      hyperparameter_combinations.index((256, 256, 256, 0, 512, 512, 3, 3, 3, 3, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((256, 256, 256, 256, 512, 512, 3, 3, 3, 3, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((128, 128, 128, 0, 256, 256, 3, 3, 3, 3, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((128, 128, 128, 128, 256, 256, 3, 3, 3, 3, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((64, 64, 64, 0, 128, 128, 3, 3, 3, 3, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((64, 64, 64, 64, 128, 128, 3, 3, 3, 3, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((256, 256, 256, 0, 512, 512, 5, 5, 5, 5, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((256, 256, 256, 256, 512, 512, 5, 5, 5, 5, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((128, 128, 128, 0, 256, 256, 5, 5, 5, 5, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((128, 128, 128, 128, 256, 256, 5, 5, 5, 5, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((64, 64, 64, 0, 128, 128, 5, 5, 5, 5, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((64, 64, 64, 64, 128, 128, 5, 5, 5, 5, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((256, 256, 256, 0, 512, 512, 7, 7, 7, 7, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((256, 256, 256, 256, 512, 512, 7, 7, 7, 7, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((128, 128, 128, 0, 256, 256, 7, 7, 7, 7, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((128, 128, 128, 128, 256, 256, 7, 7, 7, 7, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((64, 64, 64, 0, 128, 128, 7, 7, 7, 7, 2, 'relu', 0.0001)),
      hyperparameter_combinations.index((64, 64, 64, 64, 128, 128, 7, 7, 7, 7, 2, 'relu', 0.0001)),
]


In [24]:
from sklearn.model_selection import KFold
import random

best_score = 0
best_params = None

n_splits = 3
kf = KFold(n_splits=n_splits)

for count, index in enumerate(manually_selected_parameters):
    selected_idx.append(index)

    combination = hyperparameter_combinations[index]

    print(f"ITERATION {count+1} OUT OF {len(manually_selected_parameters)} - selected index: {index}")
    params = dict(zip(hyperparameter_names, combination))
    
    fold_val_acc = []
    fold_acc = []
    fold_val_loss = []
    fold_loss = []
    print(f"Params: {params}")
    for train_index, val_index in kf.split(X):
        # Split data into training and validation sets for the current fold
        X_train, X_val = X[train_index], X[val_index]
        y_train, y_val = y[train_index], y[val_index]
        
        # Create and compile the model with the current set of parameters
        model = create_model(**params)
        
        # Fit the model
        history = model.fit(
            X_train, y_train,
            epochs=20,  
            validation_data=(X_val, y_val),
            verbose=0 
        )
        
        # Evaluate the model on the validation set and store the score
        val_accuracy = max(history.history['val_accuracy'])
        fold_val_acc.append(val_accuracy)
        acc = max(history.history['accuracy'])
        fold_acc.append(acc)
        val_loss = min(history.history['val_loss'])
        fold_val_loss.append(val_loss)
        loss = min(history.history['loss'])
        fold_loss.append(loss)

    fold_acc_avg = np.mean(fold_acc)
    fold_val_acc_avg = np.mean(fold_val_acc)
    fold_loss_avg = np.mean(fold_loss)
    fold_val_loss_avg = np.mean(fold_val_loss)
    print(f"Average Accuracy: {fold_acc_avg}")
    print(f"Average Val Accuracy: {fold_val_acc_avg}")
    print(f"Average loss: {fold_loss_avg}")
    print(f"Average Val loss: {fold_val_loss_avg}")

    total_acc.append(fold_acc_avg)
    total_val_acc.append(fold_val_acc_avg)
    total_loss.append(fold_loss_avg)
    total_val_loss.append(fold_val_loss_avg)    
    
    # Update the best score and parameters if the current model is better
    if fold_val_acc_avg > best_score:
        best_score = fold_val_acc_avg
        best_params = params

print(f"Best Average Validation Accuracy: {best_score}")
print(f"Best Hyperparameters: {best_params}")

ITERATION 1 OUT OF 18 - selected index: 152928
Params: {'n1': 256, 'n2': 256, 'n3': 256, 'n4': 0, 'n5': 512, 'n6': 512, 's1': 3, 's2': 3, 's3': 3, 's4': 3, 'pooling_size': 2, 'activation': 'relu', 'learning_rate': 0.0001}
Average Accuracy: 0.891711433728536
Average Val Accuracy: 0.7281614740689596
Average loss: 0.3215294082959493
Average Val loss: 0.6935030817985535
ITERATION 2 OUT OF 18 - selected index: 157302
Params: {'n1': 256, 'n2': 256, 'n3': 256, 'n4': 256, 'n5': 512, 'n6': 512, 's1': 3, 's2': 3, 's3': 3, 's4': 3, 'pooling_size': 2, 'activation': 'relu', 'learning_rate': 0.0001}
Average Accuracy: 0.8011112610499064
Average Val Accuracy: 0.7104940811793009
Average loss: 0.5110840797424316
Average Val loss: 0.7096290389696757
ITERATION 3 OUT OF 18 - selected index: 76464
Params: {'n1': 128, 'n2': 128, 'n3': 128, 'n4': 0, 'n5': 256, 'n6': 256, 's1': 3, 's2': 3, 's3': 3, 's4': 3, 'pooling_size': 2, 'activation': 'relu', 'learning_rate': 0.0001}
Average Accuracy: 0.8497276306152344
A

In [26]:
import pickle
data_to_save = {
    "hyperparameter_combinations": hyperparameter_combinations,
    "total_acc": total_acc,
    "total_val_acc": total_val_acc,
    "total_loss": total_loss,
    "total_val_loss": total_val_loss,
    'selected_random_idx': selected_idx
}
# Saving the data to a file using pickle
with open("model_training_results.pkl", "wb") as file:
    pickle.dump(data_to_save, file)