### **CIFAR10 MLP Hyperparameter exploration example using Scikit**

This is an example of Hyperparameter setting for MNIST MLP with Keras and Scikit. <br>
Uncomment cell #1 to use it in COLAB


In [1]:
#!pip install numpy
#!pip install random
#!pip install matplotlib
#!pip install tensorflow==2.17.1
#!pip install keras==3.6.0
#!pip pandas as pd
#!pip install scikeras

In [2]:
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
import tensorflow as tf
from tensorflow.keras import models
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras import Input
from tensorflow.keras.layers import ReLU, Dense, Softmax
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.models import Model
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
from sklearn.model_selection import GridSearchCV
from tensorflow.keras.callbacks import EarlyStopping

from tensorflow.keras.utils import to_categorical

from sklearn.model_selection import ParameterGrid
from scikeras.wrappers import KerasClassifier

import sys
sys.stderr = open('err.txt', 'w')


I0000 00:00:1740085170.451571  684184 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1740085170.479480  684184 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1740085170.479520  684184 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1740085170.612032  684184 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:00:1740085170.612087  684184 cuda_executor.cc:1001] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
I0000 00:0

#### **Identify GPU to use**

In [3]:
from tensorflow.python.client import device_lib
devices = device_lib.list_local_devices()
gpu_devices = [device for device in devices if device.device_type == 'GPU']
for gpu in gpu_devices:
    print('Using', gpu.physical_device_desc)

Using device: 0, name: NVIDIA T600 Laptop GPU, pci bus id: 0000:01:00.0, compute capability: 7.5


#### **Hyperparameter Search Space**

In [4]:
param_grid = {
    'hidden_size1'  : [512,1024, 2048],
    'hidden_size2'  : [512, 1024],
    'activation_pr': ['relu'],
    'optimizer_pr' : ['adam'],
    'lrate'        : [0.0001, 0.0001],
    'batch_size'   : [32],
    'loss_pr'      : ['categorical_crossentropy']
}

In [5]:
# Parameters not optimized

num_classes = 10  # this is the number of digits
num_epochs = 5

#### **Data preparation and processing**

In [6]:
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
type(x_train)                # data loaded in numpy arrays
print(x_train.shape, y_train.shape)
label_dict = {0: "airplane",
                     1: "automobile",
                     2: "bird",
                     3: "cat",
                     4: "deer",
                     5: "dog",
                     6: "frog",
                     7: "horse",
                     8: "ship",
                     9: "truck"}

# reshape images to RGB scale with width & height are size 32
X_train = x_train.reshape(50000, 32 * 32 * 3)
X_test = x_test.reshape(10000, 32 * 32 * 3)

# normalization to avoid gradient explode or vanish
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_train /= 255.0
X_test /= 255.0

# convert class into one hot encoder
y_train = to_categorical(y_train, 10)
y_test  = to_categorical(y_test, 10)
print(X_test.shape)
input_shape = X_test.shape[1]
print(input_shape, y_test.shape)




(50000, 32, 32, 3) (50000, 1)
(10000, 3072)
3072 (10000, 10)


#### **MLP Architecture**

In [7]:
def create_MLP(optimizer_pr = 'adam', loss_pr='MeanSquaredError', activation_pr='relu', 
               lrate=0.001, hidden_size1=64, hidden_size2=64):
    inputs = Input(shape=(input_shape,))
    x = Dense(hidden_size1, activation=activation_pr)(inputs)
    x = Dense(hidden_size2, activation=activation_pr)(inputs)
    x = Dense(10)(x)
    output = Softmax()(x)
    model = Model(inputs=inputs, outputs=output)

    if optimizer_pr == 'adam':
        opt = Adam(learning_rate=lrate)
    if optimizer_pr ==  'rmsprop':
        opt = RMSprop(learning_rate=lrate)
    model.compile(optimizer= opt, loss = loss_pr, metrics=['accuracy'])
              
    return model

# Configure early stopping
early_stopping = EarlyStopping(
    monitor='val_loss',  # Monitor the validation loss
    patience=5,         # Number of epochs with no improvement after which training will be stopped
    verbose=1,           # Log when training is stopped
    min_delta=0.001,
    mode='min',          # Stop training when the monitored quantity has stopped decreasing
    restore_best_weights = True  # Restore model weights from the epoch with the best value of the monitored quantity
)

In [8]:
# We shorten the exploration to 5 epochs to reduce overall exploration time

model = KerasClassifier(build_fn=create_MLP, hidden_size1=64, hidden_size2=64, lrate=0.001, optimizer_pr = 'adam',
                        loss_pr='categorical_crossentropy', activation_pr='relu', batch_size=16, epochs=100)

In [9]:
grid = GridSearchCV(estimator=model, param_grid=param_grid, cv=2, verbose = 3)
grid_result = grid.fit(X_train, y_train, validation_data=(X_test, y_test), 
                 callbacks=[early_stopping], verbose=0)

Fitting 2 folds for each of 12 candidates, totalling 24 fits
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 993us/step
[CV 1/2] END activation_pr=relu, batch_size=32, hidden_size1=512, hidden_size2=512, loss_pr=categorical_crossentropy, lrate=0.0001, optimizer_pr=adam;, score=0.492 total time= 1.9min
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step
[CV 2/2] END activation_pr=relu, batch_size=32, hidden_size1=512, hidden_size2=512, loss_pr=categorical_crossentropy, lrate=0.0001, optimizer_pr=adam;, score=0.501 total time= 1.9min
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[CV 1/2] END activation_pr=relu, batch_size=32, hidden_size1=512, hidden_size2=512, loss_pr=categorical_crossentropy, lrate=0.0001, optimizer_pr=adam;, score=0.501 total time= 2.6min
[1m782/782[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step
[CV 2/2] END activation_pr=relu, batch_size=32, hidden_size1=512, hidden_size2=512, loss

In [10]:
print(f"Best Accuracy: {grid_result.best_score_} using {grid_result.best_params_}")

# Detailed results
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']

for mean, std, param in zip(means, stds, params):
    print(f"Accuracy: {mean:.4f} (+/- {std:.4f}) with params: {param}")

Best Accuracy: 0.50854 using {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size1': 512, 'hidden_size2': 1024, 'loss_pr': 'categorical_crossentropy', 'lrate': 0.0001, 'optimizer_pr': 'adam'}
Accuracy: 0.4965 (+/- 0.0046) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size1': 512, 'hidden_size2': 512, 'loss_pr': 'categorical_crossentropy', 'lrate': 0.0001, 'optimizer_pr': 'adam'}
Accuracy: 0.4975 (+/- 0.0032) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size1': 512, 'hidden_size2': 512, 'loss_pr': 'categorical_crossentropy', 'lrate': 0.0001, 'optimizer_pr': 'adam'}
Accuracy: 0.5085 (+/- 0.0061) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size1': 512, 'hidden_size2': 1024, 'loss_pr': 'categorical_crossentropy', 'lrate': 0.0001, 'optimizer_pr': 'adam'}
Accuracy: 0.5025 (+/- 0.0048) with params: {'activation_pr': 'relu', 'batch_size': 32, 'hidden_size1': 512, 'hidden_size2': 1024, 'loss_pr': 'categorical_crossentropy', 'lrate'

In [11]:
import session_info
session_info.show(html=False)

-----
keras               3.6.0
matplotlib          3.9.2
numpy               1.26.4
scikeras            0.13.0
session_info        1.0.0
sklearn             1.5.2
tensorflow          2.17.1
-----
IPython             8.28.0
jupyter_client      8.6.3
jupyter_core        5.7.2
-----
Python 3.12.3 (main, Jan 17 2025, 18:03:48) [GCC 13.3.0]
Linux-5.15.167.4-microsoft-standard-WSL2-x86_64-with-glibc2.39
-----
Session information updated at 2025-02-20 23:12
