### Chapter 8 - Hyper Parameter Tuning

In [None]:
!pip install scikeras

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate values for every hyperparameter explored by the grid search.
activation = ['relu', 'sigmoid']
optimizer = ['adam', 'sgd']
batch_size = [10, 20, 30]
epochs = [10, 20]
# SciKeras only forwards a parameter to the model-building function when it is
# prefixed with `model__` (or declared in the KerasClassifier constructor).
# Without the prefix `activation` is rejected as an invalid parameter and
# `optimizer` never reaches create_model, which compiles the model itself.
# `batch_size` and `epochs` are native wrapper parameters and need no prefix.
param_grid = dict(
    model__activation=activation,
    model__optimizer=optimizer,
    batch_size=batch_size,
    epochs=epochs,
)

In [None]:
def create_model(activation='relu', optimizer='adam'):
    """Return a compiled binary classifier for 5-feature inputs.

    Args:
        activation: Activation used by the two hidden Dense layers.
        optimizer: Optimizer handed to ``model.compile``.

    Returns:
        A compiled ``Sequential`` model with one sigmoid output unit.
    """
    network = Sequential([
        Dense(64, input_dim=5, activation=activation),
        Dense(32, activation=activation),
        # Single sigmoid unit paired with binary cross-entropy below.
        Dense(1, activation='sigmoid'),
    ])
    network.compile(
        loss='binary_crossentropy',
        optimizer=optimizer,
        metrics=['accuracy'],
    )
    return network

In [None]:
# Wrap the builder for scikit-learn. SciKeras names this argument `model`;
# `build_fn` is the deprecated spelling kept only for backward compatibility.
model = KerasClassifier(model=create_model, verbose=0)

In [None]:
# Exhaustive search over param_grid with 2-fold cross-validation on all cores.
# NOTE(review): X and y are not defined in any visible cell - confirm where the
# feature matrix (5 columns, per create_model) and binary labels come from.
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    cv=2,
    n_jobs=-1,
)
grid_result = grid.fit(X, y)

In [None]:
# Report the best mean cross-validated score and the parameters that produced it.
print(f"Best: {grid_result.best_score_:f} using {grid_result.best_params_}")

#### Randomized Search

In [None]:
from sklearn.model_selection import RandomizedSearchCV

# Use RandomizedSearchCV to sample 10 random combinations from the search space.
# `param_dist` is not defined until a later section of the notebook, so the
# space defined above for this model (`param_grid`) is used instead.
# NOTE(review): X and y are not defined in any visible cell - confirm their source.
random_search = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=10, cv=3)
random_search.fit(X, y)

#### Data preprocessing Hyper parameters

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

In [None]:
# Number of target classes in Fashion-MNIST
num_classes = 10
# Fetch the train/test splits
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()
# One-hot encode the integer labels of both splits
y_train, y_test = (
    to_categorical(labels, num_classes) for labels in (y_train, y_test)
)

In [None]:
# Define the model architecture
def create_model(batch_size=32, data_augmentation=True, data_normalization=True):
    """Build and compile a CNN with optional in-model preprocessing.

    Args:
        batch_size: Accepted so it can appear in the search space; it is not
            used when building the network.
        data_augmentation: When True, prepend random rotation, translation and
            horizontal-flip layers.
        data_normalization: When True, prepend a layer rescaling pixels by 1/255.

    Returns:
        A compiled ``Sequential`` model for 28x28x1 inputs with a
        ``num_classes``-way softmax output.
    """
    model = Sequential()
    # Declare the input shape up front so it is known no matter which optional
    # preprocessing layers end up first in the stack.
    model.add(tf.keras.Input(shape=(28, 28, 1)))
    if data_normalization:
        model.add(tf.keras.layers.Rescaling(1. / 255))
    if data_augmentation:
        # ImageDataGenerator is a data iterator, not a layer, and cannot be
        # passed to model.add(). The preprocessing layers below apply the same
        # transformations inside the model.
        # RandomRotation expects a fraction of a full turn: 20 degrees = 20/360.
        model.add(tf.keras.layers.RandomRotation(20 / 360, fill_mode='nearest'))
        model.add(tf.keras.layers.RandomTranslation(height_factor=0.1, width_factor=0.1,
                                                    fill_mode='nearest'))
        model.add(tf.keras.layers.RandomFlip('horizontal'))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(num_classes, activation='softmax'))
    # Compile the model
    model.compile(optimizer=Adam(learning_rate=0.001),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

In [None]:
# Search space: mini-batch size plus on/off switches for the two
# preprocessing steps accepted by create_model.
param_dist = dict(
    batch_size=[16, 32, 64, 128],
    data_augmentation=[True, False],
    data_normalization=[True, False],
)

In [None]:
# Define the KerasClassifier wrapper for use in RandomizedSearchCV.
# `tf.keras.wrappers.scikit_learn` has been removed from TensorFlow; SciKeras
# (installed at the top of this notebook) is its replacement.
from scikeras.wrappers import KerasClassifier

# Declaring the builder's own arguments here registers them as tunable
# parameters and routes them to create_model. `batch_size` and `epochs` are
# native wrapper parameters.
model = KerasClassifier(
    model=create_model,
    epochs=5,
    verbose=1,
    data_augmentation=True,
    data_normalization=True,
)

In [None]:
# Randomised search over param_dist with 2-fold cross-validation
random_search = RandomizedSearchCV(
    model,
    param_dist,
    cv=2,
    verbose=2,
)

# Add a trailing channel axis to the images, then run the search
random_search_results = random_search.fit(
    x_train.reshape(-1, 28, 28, 1),
    y_train,
)

In [None]:
# Report the winning value of each tuned hyperparameter
print(f"Best batch size: {random_search_results.best_params_['batch_size']}")
print(f"Best data augmentation: {random_search_results.best_params_['data_augmentation']}")
print(f"Best data normalization: {random_search_results.best_params_['data_normalization']}")

#### Model architecture Hyper parameters

In [None]:
!pip install keras-tuner

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from kerastuner.tuners import BayesianOptimization

In [None]:
# Fetch the Fashion MNIST train/test splits
(x_train, y_train), (x_test, y_test) = keras.datasets.fashion_mnist.load_data()

# Scale pixels to [0, 1] and append a channel axis
x_train = (x_train.astype("float32") / 255.0)[..., np.newaxis]
x_test = (x_test.astype("float32") / 255.0)[..., np.newaxis]
# One-hot encode the labels over 10 classes
y_train, y_test = (
    keras.utils.to_categorical(labels, 10) for labels in (y_train, y_test)
)

# Define the model builder function
def build_model(hp):
    """Build and compile a CNN whose architecture is sampled from ``hp``.

    Tuned choices: filters, kernel size and activation of every convolution,
    the number of extra convolutions (2 to 4), the width and activation of the
    dense layer, the dropout rate and the Adam learning rate.

    Args:
        hp: KerasTuner hyperparameter container used to declare and sample
            the search space.

    Returns:
        A compiled ``keras.Sequential`` model with a 10-way softmax output.
    """
    model = keras.Sequential()
    model.add(layers.Conv2D(filters=hp.Int('conv_1_filter', min_value=32, max_value=128, step=32),
                            kernel_size=hp.Choice('conv_1_kernel', values=[5, 3]),
                            activation=hp.Choice('conv_1_activation', values=['relu', 'tanh']),
                            padding='same',
                            input_shape=(28, 28, 1)))
    model.add(layers.MaxPooling2D(pool_size=(2, 2)))
    for i in range(hp.Int('num_layers', 2, 4)):
        # Number the extra layers from 2. Re-using 'conv_1_*' would make one
        # of them silently share the first layer's hyperparameters instead of
        # being tuned independently.
        layer_idx = i + 2
        model.add(layers.Conv2D(filters=hp.Int(f'conv_{layer_idx}_filter', min_value=32, max_value=128, step=32),
                                kernel_size=hp.Choice(f'conv_{layer_idx}_kernel', values=[3, 5]),
                                padding='same',
                                activation=hp.Choice(f'conv_{layer_idx}_activation', values=['relu', 'tanh'])))
    model.add(layers.Flatten())
    model.add(layers.Dense(units=hp.Choice('dense_1_units', values=[128, 256, 512]),
                           activation=hp.Choice('dense_1_activation', values=['relu', 'tanh'])))
    model.add(layers.Dropout(rate=hp.Float('dropout_1', min_value=0.0, max_value=0.5, step=0.1)))
    model.add(layers.Dense(10, activation='softmax'))

    # Compile the model; the learning rate is sampled on a log scale
    model.compile(optimizer=keras.optimizers.Adam(hp.Float('learning_rate', 0.0001, 0.01, sampling='log')),
                  loss='categorical_crossentropy',
                  metrics=['accuracy'])

    return model

# Define the Bayesian optimization tuner: 3 trials, each trained twice to
# average out run-to-run noise in the validation accuracy
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=3,
    executions_per_trial=2,
    directory='my_dir',
    project_name='fashion_mnist')

# Start the hyperparameter search. Validate on a 20% hold-out of the training
# data rather than on the test set, so the test set stays untouched for the
# final, unbiased evaluation.
tuner.search(x_train, y_train, epochs=5, validation_split=0.2)

# Get the optimal hyperparameters
best_hps = tuner.get_best_hyperparameters(num_trials=1)[0]

In [None]:
# Print the value the message actually names, then the complete set of
# selected hyperparameters.
print("Optimal number of filters for the first convolutional layer:", best_hps.get('conv_1_filter'))
print("All optimal hyperparameters:", best_hps.values)

#### Weight initialisation and Tuning

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.utils import to_categorical
from scikeras.wrappers import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV

In [None]:
# Fetch the Fashion-MNIST train/test splits
(x_train, y_train), (x_test, y_test) = fashion_mnist.load_data()

# Scale pixel values into [0, 1]
x_train, x_test = (
    images.astype('float32') / 255. for images in (x_train, x_test)
)
# One-hot encode the labels over 10 classes
y_train, y_test = (
    to_categorical(labels, 10) for labels in (y_train, y_test)
)

In [None]:
# CNN builder parameterised by the kernel initialiser
def create_model(init_mode='uniform'):
    """Return a compiled CNN whose weighted layers all use ``init_mode``.

    Args:
        init_mode: Kernel initializer passed to every Conv2D and Dense layer.

    Returns:
        A compiled ``Sequential`` model for 28x28x1 inputs with a 10-way
        softmax output.
    """
    cnn = Sequential([
        Conv2D(32, kernel_size=(3, 3), activation='relu',
               kernel_initializer=init_mode, input_shape=(28, 28, 1)),
        Conv2D(64, kernel_size=(3, 3), activation='relu',
               kernel_initializer=init_mode),
        MaxPooling2D(pool_size=(2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu', kernel_initializer=init_mode),
        Dropout(0.5),
        Dense(10, activation='softmax', kernel_initializer=init_mode),
    ])
    cnn.compile(
        loss='categorical_crossentropy',
        optimizer='adam',
        metrics=['accuracy'],
    )
    return cnn

In [None]:
# Create a KerasClassifier object for use with RandomizedSearchCV.
# SciKeras only lets a search set parameters that were declared in the
# constructor (or prefixed with `model__`), so `init_mode` is declared here
# with the builder's default. `model=` replaces the deprecated `build_fn=`.
model = KerasClassifier(model=create_model, init_mode='uniform', verbose=0)

In [None]:
# Search space: the kernel initialisers to compare
param_dist = dict(init_mode=['glorot_uniform', 'he_uniform'])

In [None]:
# Randomised search with 3-fold cross-validation and a fixed seed.
# NOTE(review): param_dist above lists only 2 candidates while n_iter is 4 -
# confirm whether n_iter should be lowered or the space widened.
random_search = RandomizedSearchCV(
    estimator=model,
    param_distributions=param_dist,
    n_iter=4,
    cv=3,
    verbose=2,
    random_state=42,
)

In [None]:
# Run the search: images get a trailing channel axis, and the extra keyword
# arguments (epochs, validation_data) are passed along with the fit call.
result = random_search.fit(
    x_train.reshape(-1, 28, 28, 1),
    y_train,
    epochs=5,
    validation_data=(x_test.reshape(-1, 28, 28, 1), y_test),
)

In [None]:
# Identify the best parameters found by the search. Despite its name,
# `best_weights` holds the search's best_params_ dict, not model weights.
best_weights = result.best_params_
# Bare expression so the notebook displays the value
best_weights

#### Model optimization hyper parameter tuning

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD, Adam, Adagrad
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier
from sklearn.model_selection import RandomizedSearchCV
import numpy as np

In [None]:
# Fetch the Fashion MNIST train/test splits
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Scale pixels to [0, 1]
X_train, X_test = (
    pixels.astype('float32') / 255.0 for pixels in (X_train, X_test)
)
# One-hot encode the labels
y_train, y_test = to_categorical(y_train), to_categorical(y_test)

In [None]:
# Define the model architecture
def create_model(learning_rate=0.01, momentum=0.0, decay_rate=0.0, optimizer='sgd'):
    """Build a small CNN compiled with a configurable optimizer.

    Args:
        learning_rate: Initial learning rate.
        momentum: SGD momentum; re-used as ``beta_1`` when ``optimizer`` is
            ``'adam'`` and ignored for any other optimizer.
        decay_rate: Inverse-time decay applied to the learning rate at every
            optimizer step; ``0.0`` keeps the rate constant.
        optimizer: ``'sgd'`` or ``'adam'``; any other value selects Adagrad.

    Returns:
        A compiled ``Sequential`` model for 28x28x1 inputs with a 10-way
        softmax output.
    """
    model = Sequential()
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=(28, 28, 1)))
    model.add(MaxPooling2D((2, 2)))
    model.add(Flatten())
    model.add(Dense(128, activation='relu'))
    model.add(Dense(10, activation='softmax'))

    # Current Keras optimizers reject the legacy `decay=` argument and `lr=` is
    # a deprecated alias. This schedule reproduces the legacy behaviour:
    # lr / (1 + decay_rate * step).
    if decay_rate:
        lr_value = tf.keras.optimizers.schedules.InverseTimeDecay(
            initial_learning_rate=learning_rate,
            decay_steps=1,
            decay_rate=decay_rate,
        )
    else:
        lr_value = learning_rate

    if optimizer == 'sgd':
        opt = SGD(learning_rate=lr_value, momentum=momentum)
    elif optimizer == 'adam':
        opt = Adam(learning_rate=lr_value, beta_1=momentum)
    else:
        opt = Adagrad(learning_rate=lr_value)

    model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# scikit-learn compatible wrapper around the model builder
clf = KerasClassifier(build_fn=create_model)

# Search space: one list of candidate values per create_model argument
params = dict(
    learning_rate=[0.001, 0.01, 0.1],
    momentum=[0.0, 0.5, 0.9],
    decay_rate=[0.0, 0.01, 0.001],
    optimizer=['sgd', 'adam', 'adagrad'],
)

In [None]:
# Sample 5 random configurations, score each with 3-fold CV on all cores
search = RandomizedSearchCV(
    estimator=clf,
    param_distributions=params,
    n_iter=5,
    cv=3,
    n_jobs=-1,
    verbose=2,
)
# Images get a trailing channel axis before fitting
search.fit(X_train.reshape(-1, 28, 28, 1), y_train)

In [None]:
# Report the winning configuration and its mean cross-validated score
print(f"Best parameters:  {search.best_params_}")
print(f"Accuracy:  {search.best_score_}")

#### Regularization hyper parameters

In [None]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Flatten, Conv2D, MaxPooling2D
from tensorflow.keras.optimizers import SGD, Adam, Adagrad
from kerastuner.tuners import BayesianOptimization
import numpy as np


In [None]:
# Fetch Fashion MNIST; labels are kept as integer class ids
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()
# Scale pixels to [0, 1]
X_train, X_test = (
    pixels.astype('float32') / 255.0 for pixels in (X_train, X_test)
)

In [None]:
# Define the model builder function for Bayesian optimization
def build_model(hp):
    """Build and compile a CNN with tunable L2 strength and dropout rate.

    Args:
        hp: KerasTuner hyperparameter container; supplies ``l2_regularizer``
            (one of 1e-4, 1e-3, 1e-1) and ``dropout_rate`` (0 to 0.5 in
            steps of 0.1).

    Returns:
        A compiled ``keras.Sequential`` model for 28x28x1 inputs with a
        10-way softmax output.
    """
    model = keras.Sequential()
    # Add Convolutional layer
    model.add(Conv2D(32, (3, 3), padding="same", activation='relu', input_shape=(28, 28, 1)))
    # Add MaxPooling layer
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # Add Flatten layer
    model.add(Flatten())
    # Add Dense layer with L2 regularization
    l2_strength = hp.Choice('l2_regularizer', values=[1e-4, 1e-3, 1e-1])
    model.add(Dense(128, activation='relu', kernel_regularizer=tf.keras.regularizers.l2(l2_strength)))
    # Add Dropout layer
    model.add(Dropout(hp.Float('dropout_rate', 0, 0.5, step=0.1)))
    # Add output layer
    model.add(layers.Dense(units=10, activation='softmax'))
    # Compile the model. The loss name previously contained a stray space
    # ('...crossentrop y'), which is not a valid Keras loss identifier.
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model

In [None]:
# Bayesian-optimisation tuner: 5 trials, ranked by validation accuracy,
# with results stored under my_dir/HP_tuning
tuner = BayesianOptimization(
    build_model,
    objective='val_accuracy',
    max_trials=5,
    directory='my_dir',
    project_name='HP_tuning',
)

In [None]:
# Run the search on this section's data, holding out 20% for validation.
# This section defines `X_train` (capital X); the lowercase `x_train` used
# before was a leftover variable from an earlier section.
tuner.search(X_train.reshape(-1, 28, 28, 1), y_train, epochs=5, validation_split=0.2)
# Get the best hyperparameters
best_hps = tuner.get_best_hyperparameters(1)[0]
# Print the best hyperparameters
print(f"Best Hyperparameters: {best_hps.values}")

In [None]:
# Build and compile the model with the best hyperparameters
model = tuner.hypermodel.build(best_hps)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Train the model on this section's data. It is named `X_train` (capital X);
# the lowercase `x_train` used before was a leftover from an earlier section.
model.fit(X_train.reshape(-1, 28, 28, 1), y_train)