
# Model Tuning: Deep CNN with Dropout and Batch Normalisation – Stage 3


# data_preparation

In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import fashion_mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization


# Set a global random seed for reproducibility
np.random.seed(42)
tf.random.set_seed(42)

# Load Fashion MNIST dataset directly from Keras
# It returns: (X_train, y_train), (X_test, y_test)
(X_train, y_train), (X_test, y_test) = fashion_mnist.load_data()

# Normalize pixel values to range [0, 1] by dividing by 255
X_train = X_train.astype("float32") / 255.0
X_test = X_test.astype("float32") / 255.0

# Reshape input data to add a channel dimension (since images are grayscale)
# Shape becomes (num_samples, 28, 28, 1) for CNN compatibility
X_train = np.expand_dims(X_train, axis=-1)
X_test = np.expand_dims(X_test, axis=-1)

# Convert labels to one-hot encoded format (for softmax output)
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Print dataset shapes for confirmation
print(f"[INFO] X_train shape: {X_train.shape}")
print(f"[INFO] X_test shape: {X_test.shape}")
print(f"[INFO] y_train shape: {y_train.shape}")
print(f"[INFO] y_test shape: {y_test.shape}")

2025-04-16 20:36:47.271399: E tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:9342] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2025-04-16 20:36:47.271524: E tensorflow/compiler/xla/stream_executor/cuda/cuda_fft.cc:609] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2025-04-16 20:36:47.271569: E tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:1518] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered


[INFO] X_train shape: (60000, 28, 28, 1)
[INFO] X_test shape: (10000, 28, 28, 1)
[INFO] y_train shape: (60000, 10)
[INFO] y_test shape: (10000, 10)


# Model with Dropout and Batch Normalisation to improve generalisation 

In [2]:
def build_deep_cnn_model():
    """
    Builds a deeper convolutional neural network for Fashion MNIST classification,
    incorporating Dropout and Batch Normalisation for improved generalisation.
    """
    model = Sequential()

    # First convolutional block
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(28, 28, 1)))
    model.add(BatchNormalization())       # Normalize activations
    model.add(MaxPooling2D(pool_size=(2, 2)))  # Downsample feature maps
    model.add(Dropout(0.25))              # Randomly drop 25% of neurons

    # Second convolutional block
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Third convolutional block
    model.add(Conv2D(128, (3, 3), activation='relu', padding='same'))
    model.add(BatchNormalization())
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Flatten the feature maps into a 1D vector
    model.add(Flatten())

    # Fully connected dense layer
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))  # More aggressive dropout in dense layers

    # Output layer for 10 classes with softmax
    model.add(Dense(10, activation='softmax'))

    # Compile the model with optimizer and loss function
    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model

# training the model and fine tuning 

In [3]:
# First, build the model using the function we defined above
model = build_deep_cnn_model()

# Train the model on training data and validate on test set
# Using batch size of 64 and running for 10 epochs
history = model.fit(
    X_train, y_train,
    epochs=10,                      # Run for 10 passes through the training set
    batch_size=64,                 # Number of samples per gradient update
    validation_data=(X_test, y_test),  # Use test set as validation
    verbose=2                      # Print progress per epoch (less noisy than verbose=1)
)

Epoch 1/10
938/938 - 48s - loss: 0.6384 - accuracy: 0.7708 - val_loss: 0.3800 - val_accuracy: 0.8615 - 48s/epoch - 51ms/step
Epoch 2/10
938/938 - 44s - loss: 0.4137 - accuracy: 0.8511 - val_loss: 0.3439 - val_accuracy: 0.8747 - 44s/epoch - 47ms/step
Epoch 3/10
938/938 - 43s - loss: 0.3640 - accuracy: 0.8693 - val_loss: 0.2995 - val_accuracy: 0.8922 - 43s/epoch - 46ms/step
Epoch 4/10
938/938 - 44s - loss: 0.3319 - accuracy: 0.8798 - val_loss: 0.3304 - val_accuracy: 0.8742 - 44s/epoch - 47ms/step
Epoch 5/10
938/938 - 40s - loss: 0.3107 - accuracy: 0.8872 - val_loss: 0.2765 - val_accuracy: 0.9011 - 40s/epoch - 42ms/step
Epoch 6/10
938/938 - 43s - loss: 0.2972 - accuracy: 0.8927 - val_loss: 0.2862 - val_accuracy: 0.8986 - 43s/epoch - 46ms/step
Epoch 7/10
938/938 - 44s - loss: 0.2890 - accuracy: 0.8954 - val_loss: 0.3131 - val_accuracy: 0.8848 - 44s/epoch - 47ms/step
Epoch 8/10
938/938 - 44s - loss: 0.2717 - accuracy: 0.9017 - val_loss: 0.3283 - val_accuracy: 0.8853 - 44s/epoch - 47ms/step


# accuracy and loss visualisation

In [None]:
# ---------------------------------------------------------------
# Visualise Training Metrics: Accuracy and Loss Over Epochs
# ---------------------------------------------------------------

import matplotlib.pyplot as plt

# Extract training history metrics
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)

# Plot training and validation accuracy
plt.figure(figsize=(10, 4))
plt.subplot(1, 2, 1)
plt.plot(epochs, acc, label='Training Accuracy')
plt.plot(epochs, val_acc, label='Validation Accuracy')
plt.title('Training vs Validation Accuracy')
plt.xlabel('Epoch')
plt.ylabel('Accuracy')
plt.legend()
plt.grid(True)

# Plot training and validation loss
plt.subplot(1, 2, 2)
plt.plot(epochs, loss, label='Training Loss')
plt.plot(epochs, val_loss, label='Validation Loss')
plt.title('Training vs Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)

plt.tight_layout()
plt.show()

# Evaluation + Save the Model

In [5]:
# ---------------------------------------------------------------
# Evaluate Tuned Model on Test Set and Save It
# ---------------------------------------------------------------

# Evaluate the trained model on the test set (acts as unseen data)
test_loss, test_accuracy = model.evaluate(X_test, y_test, verbose=0)

# Print evaluation results
print(f"[RESULT] Test Accuracy of Tuned CNN Model: {test_accuracy:.4f}")
print(f"[RESULT] Test Loss: {test_loss:.4f}")

# Save the model to the correct directory
model.save("../models/stage3_fashion_cnn_tuned.h5")
print("[INFO] Tuned model saved to models/stage3_fashion_cnn_tuned.h5")

# Optional: print model architecture summary
print("\n[INFO] Model Architecture Summary:")
model.summary()

[RESULT] Test Accuracy of Tuned CNN Model: 0.9012
[RESULT] Test Loss: 0.2756
[INFO] Tuned model saved to models/stage3_fashion_cnn_tuned.h5

[INFO] Model Architecture Summary:
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 28, 28, 32)        320       
                                                                 
 batch_normalization (Batch  (None, 28, 28, 32)        128       
 Normalization)                                                  
                                                                 
 max_pooling2d (MaxPooling2  (None, 14, 14, 32)        0         
 D)                                                              
                                                                 
 dropout (Dropout)           (None, 14, 14, 32)        0         
                                                                 
 conv2d_1 (C