In [None]:
# Import core libraries
import tensorflow as tf
import numpy as np
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical

# Import model components
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import (Conv2D, MaxPool2D, BatchNormalization, 
                                   Dropout, Dense, Flatten, GaussianNoise)
from tensorflow.keras.regularizers import l2
from tensorflow.keras.callbacks import (LearningRateScheduler, 
                                      ModelCheckpoint, EarlyStopping)
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Optimizer with weight decay from TensorFlow Addons
from tensorflow_addons.optimizers import AdamW


TensorFlow Addons (TFA) has ended development and introduction of new features.
TFA has entered a minimal maintenance and release mode until a planned end of life in May 2024.
Please modify downstream libraries to take dependencies from other repositories in our TensorFlow community (e.g. Keras, Keras-CV, and Keras-NLP). 

For more information see: https://github.com/tensorflow/addons/issues/2807 

 The versions of TensorFlow you are currently using is 2.10.1 and is not supported. 
Some things might work, some things might not.
If you were to encounter a bug, do not file an issue.
If you want to make sure you're using a tested and supported configuration, either change the TensorFlow version or the TensorFlow Addons's version. 
You can find the compatibility matrix in TensorFlow Addon's readme:
https://github.com/tensorflow/addons


In [3]:
# Limit GPU memory available to TensorFlow

# Upper bound (in MB) on the memory TensorFlow may allocate on the first GPU.
GPU_MEMORY_LIMIT_MB = 20480

# Get list of GPUs visible to TensorFlow (empty on CPU-only machines)
gpus = tf.config.list_physical_devices('GPU')

if gpus:
    try:
        # Expose the first GPU as a single logical device with a hard memory cap
        tf.config.set_logical_device_configuration(
            gpus[0],
            [tf.config.LogicalDeviceConfiguration(memory_limit=GPU_MEMORY_LIMIT_MB)]
        )
    except RuntimeError as err:
        # Logical devices can only be configured before the GPU has been
        # initialized; re-running this cell in a live kernel would otherwise
        # abort the notebook with a traceback.
        print(f"GPU memory limit not applied: {err}")

In [None]:
# Fetch the MNIST train/test splits and prepare them for the network
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Add an explicit channel axis -> (num_samples, 28, 28, 1), cast to float32 and
# divide by 255 so pixel values are scaled into [0, 1]
x_train, x_test = (
    images.reshape(-1, 28, 28, 1).astype('float32') / 255
    for images in (x_train, x_test)
)

# Turn the integer class labels into one-hot vectors over 10 classes
y_train, y_test = (
    to_categorical(labels, 10)
    for labels in (y_train, y_test)
)

In [None]:
# Configure data augmentation pipeline
def _add_gaussian_noise(image):
    """Return `image` plus zero-mean Gaussian noise (std 0.05) of the same shape."""
    return image + np.random.normal(0, 0.05, image.shape)


datagen = ImageDataGenerator(
    # Geometric augmentations
    rotation_range=15,        # Random rotations, range of 15 degrees
    shear_range=0.1,          # Shear transformations
    zoom_range=0.2,           # Random zoom, range of 0.2
    width_shift_range=0.15,   # Horizontal shift, 0.15 as a fraction of width
    height_shift_range=0.15,  # Vertical shift, 0.15 as a fraction of height
    # Pixel-level augmentation
    preprocessing_function=_add_gaussian_noise,
    # Fraction of the data reserved for the 'validation' subset of flow()
    validation_split=0.2,
)

In [None]:
# Build CNN architecture
def _conv_bn(filters, padding):
    """Return [3x3 swish Conv2D with L2(1e-4) on the kernel, BatchNormalization]."""
    return [
        Conv2D(filters, (3, 3), activation='swish', padding=padding,
               kernel_regularizer=l2(1e-4)),
        BatchNormalization(),
    ]


def _dense_bn_dropout(units, rate):
    """Return [swish Dense with L2(1e-4) on the kernel, BatchNormalization, Dropout(rate)]."""
    return [
        Dense(units, activation='swish', kernel_regularizer=l2(1e-4)),
        BatchNormalization(),
        Dropout(rate),
    ]


# Layers are created strictly top to bottom, so the stack matches the order below.
net = Sequential([
    # Input: 28x28 single-channel images, perturbed with Gaussian noise (stddev 0.1)
    GaussianNoise(0.1, input_shape=(28, 28, 1)),

    # Block 1: two 64-filter convolutions ('same' padding), 2x2 max-pool, 25% dropout
    *_conv_bn(64, 'same'),
    *_conv_bn(64, 'same'),
    MaxPool2D((2, 2)),
    Dropout(0.25),

    # Block 2: two 128-filter convolutions ('same' padding), 2x2 max-pool, 35% dropout
    *_conv_bn(128, 'same'),
    *_conv_bn(128, 'same'),
    MaxPool2D((2, 2)),
    Dropout(0.35),

    # Block 3: two 256-filter convolutions ('valid' padding), no pooling, 50% dropout
    *_conv_bn(256, 'valid'),
    *_conv_bn(256, 'valid'),
    Dropout(0.5),

    # Classification head: flatten, two regularized dense stages, 10-way softmax
    Flatten(),
    *_dense_bn_dropout(1024, 0.5),
    *_dense_bn_dropout(512, 0.4),
    Dense(10, activation='softmax'),  # One probability per digit class (0-9)
])

In [None]:
# Learning rate schedule function for training
def lr_schedule(epoch):
    """Return the step-decayed learning rate for the given `epoch` value.

    The rate starts at 3e-4 and is multiplied by 0.1 once `epoch` exceeds 30,
    and by 0.1 again once `epoch` exceeds 45.
    """
    rate = 3e-4  # Base learning rate
    for boundary in (30, 45):
        if epoch > boundary:
            rate *= 0.1  # One 10x reduction per boundary passed
    return rate

# Configure model for training
adamw_optimizer = AdamW(weight_decay=1e-5)  # Adam with weight decay (TensorFlow Addons)

net.compile(
    optimizer=adamw_optimizer,
    loss='categorical_crossentropy',  # Multi-class loss; labels are one-hot encoded
    metrics=['accuracy'],             # Report accuracy alongside the loss
)

In [None]:
# Train the model with augmented data and callbacks for learning rate schedule, model checkpointing, and early stopping
history = net.fit(
    # Model.fit() has no `training_data` keyword (passing one raises TypeError);
    # the training generator is supplied through `x`.
    x=datagen.flow(x_train, y_train, batch_size=256, subset='training'),  # Training data generator
    # NOTE(review): the validation batches come from the same `datagen`, so they
    # appear to receive the same random augmentation and noise as the training
    # batches -- confirm this is intended.
    validation_data=datagen.flow(x_train, y_train, subset='validation'),  # Validation generator
    epochs=60,
    callbacks=[
        LearningRateScheduler(lr_schedule),                    # Sets the learning rate from lr_schedule
        ModelCheckpoint('model.h5', save_best_only=True),      # Keep only the best model in model.h5
        EarlyStopping(patience=12, restore_best_weights=True)  # Stop if no improvement after 12 epochs
    ]
)

Epoch 1/60
Epoch 2/60
Epoch 3/60
Epoch 4/60
Epoch 5/60
Epoch 6/60
Epoch 7/60
Epoch 8/60
Epoch 9/60
Epoch 10/60
Epoch 11/60
Epoch 12/60
Epoch 13/60
Epoch 14/60
Epoch 15/60
Epoch 16/60
Epoch 17/60
Epoch 18/60
Epoch 19/60
Epoch 20/60
Epoch 21/60
Epoch 22/60
Epoch 23/60
Epoch 24/60
Epoch 25/60
Epoch 26/60
Epoch 27/60
Epoch 28/60
Epoch 29/60
Epoch 30/60
Epoch 31/60
Epoch 32/60
Epoch 33/60
Epoch 34/60
Epoch 35/60
Epoch 36/60
Epoch 37/60
Epoch 38/60
Epoch 39/60
Epoch 40/60
Epoch 41/60
Epoch 42/60
Epoch 43/60
Epoch 44/60
Epoch 45/60
Epoch 46/60
Epoch 47/60
Epoch 48/60
Epoch 49/60
Epoch 50/60
Epoch 51/60
Epoch 52/60
Epoch 53/60
Epoch 54/60
Epoch 55/60
Epoch 56/60
Epoch 57/60
Epoch 58/60
Epoch 59/60
Epoch 60/60


In [None]:
# Reload the checkpoint saved to model.h5 and score it on the test split,
# which was not used during training
final_model = load_model('model.h5')

test_metrics = final_model.evaluate(x_test, y_test)
loss, acc = test_metrics

print(f"Test accuracy: {acc*100:.2f}%")

Test accuracy: 99.64%


In [None]:
# Save model in HDF5 format (writes to the same model.h5 path it was loaded from)
final_model.save(filepath="model.h5")