# Import Modules

In [7]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.model_selection import train_test_split

# Load and Preprocess dataset

In [9]:
# Load and preprocess MNIST data
(x_train, y_train), (x_test, y_test) = mnist.load_data()

# Reshape to add channel dimension and normalize
x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255.0

# One-hot encode labels
y_train = to_categorical(y_train, 10)
y_test = to_categorical(y_test, 10)

# Train-validation split (10% validation)
x_train_split, x_val, y_train_split, y_val = train_test_split(
    x_train, y_train, test_size=0.1, random_state=42
)

# Data augmentation
datagen = ImageDataGenerator(
    rotation_range=10,
    width_shift_range=0.1,
    height_shift_range=0.1
)
datagen.fit(x_train_split)

# Build Model

In [12]:
# Build the CNN model
model = models.Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),

    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.BatchNormalization(),
    layers.MaxPooling2D((2, 2)),

    layers.Flatten(),
    layers.Dense(10, activation='softmax')
])

# Compile the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Callbacks
early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)
lr_scheduler = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=2)

# Train the model
history = model.fit(
    datagen.flow(x_train_split, y_train_split, batch_size=128),
    epochs=20,
    validation_data=(x_val, y_val),
    steps_per_epoch=len(x_train_split) // 128,
    callbacks=[early_stop, lr_scheduler],
    verbose=1
)

# Evaluate on test data
test_loss, test_acc = model.evaluate(x_test, y_test, verbose=0)
print(f"\n Test accuracy: {test_acc:.4f}")

Epoch 1/20
[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 42ms/step - accuracy: 0.9055 - loss: 0.3066 - val_accuracy: 0.7738 - val_loss: 0.6513 - learning_rate: 0.0010
Epoch 2/20
[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9375 - loss: 0.1151 - val_accuracy: 0.7895 - val_loss: 0.6231 - learning_rate: 0.0010
Epoch 3/20
[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 44ms/step - accuracy: 0.9647 - loss: 0.1136 - val_accuracy: 0.9672 - val_loss: 0.1045 - learning_rate: 0.0010
Epoch 4/20
[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - accuracy: 0.9609 - loss: 0.1315 - val_accuracy: 0.9620 - val_loss: 0.1160 - learning_rate: 0.0010
Epoch 5/20
[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m19s[0m 45ms/step - accuracy: 0.9715 - loss: 0.0907 - val_accuracy: 0.9763 - val_loss: 0.0754 - learning_rate: 0.0010
Epoch 6/20
[1m421/421[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0