<a href="https://colab.research.google.com/github/anupam-codespace/Machine-Learning-Assignments/blob/main/Assignment_1_Digit_Recognition.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# ===============================
# Advanced & Fast MNIST Trainer
# - Uses TF 2.x (Colab default)
# - Fast model: SeparableConv2D blocks + BatchNorm + GAP
# - Data pipeline optimized with cache + prefetch
# - Data augmentation + callbacks (LR schedule, EarlyStopping)
# ===============================

# Step 0: If on Colab, enable GPU and optionally mixed precision
# In Colab: Runtime -> Change runtime type -> GPU
try:
    import tensorflow as tf
    from tensorflow.keras import mixed_precision

    # float16 mixed precision is a GPU optimisation; the TensorFlow
    # mixed-precision guide notes it runs much slower on CPU. Only switch the
    # global policy when a GPU is actually visible to TensorFlow.
    if tf.config.list_physical_devices('GPU'):
        mixed_precision.set_global_policy('mixed_float16')
        print("Mixed precision enabled")
    else:
        print("Mixed precision not available or not set")
except Exception as exc:
    # Deliberately best-effort: training still works under the default
    # float32 policy, so report why the switch failed instead of aborting.
    print("Mixed precision not available or not set")
    print("  reason:", repr(exc))

# Step 1: Imports
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks, optimizers
import matplotlib.pyplot as plt
import numpy as np
import os

print("TF version:", tf.__version__)

# Step 2: Load MNIST dataset (simple, reliable)
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Step 3: Preprocess - add channel dim and scale
# Convert to float32, scale to [0,1], shape -> (N,28,28,1)
x_train = np.expand_dims(x_train.astype('float32') / 255.0, -1)
x_test  = np.expand_dims(x_test.astype('float32') / 255.0, -1)

# Optional: hold out the tail of the training set for validation.
# The split uses an explicit index rather than negative slicing, because
# with num_val == 0 the slices `a[-0:]` / `a[:-0]` mean "everything" /
# "nothing", which would silently swap the train and validation sets.
val_split = 0.1
num_val = int(len(x_train) * val_split)
split_at = len(x_train) - num_val
x_val, y_val = x_train[split_at:], y_train[split_at:]
x_train, y_train = x_train[:split_at], y_train[:split_at]

print("Train:", x_train.shape, "Val:", x_val.shape, "Test:", x_test.shape)

# Step 4: Build tf.data pipeline (fast)
# BATCH_SIZE is the number of examples per batch; make_dataset passes it to
# Dataset.batch().
BATCH_SIZE = 128
# AUTOTUNE is handed to map(num_parallel_calls=...) and prefetch() in
# make_dataset so tf.data picks the parallelism / buffer size itself.
AUTOTUNE = tf.data.AUTOTUNE

def make_dataset(images, labels, augment=False, shuffle=False):
    """Build a batched, prefetched ``tf.data.Dataset`` from in-memory arrays.

    Args:
        images: Array-like of images, sliced along its first axis.
        labels: Array-like of labels aligned with ``images``.
        augment: When True, apply random brightness/contrast jitter to each
            image; the jitter is re-drawn on every pass over the dataset.
        shuffle: When True, shuffle with a 2048-element buffer; the order is
            re-drawn on every pass over the dataset.

    Returns:
        A dataset yielding ``(image_batch, label_batch)`` pairs of at most
        ``BATCH_SIZE`` elements.
    """
    ds = tf.data.Dataset.from_tensor_slices((images, labels))

    def _prep(img, lbl):
        # Only the dtype is enforced here; no rescaling is applied.
        img = tf.cast(img, tf.float32)
        return img, lbl

    ds = ds.map(_prep, num_parallel_calls=AUTOTUNE)

    # Cache only the deterministic part of the pipeline. A cache placed after
    # shuffle/augment/batch would replay the first epoch's order and random
    # draws in every later epoch, defeating both.
    ds = ds.cache()

    if shuffle:
        ds = ds.shuffle(buffer_size=2048)

    if augment:
        # Photometric jitter only. Horizontal flips are intentionally not
        # used: a mirrored digit is no longer a valid example of its label.
        # Rotation is not done here either (tf.image has no random_rotation);
        # geometric augmentation is left to Keras preprocessing layers.
        def _augment(img, lbl):
            img = tf.image.random_brightness(img, 0.1)
            img = tf.image.random_contrast(img, 0.9, 1.1)
            return img, lbl

        ds = ds.map(_augment, num_parallel_calls=AUTOTUNE)

    return ds.batch(BATCH_SIZE).prefetch(AUTOTUNE)

# Build one dataset per split. Only the training split is shuffled; the
# tf.data-level augmentation stays off for all three.
train_ds = make_dataset(x_train, y_train, shuffle=True, augment=False)
val_ds = make_dataset(x_val, y_val)
test_ds = make_dataset(x_test, y_test)

# Step 5: Geometric augmentation expressed as Keras preprocessing layers so
# it can be placed inside the model graph.
_augmentation_steps = [
    layers.RandomRotation(0.10),
    layers.RandomTranslation(height_factor=0.05, width_factor=0.05),
    layers.RandomZoom(0.08),
]
data_augmentation = tf.keras.Sequential(_augmentation_steps)

# Step 6: Build fast & effective model using SeparableConv2D blocks
def sep_conv_block(x, filters, kernel=(3,3), pool=True, dropout=0.2):
    """Apply two SeparableConv2D -> BatchNorm -> ReLU stages to ``x``.

    Args:
        x: Input Keras tensor.
        filters: Number of output filters for both separable convolutions.
        kernel: Kernel size for both separable convolutions.
        pool: When truthy, append a 2x2 max-pooling layer.
        dropout: Dropout rate appended last; skipped when falsy or <= 0.

    Returns:
        The output Keras tensor of the block.
    """
    features = x
    for _ in range(2):
        features = layers.SeparableConv2D(
            filters, kernel, padding='same', activation=None
        )(features)
        features = layers.BatchNormalization()(features)
        features = layers.Activation('relu')(features)

    if pool:
        features = layers.MaxPooling2D((2,2))(features)

    if dropout and dropout > 0:
        features = layers.Dropout(dropout)(features)

    return features

# Input
inputs = layers.Input(shape=(28,28,1))
x = data_augmentation(inputs)         # apply augmentation at training time
x = layers.Resizing(28,28)(x)         # keeps the spatial size at 28x28

# A Normalization layer standardises with a fixed mean/variance. Those
# statistics are not learned by fit(): they must be supplied or computed via
# adapt(). Without adapt() the layer keeps mean=0 / variance=1 and passes its
# input through unchanged, so compute them from the training images here.
normalizer = layers.Normalization()
normalizer.adapt(x_train)
x = normalizer(x)

# Feature extractor
x = sep_conv_block(x, 32, dropout=0.15)   # -> (14x14)
x = sep_conv_block(x, 64, dropout=0.25)   # -> (7x7)
x = sep_conv_block(x, 128, pool=False, dropout=0.25) # no pool this level

# Global pooling + head
x = layers.GlobalAveragePooling2D()(x)
x = layers.Dense(128, activation='relu')(x)
x = layers.BatchNormalization()(x)
x = layers.Dropout(0.5)(x)

# Output (note: if mixed precision is enabled, last layer should be float32)
outputs = layers.Dense(10, activation='softmax', dtype='float32')(x)

model = models.Model(inputs, outputs, name="fast_mnist_model")
model.summary()

# Step 7: Adam optimizer with sparse categorical cross-entropy, tracking
# accuracy.
optimizer = optimizers.Adam(learning_rate=1e-3)
model.compile(
    optimizer=optimizer,
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

# Step 8: All three callbacks watch the validation loss and log verbosely.
_watch_val_loss = dict(monitor='val_loss', verbose=1)
callbacks_list = [
    # Halve the learning rate after 3 stagnant epochs, down to 1e-6.
    callbacks.ReduceLROnPlateau(factor=0.5, patience=3, min_lr=1e-6, **_watch_val_loss),
    # Stop after 6 stagnant epochs and roll back to the best weights.
    callbacks.EarlyStopping(patience=6, restore_best_weights=True, **_watch_val_loss),
    # Keep only the best-scoring model on disk.
    callbacks.ModelCheckpoint("best_mnist_model.h5", save_best_only=True, **_watch_val_loss),
]

# Step 9: Fit on the training split, validating on the held-out split.
EPOCHS = 20
history = model.fit(
    train_ds,
    epochs=EPOCHS,
    validation_data=val_ds,
    callbacks=callbacks_list,
)

# Step 10: Score the trained model on the test split.
test_loss, test_acc = model.evaluate(test_ds)
print(f"\nTest Accuracy: {test_acc*100:.2f}%")

# Step 11: Side-by-side training/validation curves, one panel per metric.
# Each entry: (train key, val key, train label, val label, panel title).
_panels = (
    ('accuracy', 'val_accuracy', 'train_acc', 'val_acc', 'Accuracy'),
    ('loss', 'val_loss', 'train_loss', 'val_loss', 'Loss'),
)
plt.figure(figsize=(12,4))
for _slot, (_train_key, _val_key, _train_label, _val_label, _title) in enumerate(_panels, start=1):
    plt.subplot(1, 2, _slot)
    plt.plot(history.history[_train_key], label=_train_label)
    plt.plot(history.history[_val_key], label=_val_label)
    plt.legend()
    plt.title(_title)
    plt.xlabel('epoch')
plt.show()

# Step 12: Persist the model as it stands after training; the checkpoint
# callback writes its own best-so-far file separately.
model.save("final_mnist_model.h5")
print("Saved final model as final_mnist_model.h5")


Mixed precision enabled
TF version: 2.19.0
Train: (54000, 28, 28, 1) Val: (6000, 28, 28, 1) Test: (10000, 28, 28, 1)


Epoch 1/20
  1/422 ━━━━━━━━━━━━━━━━━━━━ 2:03:21 18s/step - accuracy: 0.1328 - loss: 2.8587