# Module 2 — Experiment 1 (MLP): Regularization & Batch Normalization on MNIST

**Question (L3 – Apply, 2 lines):**  
Train a simple **MLP** on **MNIST** and compare **L2 regularization, Dropout, and Batch Normalization** against a baseline. Analyze overfitting, training stability, and generalization using validation curves and test accuracy.

**Learning Targets:** Regularization, Dropout, BatchNorm, Overfitting/Generalization, Validation curves.


In [None]:
# Imports & data
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

# Fetch the MNIST train/test arrays bundled with Keras.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Flatten every 28x28 image into a 784-long row, cast to float32 and divide
# by 255 so the MLP receives small floating-point inputs.
x_train = x_train.reshape(-1, 28 * 28).astype('float32') / 255.0
x_test = x_test.reshape(-1, 28 * 28).astype('float32') / 255.0

# Sanity-check the resulting array shapes.
print('Train:', x_train.shape, y_train.shape, '| Test:', x_test.shape, y_test.shape)

In [None]:
# Utility to plot histories
def plot_histories(histories, metric='val_accuracy', title='Validation Accuracy'):
    """Overlay one curve per training run for a single recorded metric.

    Args:
        histories: Mapping from a run label to an object whose ``history``
            attribute maps metric names to per-epoch value sequences.
        metric: Key looked up in each run's ``history`` mapping.
        title: Text shown above the axes.
    """
    _, axes = plt.subplots()
    for label, run in histories.items():
        axes.plot(run.history[metric], label=label)
    axes.set_xlabel('Epoch')
    # Turn the metric key into a readable label, e.g. 'val_loss' -> 'Val Loss'.
    axes.set_ylabel(metric.replace('_', ' ').title())
    axes.set_title(title)
    axes.legend()
    plt.show()

In [None]:
# Build MLP variants
from tensorflow.keras import layers, models, regularizers

def make_baseline():
    """Build and compile the unregularized reference MLP.

    Returns:
        A compiled ``Sequential`` model: 784 inputs -> Dense(256, relu) ->
        Dense(128, relu) -> Dense(10, softmax), trained with Adam on sparse
        categorical cross-entropy and reporting accuracy.
    """
    stack = [
        layers.Input(shape=(784,)),
        layers.Dense(256, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dense(10, activation='softmax'),
    ]
    net = models.Sequential(stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

def make_l2(l2=1e-4):
    """Build and compile the MLP with an L2 penalty on the hidden-layer kernels.

    Args:
        l2: Coefficient passed to ``regularizers.l2`` for both hidden layers.
            The output layer is left unregularized.

    Returns:
        A compiled ``Sequential`` model with the same layer sizes, optimizer,
        loss and metric as the baseline.
    """
    hidden_256 = layers.Dense(
        256, activation='relu', kernel_regularizer=regularizers.l2(l2)
    )
    hidden_128 = layers.Dense(
        128, activation='relu', kernel_regularizer=regularizers.l2(l2)
    )
    stack = [
        layers.Input(shape=(784,)),
        hidden_256,
        hidden_128,
        layers.Dense(10, activation='softmax'),
    ]
    net = models.Sequential(stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

def make_dropout(p=0.5):
    """Build and compile the MLP with Dropout after each hidden layer.

    Args:
        p: Rate passed to both ``layers.Dropout`` instances.

    Returns:
        A compiled ``Sequential`` model with the same layer sizes, optimizer,
        loss and metric as the baseline.
    """
    stack = [layers.Input(shape=(784,))]
    # Each hidden block is a ReLU Dense layer followed by its own Dropout.
    for width in (256, 128):
        stack.append(layers.Dense(width, activation='relu'))
        stack.append(layers.Dropout(p))
    stack.append(layers.Dense(10, activation='softmax'))

    net = models.Sequential(stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

def make_batchnorm():
    """Build and compile the MLP with BatchNormalization before each ReLU.

    Each hidden block is Dense (no bias) -> BatchNormalization -> ReLU.

    Returns:
        A compiled ``Sequential`` model with the same layer sizes, optimizer,
        loss and metric as the baseline.
    """
    stack = [layers.Input(shape=(784,))]
    for width in (256, 128):
        # The Dense bias is switched off; BatchNormalization follows directly
        # and, with its default settings, supplies its own learned offset.
        stack.append(layers.Dense(width, use_bias=False))
        stack.append(layers.BatchNormalization())
        stack.append(layers.Activation('relu'))
    stack.append(layers.Dense(10, activation='softmax'))

    net = models.Sequential(stack)
    net.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'],
    )
    return net

In [None]:
# Train every variant under identical conditions (increase EPOCHS for deeper analysis)
EPOCHS = 8
BATCH = 128
SEED = 42      # re-applied before each variant; change it to probe run-to-run variance
hist = {}      # variant name -> History object returned by fit()
test_acc = {}  # variant name -> accuracy on the held-out test set
for name, maker in [('baseline', make_baseline),
                    ('l2', make_l2),
                    ('dropout', make_dropout),
                    ('batchnorm', make_batchnorm)]:
    # Reset the Python, NumPy and TensorFlow RNGs so every variant starts from
    # the same seed regardless of its position in the list. Without this the
    # curves are not reproducible, and small gaps between variants cannot be
    # told apart from initialization/shuffling noise.
    tf.keras.utils.set_random_seed(SEED)
    model = maker()
    # validation_split=0.2 holds out the trailing 20% of the training arrays
    # (selected before shuffling), so all variants validate on the same samples.
    h = model.fit(x_train, y_train, validation_split=0.2, epochs=EPOCHS, batch_size=BATCH, verbose=1)
    hist[name] = h
    # evaluate() returns [loss, accuracy] because each model is compiled with
    # metrics=['accuracy']; index 1 is the accuracy.
    test_acc[name] = model.evaluate(x_test, y_test, verbose=0)[1]
    print(name, 'test acc:', round(test_acc[name], 4))

In [None]:
# Visualize validation accuracy & loss
# One figure per validation metric, each overlaying all trained variants.
plot_histories(
    hist,
    metric='val_accuracy',
    title='Validation Accuracy (MLP Regularization Variants)',
)
plot_histories(
    hist,
    metric='val_loss',
    title='Validation Loss (MLP Regularization Variants)',
)

### Result & Inference (to be written by student)
- Which variant overfit the least, and why? (Compare each model's training and validation curves.)
- Which variant improved stability or convergence?
- Summarize the impact of L2, Dropout, and BatchNorm on generalization.
