In [None]:
# Depth × Width Sweep (MNIST)
We vary network **depth** (number of hidden layers) and **width** (neurons per layer) and track
train/val/test accuracy + loss to study generalization and overfitting.

In [None]:
# Set-up + Data
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras import Input
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Load + preprocess
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale pixel values by 1/255 and flatten every sample to a 784-element vector.
# Cast to float32 *before* dividing: dividing the integer image arrays by 255.0
# would otherwise promote them to float64 and double the memory footprint for no
# benefit, since Keras trains in float32 by default.
X_train = (X_train.astype("float32") / 255.0).reshape(-1, 784)
X_test  = (X_test.astype("float32") / 255.0).reshape(-1, 784)

# One-hot encode the integer labels into 10 classes to match the
# categorical cross-entropy loss used by the models.
y_train = to_categorical(y_train, 10)
y_test  = to_categorical(y_test, 10)

print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

In [None]:
# Model Builder
def build_mlp(depth: int, width: int, lr: float = 0.001,
              input_dim: int = 784, num_classes: int = 10):
    """
    Build and compile a fully connected ReLU classifier.

    Parameters
    ----------
    depth : int
        Number of hidden layers. ``0`` gives a model with no hidden layers
        (inputs feed the softmax output layer directly).
    width : int
        Neurons per hidden layer.
    lr : float
        Learning rate passed to the Adam optimizer.
    input_dim : int
        Length of each flattened input vector (defaults to 784).
    num_classes : int
        Number of units in the softmax output layer (defaults to 10).

    Returns
    -------
    A ``Sequential`` model compiled with categorical cross-entropy loss and
    an accuracy metric.

    Raises
    ------
    ValueError
        If ``depth`` is negative or ``width`` is less than 1.
    """
    # Validate before touching Keras so a bad sweep configuration fails fast
    # instead of silently training a differently shaped network.
    if depth < 0:
        raise ValueError(f"depth must be >= 0, got {depth}")
    if width < 1:
        raise ValueError(f"width must be >= 1, got {width}")

    model = Sequential()
    # An explicit Input layer keeps the input shape independent of which Dense
    # layer happens to come first, so depth == 0 needs no special case.
    model.add(Input(shape=(input_dim,)))
    # Exactly `depth` hidden layers, so the model matches its label in the sweep.
    for _ in range(depth):
        model.add(Dense(width, activation="relu"))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(
        optimizer=Adam(learning_rate=lr),
        loss="categorical_crossentropy",
        metrics=["accuracy"]
    )
    return model