In [None]:
# Learning Rate and Epoch Sensitivity
We test how the learning rate and the number of training epochs affect the convergence and generalization of a small MLP trained on MNIST.
Goal: contrast stable and unstable optimization regimes.

In [None]:
# Data + Model Builder
import numpy as np
import matplotlib.pyplot as plt

from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import set_random_seed, to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.optimizers import Adam

# Load the MNIST train/test split (images plus integer class labels).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Flatten every image to a 784-vector and scale pixel values into [0, 1].
# Cast to float32 *before* dividing: dividing the raw integer array by a
# Python float would produce float64, doubling memory for no benefit.
X_train = X_train.reshape(-1, 784).astype("float32") / 255.0
X_test  = X_test.reshape(-1, 784).astype("float32") / 255.0

# One-hot encode the labels to match the categorical cross-entropy loss.
y_train = to_categorical(y_train, 10)
y_test  = to_categorical(y_test, 10)

# Cheap sanity checks so a bad download or shape mismatch fails here,
# not deep inside model.fit.
assert len(X_train) == len(y_train) and len(X_test) == len(y_test)
assert 0.0 <= X_train.min() and X_train.max() <= 1.0

def build_mlp(depth: int, width: int, lr: float,
              input_dim: int = 784, num_classes: int = 10):
    """Build and compile a fully connected ReLU classifier.

    Args:
        depth: Number of hidden layers; must be >= 1.
        width: Number of units in each hidden layer; must be >= 1.
        lr: Learning rate handed to the Adam optimizer.
        input_dim: Length of each flattened input vector (default 784).
        num_classes: Number of units in the softmax output layer (default 10).

    Returns:
        A compiled ``Sequential`` model using categorical cross-entropy loss
        and reporting accuracy.

    Raises:
        ValueError: If ``depth``, ``width``, ``input_dim`` or ``num_classes``
            is smaller than 1.
    """
    # Reject sizes that cannot describe a network. Previously depth <= 0 was
    # silently treated as a single hidden layer, which mislabels a sweep.
    for arg_name, arg_value in (("depth", depth), ("width", width),
                                ("input_dim", input_dim),
                                ("num_classes", num_classes)):
        if arg_value < 1:
            raise ValueError(f"{arg_name} must be >= 1, got {arg_value!r}")

    model = Sequential()
    # Declare the input with an explicit Input object rather than passing
    # input_shape= to the first Dense layer.
    model.add(Input(shape=(input_dim,)))
    for _ in range(depth):
        model.add(Dense(width, activation="relu"))
    model.add(Dense(num_classes, activation="softmax"))
    model.compile(optimizer=Adam(learning_rate=lr),
                  loss="categorical_crossentropy",
                  metrics=["accuracy"])
    return model

In [None]:
# Learning Rate Experiment: Fixed Architecture
# Sweep configuration: the architecture is held fixed, only lr varies.
depth = 2
width = 128
lrs = [0.001, 0.01, 0.1, 1.0]
EPOCHS = 10
BATCH = 128
SEED = 42

# Maps lr -> (per-epoch history dict, test accuracy, test loss).
histories = {}

for lr in lrs:
    print("Training lr =", lr)
    # Re-seed the Python, NumPy and TensorFlow RNGs before every run so each
    # model starts from the same seeded state and the learning rate is the
    # only thing that differs between runs. (Some GPU ops may still be
    # non-deterministic.)
    set_random_seed(SEED)
    model = build_mlp(depth, width, lr)
    hist = model.fit(X_train, y_train, validation_split=0.2,
                     epochs=EPOCHS, batch_size=BATCH, verbose=0)
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    histories[lr] = (hist.history, test_acc, test_loss)
    print("test_acc =", round(test_acc, 4), "test_loss =", round(test_loss, 4))

In [None]:
# Plot losses
# Overlay the per-epoch validation loss of every learning-rate run.
fig, ax = plt.subplots()
for lr in lrs:
    run_history = histories[lr][0]  # first tuple slot is the history dict
    ax.plot(run_history["val_loss"], marker="o", label=f"lr={lr}")
ax.set_xlabel("Epoch")
ax.set_ylabel("Validation Loss")
ax.set_title("Learning Rate Sensitivity (Val Loss)")
ax.legend()
plt.show()

In [None]:
# Epoch experiment (fixed lr)
# Epoch budgets to compare, all trained from scratch at one learning rate.
epochs_list = [5, 10, 20, 40]
lr = 0.001
SEED = 42

# One tuple per run: (epochs, test accuracy, test loss, final validation loss).
epoch_results = []
for ep in epochs_list:
    # Re-seed the Python, NumPy and TensorFlow RNGs before every run so all
    # runs start from the same seeded state; the epoch budget is then the
    # only thing that differs. (Some GPU ops may still be non-deterministic.)
    set_random_seed(SEED)
    model = build_mlp(depth=2, width=128, lr=lr)
    hist = model.fit(X_train, y_train, validation_split=0.2,
                     epochs=ep, batch_size=BATCH, verbose=0)
    test_loss, test_acc = model.evaluate(X_test, y_test, verbose=0)
    epoch_results.append((ep, test_acc, test_loss, hist.history["val_loss"][-1]))

epoch_results

In [None]:
# Plot test acc vs epochs
# Pull the epoch budgets and the matching test accuracies out of the
# per-run result tuples (slot 0 = epochs, slot 1 = test accuracy).
eps = [record[0] for record in epoch_results]
tacc = [record[1] for record in epoch_results]

fig, ax = plt.subplots()
ax.plot(eps, tacc, marker="o")
ax.set_xlabel("Epochs")
ax.set_ylabel("Test Accuracy")
ax.set_title("Epoch Sensitivity (Test Accuracy)")
plt.show()