## Imports

In [None]:
import sys
sys.path.append("..")
from src import preprocess_mnist
from src import NNModel
from src import train_model
from src import plot_training_curves
from src import detect_convergence, plot_convergence
import torch
import torch.nn as nn
import matplotlib.pyplot as plt

## Displaying Non-Flattened MNIST Images

In [None]:
# Build the loaders with flatten=False so each sample stays in image form
# and can be passed to imshow.
train_loader, val_loader, test_loader = preprocess_mnist(flatten=False)

# Take one training batch and draw its first eight samples in a single row,
# each titled with its label.
images, labels = next(iter(train_loader))
fig, axes = plt.subplots(nrows=1, ncols=8, figsize=(12, 2))
for idx, ax in enumerate(axes):
    # squeeze() drops singleton dimensions before plotting.
    ax.imshow(images[idx].squeeze(), cmap='gray')
    ax.set_title(str(labels[idx].item()))
    ax.axis('off')
plt.show()


## Displaying Flattened MNIST Data Information

In [None]:
# Rebuild the loaders for the flattened representation: batch size 64,
# augmentation switched off. These loaders are reused by later cells.
train_loader, val_loader, test_loader = preprocess_mnist(
    batch_size=64,
    augment=False,
    flatten=True,
)

# Inspect a single training batch: the shapes of both tensors and the
# first ten labels.
first_batch = next(iter(train_loader))
images, labels = first_batch
print(f"Images batch shape: {images.shape}")
print(f"Labels batch shape: {labels.shape}")
print(f"Example labels: {labels[:10]}")

In [None]:
# Pick the compute device: CUDA when torch reports it available, else CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Epoch count for the baseline run; also passed to the curve plot below.
epochs = 20

# Build the network on the chosen device, run its own weight initialiser
# through apply(), and pair cross-entropy loss with plain SGD (lr=0.01).
model = NNModel().to(device)
model.apply(model._init_weights)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

# Train and keep the metrics mapping returned by train_model.
history = train_model(
    model,
    train_loader,
    val_loader,
    criterion,
    optimizer,
    epochs=epochs,
    device=device,
)

# Plot the training curves, then the loss curves together with the epoch
# that detect_convergence derives from the validation loss.
plot_training_curves(history, epochs=epochs)
train_losses = history["train_loss_mean"]
val_losses = history["val_loss_mean"]
conv_epoch = detect_convergence(val_losses)
plot_convergence(train_losses, val_losses, conv_epoch)


# Analysis


### 1. Learning Rate Analysis

In [None]:
# Learning-rate sweep: train one freshly built model per learning rate on
# the current train/val loaders and keep each run's metrics in results_lr.
learning_rates = [0.001, 0.01, 0.1, 1.0]
epochs = 10
results_lr = {}
for lr in learning_rates:
    print(f"Training with learning rate: {lr}")
    # New model, loss and optimizer for every run so that no state carries
    # over from the previous learning rate.
    model = NNModel().to(device)
    model.apply(model._init_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=lr)
    result = train_model(model, train_loader, val_loader, criterion, optimizer, epochs=epochs, device=device)
    results_lr[lr] = result
    plot_training_curves(result, epochs=epochs)
    conv_epoch = detect_convergence(result["val_loss_mean"])
    plot_convergence(result["train_loss_mean"], result["val_loss_mean"], conv_epoch)

# Last recorded validation accuracy of each run. Built with a comprehension
# so the summary does not rebind the notebook-level `history` that holds the
# baseline run's metrics.
final_val_acc = {rate: run["val_acc_mean"][-1] for rate, run in results_lr.items()}
for lr, val_acc in final_val_acc.items():
    print(f"LR {lr:<5} → Final Val Acc: {val_acc*100:.2f}%")

# max() always yields one of the tested rates (the first one on ties), so
# best_lr can no longer stay None when every accuracy is 0.
best_lr = max(final_val_acc, key=final_val_acc.get)
best_acc = final_val_acc[best_lr]

print(f"\nBest Learning Rate: {best_lr} with Val Acc = {best_acc*100:.2f}%")


### 2. Batch Size Analysis


In [None]:
# Batch-size sweep: rebuild the data loaders for each batch size, train a
# freshly built model with SGD (lr=0.01) and keep each run's metrics in
# results_bs.
batch_sizes = [16, 32, 64, 128]
epochs = 10
results_bs = {}

for bs in batch_sizes:
    print(f"Training with batch size: {bs}")
    # Dedicated loaders per batch size; the notebook-level train_loader and
    # val_loader are left untouched.
    train_loader_bs, val_loader_bs, test_loader_bs = preprocess_mnist(batch_size=bs, augment=False, flatten=True)

    # model, loss, optimizer
    model = NNModel().to(device)
    model.apply(model._init_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    result = train_model(model, train_loader_bs, val_loader_bs, criterion, optimizer, epochs=epochs, device=device)
    results_bs[bs] = result

# Last recorded validation accuracy of each run. Built with a comprehension
# so the summary does not rebind the notebook-level `history` that holds the
# baseline run's metrics.
final_val_acc = {size: run["val_acc_mean"][-1] for size, run in results_bs.items()}
for bs, val_acc in final_val_acc.items():
    print(f"BS {bs:<5} → Final Val Acc: {val_acc*100:.2f}%")

# max() always yields one of the tested sizes (the first one on ties), so
# best_bs can no longer stay None when every accuracy is 0.
best_bs = max(final_val_acc, key=final_val_acc.get)
best_acc = final_val_acc[best_bs]

print(f"\nBest Batch Size: {best_bs} with Val Acc = {best_acc*100:.2f}%")


### 3. Architecture Analysis

In [None]:
# Architecture grid: number of hidden layers x neurons per hidden layer.
layers = [2, 3, 4, 5]
neurons_per_layer = [64, 128, 256, 512]
epochs = 10
results_arch = {}

# Visit every (depth, width) pair, trying all widths for one depth before
# moving on to the next depth.
arch_grid = ((depth, width) for depth in layers for width in neurons_per_layer)
for num_layers, neurons in arch_grid:
    print(f"Training with {num_layers} layers and {neurons} neurons per layer")

    # Every hidden layer gets the same width.
    hidden_sizes = [neurons] * num_layers
    model = NNModel(hidden_sizes=hidden_sizes).to(device)
    model.apply(model._init_weights)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.SGD(model.parameters(), lr=0.01)

    # Train on the shared loaders and file the metrics under the
    # (depth, width) key.
    result = train_model(
        model,
        train_loader,
        val_loader,
        criterion,
        optimizer,
        epochs=epochs,
        device=device,
    )
    results_arch[(num_layers, neurons)] = result
