In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, random_split
from torchvision import datasets, transforms
import matplotlib.pyplot as plt

In [None]:
# Step 1: Load and preprocess the MNIST dataset
# Seed PyTorch's global RNG so that shuffling order, weight initialisation and
# dropout masks are repeatable when the notebook is re-run from a fresh kernel.
SEED = 42
torch.manual_seed(SEED)

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Subtract 0.5 and divide by 0.5 -> map to [-1, 1]
])
mnist_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)

# Step 2: Split the dataset into training and validation sets (80% / 20%)
train_size = int(0.8 * len(mnist_dataset))
val_size = len(mnist_dataset) - train_size
# A dedicated generator pins the split itself, independently of how much of the
# global RNG stream other cells consume, so validation metrics stay comparable
# between runs.
train_dataset, val_dataset = random_split(
    mnist_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SEED),
)

# Step 3: Utilize Torch data loader
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=64, shuffle=False)

In [None]:
# Step 4: Define your neural network architecture
class SimpleNN(nn.Module):
    """Fully connected classifier with one hidden layer.

    Pipeline: flatten -> linear -> layer norm -> dropout -> ReLU -> linear.
    The output is raw, unnormalised class scores (logits); no softmax is
    applied here.

    Args:
        input_dim: Number of features per sample after flattening.
            Defaults to 28 * 28.
        hidden_dim: Width of the hidden layer. Defaults to 200.
        num_classes: Number of output logits. Defaults to 10.
        dropout_p: Probability of zeroing a hidden activation while the
            module is in training mode. Defaults to 0.5.
    """

    def __init__(self, input_dim=28 * 28, hidden_dim=200, num_classes=10, dropout_p=0.5):
        super().__init__()
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_dim, hidden_dim)
        self.layer_norm1 = nn.LayerNorm(hidden_dim)  # Layer Normalization
        self.dropout = nn.Dropout(dropout_p)
        self.relu = nn.ReLU()
        self.fc2 = nn.Linear(hidden_dim, num_classes)

    def forward(self, x):
        """Return logits of shape (batch, num_classes) for a batch `x`.

        Every dimension of `x` after the first is flattened, so the product
        of those dimensions must equal `input_dim`.
        """
        x = self.flatten(x)
        x = self.fc1(x)
        x = self.layer_norm1(x)  # Apply Layer Normalization
        # Dropout only rescales by a non-negative factor, so applying it
        # before the ReLU yields the same values as applying it after.
        x = self.dropout(x)
        x = self.relu(x)
        return self.fc2(x)

In [None]:
# Step 5: Initialize the model, optimizer, and loss function
# Choose the compute device up front: train() and evaluate() send every batch
# to `device`, so the model has to live on the same device.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Move the model before building the optimizer, as the torch.optim
# documentation recommends.
model = SimpleNN().to(device)
optimizer = optim.SGD(model.parameters(), lr=0.01)
criterion = nn.CrossEntropyLoss()


In [None]:
# Step 7: Plot training and validation metrics
def plot_metrics(train_losses_, val_losses_, train_accuracies_, val_accuracies_):
    """Draw loss and accuracy curves side by side and show the figure.

    Each argument is a sequence with one value per epoch. The x axis runs
    from 1 to len(train_losses_). The left panel holds the two loss curves,
    the right panel the two accuracy curves.
    """
    epoch_axis = range(1, len(train_losses_) + 1)
    fig, (loss_ax, acc_ax) = plt.subplots(1, 2, figsize=(12, 5))

    # Left panel: loss curves
    loss_ax.plot(epoch_axis, train_losses_, label='Training Loss')
    loss_ax.plot(epoch_axis, val_losses_, label='Validation Loss')
    loss_ax.set_title('Training and Validation Loss')
    loss_ax.set_xlabel('Epochs')
    loss_ax.set_ylabel('Loss')
    loss_ax.legend()

    # Right panel: accuracy curves
    acc_ax.plot(epoch_axis, train_accuracies_, label='Training Accuracy')
    acc_ax.plot(epoch_axis, val_accuracies_, label='Validation Accuracy')
    acc_ax.set_title('Training and Validation Accuracy')
    acc_ax.set_xlabel('Epochs')
    acc_ax.set_ylabel('Accuracy')
    acc_ax.legend()

    fig.tight_layout()
    plt.show()

In [None]:
def evaluate(modell, val_load, loss_fn):
    """Compute the mean loss and accuracy of `modell` over `val_load`.

    The model is switched to evaluation mode and gradients are disabled.
    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on any module-level `device` variable.

    Args:
        modell: The network to evaluate; called as `modell(inputs)` and
            expected to return one row of class scores per sample.
        val_load: Iterable yielding `(inputs, labels)` batches.
        loss_fn: Callable `loss_fn(outputs, labels)` returning a scalar
            tensor. Assumed to be the mean over the batch (the default
            reduction of nn.CrossEntropyLoss), which is why each batch loss
            is re-weighted by its size below.

    Returns:
        Tuple `(validation_loss, validation_accuracy)`: the per-sample mean
        loss and the fraction of correctly classified samples.

    Raises:
        ValueError: If `val_load` yields no samples.
    """
    modell.eval()  # Set the model to evaluation mode

    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(modell.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    loss_sum = 0.0
    correct_predictions = 0
    total_samples = 0

    with torch.no_grad():
        for val_inputs, val_labels in val_load:
            val_inputs = val_inputs.to(model_device)
            val_labels = val_labels.to(model_device)

            val_outputs = modell(val_inputs)
            batch_count = val_labels.size(0)

            # Weight by batch size so a short final batch does not count as
            # much as a full one in the epoch average.
            loss_sum += loss_fn(val_outputs, val_labels).item() * batch_count
            correct_predictions += (val_outputs.argmax(dim=1) == val_labels).sum().item()
            total_samples += batch_count

    if total_samples == 0:
        raise ValueError("evaluate() received no samples: the data loader is empty")

    validation_loss = loss_sum / total_samples
    validation_accuracy = correct_predictions / total_samples

    return validation_loss, validation_accuracy


def train(modell, train_load, optimize, loss_fn):
    """Run one training epoch and return its mean loss and accuracy.

    The model is switched to training mode. Batches are moved to the device
    that holds the model's parameters, so the function does not depend on any
    module-level `device` variable.

    Args:
        modell: The network to train; called as `modell(inputs)` and expected
            to return one row of class scores per sample.
        train_load: Iterable yielding `(inputs, labels)` batches.
        optimize: Optimizer over `modell`'s parameters; `zero_grad()` and
            `step()` are called once per batch.
        loss_fn: Callable `loss_fn(outputs, labels)` returning a scalar
            tensor. Assumed to be the mean over the batch (the default
            reduction of nn.CrossEntropyLoss), which is why each batch loss
            is re-weighted by its size below.

    Returns:
        Tuple `(average_loss, train_acc)`: the per-sample mean loss and the
        fraction of correctly classified samples, both measured on the
        outputs produced before each batch's optimizer step.

    Raises:
        ValueError: If `train_load` yields no samples.
    """
    modell.train()  # Set the model to training mode

    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(modell.parameters(), None)
    model_device = first_param.device if first_param is not None else torch.device("cpu")

    loss_sum = 0.0
    correct_predictions = 0
    total_samples = 0

    for inputs, labels in train_load:
        inputs = inputs.to(model_device)
        labels = labels.to(model_device)

        optimize.zero_grad()  # Zero the gradients to avoid accumulation
        outputs = modell(inputs)
        loss = loss_fn(outputs, labels)
        loss.backward()
        optimize.step()

        batch_count = labels.size(0)
        # Weight by batch size so a short final batch does not count as much
        # as a full one in the epoch average.
        loss_sum += loss.item() * batch_count

        # Calculate training accuracy
        correct_predictions += (outputs.argmax(dim=1) == labels).sum().item()
        total_samples += batch_count

    if total_samples == 0:
        raise ValueError("train() received no samples: the data loader is empty")

    # Calculate average loss and accuracy for the epoch
    average_loss = loss_sum / total_samples
    train_acc = correct_predictions / total_samples

    return average_loss, train_acc
# Run the training loop and record per-epoch metrics
num_epochs = 10
train_losses, val_losses, train_accuracies, val_accuracies = [], [], [], []
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# The batches are sent to `device`, so the model must be there as well;
# otherwise the first forward pass fails on a CUDA machine. Module.to()
# modifies the module in place, and the call is a no-op when the model is
# already on `device`.
model.to(device)

for epoch in range(num_epochs):
    train_loss, train_accuracy = train(model, train_loader, optimizer, criterion)
    val_loss, val_accuracy = evaluate(model, val_loader, criterion)

    train_losses.append(train_loss)
    val_losses.append(val_loss)
    train_accuracies.append(train_accuracy)
    val_accuracies.append(val_accuracy)

    print(
        f"Epoch {epoch + 1}/{num_epochs} - "
        f"train loss {train_loss:.4f}, train acc {train_accuracy:.4f}, "
        f"val loss {val_loss:.4f}, val acc {val_accuracy:.4f}"
    )

In [None]:
# Step 9: Plot the metrics
plot_metrics(train_losses, val_losses, train_accuracies, val_accuracies)

# Define lists of learning rates and batch sizes to experiment with
learning_rates = [0.1, 0.01, 0.001, 0.0001, 0.00001]
batch_sizes = [32, 64, 128, 256, 512]
# Epochs per configuration. Kept small because the grid trains
# len(learning_rates) * len(batch_sizes) = 25 separate models.
sweep_epochs = 3

# Track results for different hyperparameters
results = []

# Loop through learning rates and batch sizes
for lr in learning_rates:
    for batch_size in batch_sizes:
        # Re-initialize the model, optimizer, and criterion for each iteration.
        # The fresh model is placed on `device` because train() and evaluate()
        # send the batches there.
        model = SimpleNN().to(device)
        optimizer = optim.SGD(model.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()

        # DataLoader with the current batch size
        train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
        val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

        # Train this configuration; without this step every entry in `results`
        # would describe an untrained network.
        for sweep_epoch in range(sweep_epochs):
            train(model, train_loader, optimizer, criterion)

        # After training, evaluate the model on validation data
        val_loss, val_accuracy = evaluate(model, val_loader, criterion)

        # Store the results
        results.append({
            'learning_rate': lr,
            'batch_size': batch_size,
            'validation_loss': val_loss,
            'validation_accuracy': val_accuracy
        })

# Print or analyze the results
for result in results:
    print(f"Learning Rate: {result['learning_rate']}, Batch Size: {result['batch_size']}")
    print(f"Validation Loss: {result['validation_loss']}, Validation Accuracy: {result['validation_accuracy']}")
    print("=" * 50)