# Assignment 9.2 - Optimizers

Please submit your solution to this notebook in Whiteboard under the corresponding assignment entry, both as an .ipynb file and as a .pdf. <br><br>
Please do **NOT** rename the file!

#### State both names of your group members here:
[Jane and John Doe]

In [1]:
# Daniel Thompson, Paola Gega

### Check out https://colab.google/ for free GPU resources!

## Task 9.2.1: Optimizer Comparison

* Set up a fully connected neural network model using `PyTorch`. It should be designed for classifying CIFAR10 images. The model should have the following layers: **(RESULT)**
    * Layer1: input_dimension --> 256
    * Layer2: 256 --> 128
    * Layer3: 128 --> 10
    * Use ReLU as your non-linear activation function
* Monitor the learning (loss-function) for different optimizers. Feel free to use `PyTorch`'s implementations for each: **(RESULT)**
    * SGD (with learning rates [0.1, 0.01, 0.001])
    * SGD (with learning rates [0.1, 0.01, 0.001] and 0.9 momentum)
    * Adam
    * AdamW
    * RMSprop

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import matplotlib.pyplot as plt


# Simple Fully Connected Model
class SimpleFC(nn.Module):
    """Fully connected classifier with two hidden ReLU layers.

    With the default arguments the layer sizes are 3072 -> 256 -> 128 -> 10,
    i.e. a flattened 32x32 RGB image is mapped to 10 class logits.

    Args:
        input_dim: Number of features per sample after flattening.
        hidden1: Width of the first hidden layer.
        hidden2: Width of the second hidden layer.
        num_classes: Number of output logits.
    """

    def __init__(self, input_dim=32 * 32 * 3, hidden1=256, hidden2=128, num_classes=10):
        super().__init__()
        self.input_dim = input_dim
        # Layers are created in the order fc1, fc2, fc3 so that seeded
        # initialisation is unchanged.
        self.fc1 = nn.Linear(input_dim, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.fc3 = nn.Linear(hidden2, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Compute class logits for a batch.

        Args:
            x: Tensor whose elements can be regrouped into rows of
                ``input_dim`` features, e.g. ``(batch, 3, 32, 32)``.

        Returns:
            Tensor of shape ``(batch, num_classes)`` holding raw logits
            (no softmax is applied).
        """
        # reshape (unlike view) also accepts non-contiguous tensors, e.g. a
        # permuted channels-last batch, and is a no-copy view otherwise.
        flat = x.reshape(-1, self.input_dim)
        hidden = self.relu(self.fc1(flat))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)

def get_optimizer(model, opt, lr):
    """Build the optimizer selected by ``opt`` for the parameters of ``model``.

    Args:
        model: Module whose ``parameters()`` are optimised.
        opt: One of ``'sgd1'`` (plain SGD), ``'sgd2'`` (SGD with 0.9
            momentum), ``'Adam'``, ``'AdamW'`` or ``'RMSprop'``.
        lr: Learning rate passed to the optimizer.

    Returns:
        The constructed ``torch.optim`` optimizer.

    Raises:
        ValueError: If ``opt`` is not one of the supported keys.
    """
    # key -> (optimizer class, extra keyword arguments besides lr)
    optimizer_specs = {
        'sgd1': (optim.SGD, {}),
        'sgd2': (optim.SGD, {'momentum': 0.9}),
        'Adam': (optim.Adam, {}),
        'AdamW': (optim.AdamW, {}),
        'RMSprop': (optim.RMSprop, {}),
    }
    if opt not in optimizer_specs:
        # Previously an unknown key fell through every branch and crashed
        # with an UnboundLocalError on the return statement.
        raise ValueError(
            f"Unknown optimizer {opt!r}; expected one of {sorted(optimizer_specs)}"
        )
    optimizer_cls, extra_kwargs = optimizer_specs[opt]
    return optimizer_cls(model.parameters(), lr=lr, **extra_kwargs)

def load_data(batch_size=64, root='./data', num_workers=2):   # Set batch size according to your machine
    """Load the CIFAR-10 train and test splits as data loaders.

    The dataset is downloaded into ``root`` if it is not already there.
    Images are converted to tensors and normalised per channel with mean 0.5
    and standard deviation 0.5.

    Args:
        batch_size: Number of samples per batch for both loaders.
        root: Directory in which the dataset is stored / downloaded.
        num_workers: Worker processes per loader; use 0 to load in the main
            process (useful where multiprocessing is problematic).

    Returns:
        Tuple ``(train_loader, test_loader)``. Only the training loader
        shuffles its samples.
    """
    transform = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])

    train_set = datasets.CIFAR10(root=root, train=True,
                                 download=True, transform=transform)
    train_loader = DataLoader(train_set, batch_size=batch_size,
                              shuffle=True, num_workers=num_workers)

    test_set = datasets.CIFAR10(root=root, train=False,
                                download=True, transform=transform)
    test_loader = DataLoader(test_set, batch_size=batch_size,
                             shuffle=False, num_workers=num_workers)

    return train_loader, test_loader


def train(model, optimizer, criterion, train_loader, num_epochs=10):
    """Train ``model`` and print the mean training loss after every epoch.

    Args:
        model: Network to train; it is switched to training mode each epoch.
        optimizer: Optimizer that updates the parameters of ``model``.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        train_loader: Sized iterable yielding ``(inputs, labels)`` batches.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        list[float]: Mean batch loss of each epoch, in order, so the learning
        curve can be compared or plotted afterwards.
    """
    # Batches have to live on the same device as the model; otherwise a model
    # moved to the GPU fails on the first forward pass.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else None

    epoch_losses = []
    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        for inputs, labels in train_loader:
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)
            inputs = inputs.reshape(-1, 32 * 32 * 3)  # Flatten the images
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
        epoch_loss = running_loss / len(train_loader)
        epoch_losses.append(epoch_loss)
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {epoch_loss:.4f}')
    return epoch_losses

def test(model, test_loader):
    """Evaluate on test set."""
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs = inputs.view(-1, 32 * 32 * 3)  # Flatten the images
            outputs = model(inputs)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    accuracy = 100 * correct / total
    print(f'Test Accuracy: {accuracy:.2f}%')
    return accuracy


In [3]:
# Setup - prefer the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f"Using device: {device}\n")

# Build the CIFAR-10 train/test loaders
train_loader, test_loader = load_data(batch_size=64)

Using device: cpu



In [None]:
best_accuracy = 0
best_lr = 0
best_opt = None

num_epochs = 10

# Every run is described by one tuple:
#   (get_optimizer key, learning rate, banner printed before the run,
#    name recorded in best_opt if the run wins)
# Order: plain SGD, SGD with momentum, then the adaptive optimizers.
sgd_learning_rates = [0.1, 0.01, 0.001]
adaptive_learning_rate = 0.01

runs = [('sgd1', rate, f"SGD with learning rate {rate}", 'SGD')
        for rate in sgd_learning_rates]
runs += [('sgd2', rate, f"SGD with learning rate {rate} and .9 momentum",
          'SGD with .9 momentum')
         for rate in sgd_learning_rates]
runs += [(name, adaptive_learning_rate,
          f"{name}  with learning rate {adaptive_learning_rate}", name)
         for name in ['Adam', 'AdamW', 'RMSprop']]

for opt, lr, banner, display_name in runs:
    print(banner)
    # Fresh model per run so that no configuration starts from trained weights.
    model = SimpleFC().to(device)
    optimizer = get_optimizer(model, opt=opt, lr=lr)
    criterion = nn.CrossEntropyLoss()
    # Train the model
    train(model, optimizer, criterion, train_loader, num_epochs=num_epochs)
    # Test the model
    accuracy = test(model, test_loader)
    # Strict comparison: on a tie the earlier configuration stays the winner.
    if accuracy > best_accuracy:
        best_accuracy = accuracy
        best_lr = lr
        best_opt = display_name
    print()

SGD with learning rate  0.1
Epoch [1/10], Loss: 1.6846
Epoch [2/10], Loss: 1.4388
Epoch [3/10], Loss: 1.3227
Epoch [4/10], Loss: 1.2332
Epoch [5/10], Loss: 1.1559
Epoch [6/10], Loss: 1.0829
Epoch [7/10], Loss: 1.0185
Epoch [8/10], Loss: 0.9451
Epoch [9/10], Loss: 0.8840
Epoch [10/10], Loss: 0.8243
Test Accuracy: 48.01%

SGD with learning rate  0.01
Epoch [1/10], Loss: 2.0220
Epoch [2/10], Loss: 1.7431
Epoch [3/10], Loss: 1.6279
Epoch [4/10], Loss: 1.5535
Epoch [5/10], Loss: 1.4927
Epoch [6/10], Loss: 1.4410
Epoch [7/10], Loss: 1.3933
Epoch [8/10], Loss: 1.3494
Epoch [9/10], Loss: 1.3079
Epoch [10/10], Loss: 1.2717
Test Accuracy: 51.35%

SGD with learning rate  0.001
Epoch [1/10], Loss: 2.2820
Epoch [2/10], Loss: 2.2313
Epoch [3/10], Loss: 2.1668
Epoch [4/10], Loss: 2.1026
Epoch [5/10], Loss: 2.0476
Epoch [6/10], Loss: 1.9987
Epoch [7/10], Loss: 1.9557
Epoch [8/10], Loss: 1.9179
Epoch [9/10], Loss: 1.8844
Epoch [10/10], Loss: 1.8554
Test Accuracy: 35.73%

SGD with learning rate  0.1 and

In [5]:
# Report the configuration that reached the highest test accuracy.
print("Best optimizer found:", best_opt)
print("lr = ", best_lr)

Best optimizer fount: SGD with .9 momentum
lr =  0.01


## Congratz, you made it! :)