In [9]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

from time import time


import matplotlib.pyplot as plt
from tqdm import tqdm

In [10]:
# Load MNIST dataset

transform = transforms.Compose([
    transforms.ToTensor(),  # Convert images to PyTorch tensors and scale to [0,1]
    transforms.Normalize((0.5,), (0.5,))  # Normalize to mean=0.5, std=0.5
])

trainset = torchvision.datasets.MNIST(root="./data", train=True, download=True, transform=transform)
valset = torchvision.datasets.MNIST(root="./data", train=False, download=True, transform=transform)

trainloader = DataLoader(trainset, batch_size=64, shuffle=True)
valloader = DataLoader(valset, batch_size=64, shuffle=False)

In [11]:
class MyMLP(nn.Module):
    
    def __init__(self):
        super(MyMLP, self).__init__()

        self.layers = nn.Sequential(
            nn.Linear(28*28, 64),
            nn.ReLU(),
            nn.Linear(64, 10)
        )


    def forward(self, x):
        
        return self.layers(x)

In [12]:
# Define model and parameters

model_mlp = MyMLP()
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model_mlp.to(device)

num_epochs = 10
learning_rate = 0.001
optimizer = torch.optim.Adam(model_mlp.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [13]:
def train_and_evaluate(model, trainloader, valloader, device, num_epochs, optimizer, criterion):

    print(f'Training model: {model.__class__.__name__} with Loss function: {criterion} and Optimizer: {optimizer.__class__.__name__}')
    print(f'Using device: {device}' if device=='cpu' else f'Using device: {device} ({torch.cuda.get_device_name()})\n')

    for epoch in range(num_epochs):

# Train
        model.train()
        train_loss = 0
        train_acc = 0
        
        start = time()

        with tqdm(trainloader) as pbar:
            for i, (images, labels) in enumerate(pbar):
                images = images.view(-1, 28 * 28).to(device)
                optimizer.zero_grad()
                output = model(images)

                loss = criterion(output, labels.to(device))
                loss.backward()
                optimizer.step()
                train_loss += loss.item()

                accuracy = (output.argmax(dim=1) == labels.to(device)).float().mean()
                train_acc += accuracy.item()

                pbar.set_postfix(loss=loss.item(), accuracy=accuracy.item(), lr=optimizer.param_groups[0]['lr'])
        
        train_loss /= len(trainloader)
        train_acc /= len(trainloader)

        print(f"Epoch {epoch + 1}, Train Loss: {train_loss}, Train Accuracy: {train_acc}, Time: {time()-start:.2f}")


# Validation
        model.eval()
        val_loss = 0
        val_acc = 0

        with torch.no_grad():
            for images, labels in valloader:
                images = images.view(-1, 28 * 28).to(device)
                output = model(images)

                val_loss += criterion(output, labels.to(device)).item()
                val_acc += ((output.argmax(dim=1) == labels.to(device)).float().mean().item())

        val_loss /= len(valloader)
        val_acc /= len(valloader)

        print(f"Epoch {epoch + 1}, Val Loss: {val_loss}, Val Accuracy: {val_acc}\n")

In [14]:
train_and_evaluate(model_mlp, trainloader, valloader, device, num_epochs, optimizer, criterion)

Training model: MyMLP with Loss function: CrossEntropyLoss() and Optimizer: Adam
Using device: cuda (AMD Radeon RX 7800 XT)



100%|██████████| 938/938 [00:07<00:00, 130.12it/s, accuracy=1, loss=0.075, lr=0.001]     


Epoch 1, Train Loss: 0.40776860153179434, Train Accuracy: 0.8825126599147122, Time: 7.21
Epoch 1, Val Loss: 0.24400364491305535, Val Accuracy: 0.9298367834394905



100%|██████████| 938/938 [00:07<00:00, 131.27it/s, accuracy=0.938, loss=0.142, lr=0.001] 


Epoch 2, Train Loss: 0.22170722147009012, Train Accuracy: 0.9353178304904051, Time: 7.15


KeyboardInterrupt: 

In [None]:
def get_accuracy(logit, target):
    corrects = (torch.max(logit, 1)[1].view(target.size()).data == target.data).sum()   # correct predictions
    accuracy = 100.0 * corrects / target.size(0)                                        # % of correct predictions
    return accuracy.item()


def compute_weight_norm(model):     # extracting the weight norms
    norm = 0.0
    for name, param in model.named_parameters():
        if "weight" in name:
            norm += torch.norm(param.data, p=2)
    return norm.cpu().item()


def train_model(model, num_epochs, trainloader, criterion, optimizer, device):

    print(f'Training model: {model.__class__.__name__} with Loss function: {criterion} and Optimizer: {optimizer.__class__.__name__}\n')

    model = model.to(device)

    losses = []
    accs = []

    norms = []

    for epoch in range(num_epochs):
        train_running_loss = 0.0
        train_acc = 0.0

        # Set the model to training mode
        model = model.train()
        start = time()
        ## training step
        for i, (images, labels) in enumerate(trainloader):

            images = images.to(device)
            labels = labels.to(device)

            ## forward + backprop + loss
            logits = model(images)
            loss = criterion(logits, labels)

            # Reset the gradients to zero
            optimizer.zero_grad()

            loss.backward()

            ## update model params
            optimizer.step()

            train_running_loss += loss.item()
            train_acc += get_accuracy(logits, labels)


        losses.append(train_running_loss / i)
        accs.append(train_acc/i)

        norms.append(compute_weight_norm(model))
        model.eval()
        print(f"\tEpoch: [{epoch+1}/{num_epochs}] | Loss: {train_running_loss:.4f} | Train Accuracy: {train_acc:.4f} | Time: {time()-start:.2f}")

    print('\n')
    return losses, accs, norms

In [None]:
'''


# Training the model
def train_model(num_epochs):
    model.train()
    for epoch in range(num_epochs):
        total_loss = 0
        for images, labels in train_loader:
            images = images.to(device)
            labels = labels.to(device)
            images = images.view(images.shape[0], -1)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')

train_model(10)

# Testing the model
def test_model():
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)
            images = images.view(images.shape[0], -1)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print(f'Test Accuracy: {accuracy:.2f}%')

test_model()'''

"\n\n\n# Training the model\ndef train_model(num_epochs):\n    model.train()\n    for epoch in range(num_epochs):\n        total_loss = 0\n        for images, labels in train_loader:\n            images = images.to(device)\n            labels = labels.to(device)\n            images = images.view(images.shape[0], -1)\n\n            optimizer.zero_grad()\n            outputs = model(images)\n            loss = criterion(outputs, labels)\n            loss.backward()\n            optimizer.step()\n            total_loss += loss.item()\n        print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {total_loss/len(train_loader):.4f}')\n\ntrain_model(10)\n\n# Testing the model\ndef test_model():\n    model.eval()\n    with torch.no_grad():\n        correct = 0\n        total = 0\n        for images, labels in test_loader:\n            images = images.to(device)\n            labels = labels.to(device)\n            images = images.view(images.shape[0], -1)\n            outputs = model(images)\n        