# Setup

Pytorch needs pythont 3.10

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader, Dataset

import matplotlib.pyplot as plt

import numpy as np

import pandas as pd

In [2]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

print(device)

cuda


# Dataset

In [3]:
transforms = transforms.Compose([
    transforms.RandomRotation(15),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./../data', train=True, transform=transforms, download=True)
test_dataset = datasets.MNIST(root='./../data', train=False, transform=transforms, download=True)

In [4]:
batch_size = 100
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True, num_workers=2)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False, num_workers=2)

# Define Utility Functions

### Print Model Summary

In [5]:
def print_model_summary(model):
    def layer_summary(layer):
        output_shape = None
        if hasattr(layer, 'out_channels'):
            output_shape = (layer.out_channels, "H_out", "W_out")
        elif hasattr(layer, 'out_features'):
            output_shape = (layer.out_features)
        elif isinstance(layer, torch.nn.modules.pooling._MaxPoolNd):
            output_shape = (layer.kernel_size, "H_out", "W_out")

        num_params = sum(p.numel() for p in layer.parameters() if p.requires_grad)
        return output_shape, num_params

    model_name = model.__class__.__name__
    print(f"'{model_name}' Model Summary:")

    print("="*75)
    print(f"{'Layer':<30} {'Output Shape':<30} {'Param #':<15}")
    print("="*75)
    
    for name, layer in model.named_children():
        output_shape, num_params = layer_summary(layer)
        print(f"{name:<30} {str(output_shape):<30} {num_params:<15}")
    
    total_params = sum(p.numel() for p in model.parameters())
    trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)

    print("="*75)
    print(f"Total params:          {total_params}")
    print(f"Trainable params:      {trainable_params}")
    print(f"Non-trainable params:  {total_params - trainable_params}")

### Train and Test Functions

In [6]:
def train(model, criterion, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        for i, (images, labels) in enumerate(train_loader):
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            if (i+1) % 100 == 0:
                print(f'Epoch {epoch+1}/{epochs}, Step {i+1}/{len(train_loader)}, Loss: {loss.item()}')

def test(model):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in test_loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        print(f'Accuracy: {correct / total * 100}%')

# Softmax Classifier

from https://gist.github.com/dinhnguyenduc1994/b5881bf922054afb311b0c9a053c0357

In [7]:
class SoftMax(nn.Module):
    def __init__(self):
        super(SoftMax, self).__init__()
        self.linear = nn.Linear(28 * 28, 10)
        
    def forward(self, x):
        x = x.reshape(x.size(0), -1)
        z = self.linear(x)
        return z
    
sl_model = SoftMax().to(device)
sl_optimizer = torch.optim.SGD(sl_model.parameters(), lr=0.1)
sl_criterion = nn.CrossEntropyLoss()

print_model_summary(sl_model)

'SoftMax' Model Summary:
Layer                          Output Shape                   Param #        
linear                         10                             7850           
Total params:          7850
Trainable params:      7850
Non-trainable params:  0


In [8]:
train(sl_model, sl_criterion, sl_optimizer, epochs=2)
test(sl_model)

Epoch 1/2, Step 100/600, Loss: 0.5822100639343262
Epoch 1/2, Step 200/600, Loss: 0.6330122947692871
Epoch 1/2, Step 300/600, Loss: 0.3382495641708374
Epoch 1/2, Step 400/600, Loss: 0.4639661908149719
Epoch 1/2, Step 500/600, Loss: 0.25460952520370483
Epoch 1/2, Step 600/600, Loss: 0.6904569864273071
Epoch 2/2, Step 100/600, Loss: 0.31767863035202026
Epoch 2/2, Step 200/600, Loss: 0.4055829644203186
Epoch 2/2, Step 300/600, Loss: 0.32371482253074646
Epoch 2/2, Step 400/600, Loss: 0.5221964120864868
Epoch 2/2, Step 500/600, Loss: 0.5694730281829834
Epoch 2/2, Step 600/600, Loss: 0.37746086716651917
Accuracy: 88.69%


# Simple CNN

In [9]:
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()

        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        self.fc1 = nn.Linear(64 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(torch.relu(self.conv1(x)))
        x = self.pool(torch.relu(self.conv2(x)))

        x = x.view(-1, 64 * 7 * 7) # Flatten the tensor
        x = torch.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
cnn_model = SimpleCNN().to(device)
cnn_criterion = nn.CrossEntropyLoss()
cnn_optimizer = optim.SGD(cnn_model.parameters(), lr=0.001, momentum=0.9)

print_model_summary(cnn_model)

'SimpleCNN' Model Summary:
Layer                          Output Shape                   Param #        
conv1                          (32, 'H_out', 'W_out')         320            
conv2                          (64, 'H_out', 'W_out')         18496          
pool                           (2, 'H_out', 'W_out')          0              
fc1                            128                            401536         
fc2                            10                             1290           
Total params:          421642
Trainable params:      421642
Non-trainable params:  0


In [10]:
train(cnn_model, cnn_criterion, cnn_optimizer, epochs=2)
test(cnn_model)

Epoch 1/2, Step 100/600, Loss: 2.196310043334961
Epoch 1/2, Step 200/600, Loss: 1.7481510639190674
Epoch 1/2, Step 300/600, Loss: 0.8646388053894043
Epoch 1/2, Step 400/600, Loss: 0.6793437004089355
Epoch 1/2, Step 500/600, Loss: 0.4401340186595917
Epoch 1/2, Step 600/600, Loss: 0.44913265109062195
Epoch 2/2, Step 100/600, Loss: 0.3419264853000641
Epoch 2/2, Step 200/600, Loss: 0.34555405378341675
Epoch 2/2, Step 300/600, Loss: 0.36979442834854126
Epoch 2/2, Step 400/600, Loss: 0.32078173756599426
Epoch 2/2, Step 500/600, Loss: 0.2504320740699768
Epoch 2/2, Step 600/600, Loss: 0.1855747401714325
Accuracy: 92.16%
