In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, Subset

from torchvision.datasets import MNIST
import torchvision.transforms as T

from sklearn.model_selection import train_test_split

In [2]:
# Shell out to nvidia-smi to record the GPU, driver and CUDA version this kernel was run with.
!nvidia-smi

Tue Jul 19 18:19:41 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 510.73.05    Driver Version: 510.73.05    CUDA Version: 11.6     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:01:00.0 Off |                  N/A |
| N/A   54C    P8    16W /  N/A |      8MiB /  6144MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

In [3]:
# parameters
DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

# Seed torch's global RNG so weight initialisation and DataLoader shuffling
# repeat across kernel restarts. This does not affect sklearn's RNG.
SEED = 42
torch.manual_seed(SEED)

NUM_EPOCHS = 10
ALPHA = 0.01        # learning rate handed to the SGD optimizer
VAL_SIZE = 10_000   # number of samples held out for validation
BATCH_SIZE = 32

NUM_FEATURES = 28 * 28  # width each image is flattened to before the first Linear layer
HIDDEN_1 = 100
HIDDEN_2 = 50
NUM_LABELS = 10

In [4]:
# Training split of MNIST; downloaded into ../datasets/ on first use.
# It is divided into train and validation subsets further down.
train_val_dataset = MNIST(
    root='../datasets/',
    train=True,
    download=True,
    transform=T.ToTensor(),
)

# Held-out test split, read from the same root without triggering a download.
test_dataset = MNIST(
    root='../datasets/',
    train=False,
    download=False,
    transform=T.ToTensor(),
)

In [5]:
# Fixed seed so the same samples land in train/validation on every run;
# without it, validation metrics from different runs are measured on
# different subsets and are not comparable.
SPLIT_SEED = 42

# Stratify on the digit labels so both subsets keep the class proportions.
stratify = train_val_dataset.targets.numpy()
train_idxs, val_idxs = train_test_split(
    range(len(train_val_dataset)),
    stratify=stratify,
    test_size=VAL_SIZE,
    random_state=SPLIT_SEED,
)

In [6]:
# Index-based views onto train_val_dataset; no image data is copied.
train_dataset = Subset(train_val_dataset, train_idxs)
val_dataset = Subset(train_val_dataset, val_idxs)

In [7]:
# Only the training loader shuffles and drops the final partial batch;
# the validation and test loaders iterate in dataset order and keep every sample.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=4,
    drop_last=True,
)

val_dataloader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
)

test_dataloader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    num_workers=4,
)

In [8]:
class SigmoidModel(nn.Module):
    """Fully connected classifier with two sigmoid-activated hidden layers.

    Layer widths are NUM_FEATURES -> HIDDEN_1 -> HIDDEN_2 -> NUM_LABELS.
    The last layer is linear, so the output is raw (un-normalised) class scores.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(NUM_FEATURES, HIDDEN_1),
            nn.Sigmoid(),
            nn.Linear(HIDDEN_1, HIDDEN_2),
            nn.Sigmoid(),
            nn.Linear(HIDDEN_2, NUM_LABELS),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, features):
        """Pass ``features`` through the layer stack and return its output."""
        scores = self.layers(features)
        return scores
    
class ReluModel(nn.Module):
    """Fully connected classifier with two ReLU-activated hidden layers.

    Layer widths are NUM_FEATURES -> HIDDEN_1 -> HIDDEN_2 -> NUM_LABELS.
    The last layer is linear, so the output is raw (un-normalised) class scores.
    """

    def __init__(self):
        super().__init__()
        stack = [
            nn.Linear(NUM_FEATURES, HIDDEN_1),
            nn.ReLU(),
            nn.Linear(HIDDEN_1, HIDDEN_2),
            nn.ReLU(),
            nn.Linear(HIDDEN_2, NUM_LABELS),
        ]
        self.layers = nn.Sequential(*stack)

    def forward(self, features):
        """Pass ``features`` through the layer stack and return its output."""
        scores = self.layers(features)
        return scores

In [9]:
def calculate_performance(model, criterion, dataloader):
    """Evaluate ``model`` on every batch of ``dataloader``.

    Args:
        model: module mapping a ``(batch, features)`` tensor to per-class scores.
        criterion: loss callable taking ``(outputs, labels)``. Its ``reduction``
            attribute (treated as ``"mean"`` when absent) decides how the batch
            loss is turned back into a sum over samples, so the result is a
            per-sample average for "sum", "mean" and "none" alike.
        dataloader: iterable yielding ``(features, labels)`` batches.

    Returns:
        ``(mean_loss_per_sample, accuracy)`` as Python floats.

    Raises:
        ValueError: if ``dataloader`` yields no samples.

    Note:
        The model is switched to eval mode and left in it.
    """
    model.eval()

    # Run on whatever device the model lives on rather than on a notebook global.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    reduction = getattr(criterion, "reduction", "mean")
    num_samples = 0
    num_correct = 0
    loss_sum = 0.0

    with torch.inference_mode():
        for features, labels in dataloader:
            # Flatten everything except the batch dimension.
            features = features.view(features.size(0), -1).to(device)
            labels = labels.to(device)
            outputs = model(features)

            predictions = outputs.max(dim=1)[1]
            num_correct += (predictions == labels).sum().item()

            batch_size = len(labels)
            loss = criterion(outputs, labels)
            if reduction == "mean":
                # Undo the per-batch averaging so batches of unequal size are
                # weighted by their sample count in the final average.
                loss_sum += loss.item() * batch_size
            else:
                # "sum" is already a scalar; "none" yields per-sample losses.
                loss_sum += loss.sum().item()
            num_samples += batch_size

    if num_samples == 0:
        raise ValueError("dataloader yielded no samples; cannot compute loss/accuracy")
    return loss_sum / num_samples, num_correct / num_samples

In [10]:
def train(model):
    """Fit ``model`` with plain SGD and print validation metrics after every epoch.

    Makes NUM_EPOCHS passes over ``train_dataloader`` with learning rate ALPHA.
    After each pass the model is scored on ``val_dataloader`` through
    ``calculate_performance``. The model is updated in place; nothing is returned.
    """
    # CrossEntropyLoss applies softmax internally, so the model emits raw scores.
    # reduction="sum" adds up the per-sample losses of a batch instead of averaging.
    loss_fn = nn.CrossEntropyLoss(reduction="sum")
    sgd = optim.SGD(model.parameters(), lr=ALPHA)

    for epoch in range(NUM_EPOCHS):
        model.train()
        for images, targets in train_dataloader:
            inputs = images.view(-1, NUM_FEATURES).to(DEVICE)
            targets = targets.to(DEVICE)

            # Clear gradients left over from the previous step, then
            # run forward pass -> backward pass -> parameter update.
            sgd.zero_grad()
            batch_loss = loss_fn(model(inputs), targets)
            batch_loss.backward()
            sgd.step()

        val_loss, val_acc = calculate_performance(model, loss_fn, val_dataloader)
        print(f'Epoch: {epoch+1} | Validation Loss: {val_loss} | Validation Accuracy: {val_acc}')

In [11]:
# Build one network per activation and move each to DEVICE.
# Construction order is kept: SigmoidModel first, then ReluModel.
sigmoidModel, reluModel = (
    model_cls().to(DEVICE) for model_cls in (SigmoidModel, ReluModel)
)

In [12]:
# Train the sigmoid-activation network; train() prints validation loss/accuracy after each epoch.
train(sigmoidModel)

Epoch: 1 | Validation Loss: 0.34811560919284823 | Validation Accuracy: 0.8988
Epoch: 2 | Validation Loss: 0.22348693027496339 | Validation Accuracy: 0.9339
Epoch: 3 | Validation Loss: 0.17333723184764385 | Validation Accuracy: 0.9484
Epoch: 4 | Validation Loss: 0.15025575025081633 | Validation Accuracy: 0.9545
Epoch: 5 | Validation Loss: 0.13564708149731158 | Validation Accuracy: 0.9588
Epoch: 6 | Validation Loss: 0.12167281376868487 | Validation Accuracy: 0.9628
Epoch: 7 | Validation Loss: 0.10694371696859598 | Validation Accuracy: 0.969
Epoch: 8 | Validation Loss: 0.1000077493570745 | Validation Accuracy: 0.9708
Epoch: 9 | Validation Loss: 0.09803582267686725 | Validation Accuracy: 0.9715
Epoch: 10 | Validation Loss: 0.10075653049424291 | Validation Accuracy: 0.9705


In [13]:
# Train the ReLU-activation network with the same settings; train() prints validation loss/accuracy after each epoch.
train(reluModel)

Epoch: 1 | Validation Loss: 0.16223896453380585 | Validation Accuracy: 0.9473
Epoch: 2 | Validation Loss: 0.1417267472974956 | Validation Accuracy: 0.9551
Epoch: 3 | Validation Loss: 0.11666484325192869 | Validation Accuracy: 0.9652
Epoch: 4 | Validation Loss: 0.11132472120095044 | Validation Accuracy: 0.9671
Epoch: 5 | Validation Loss: 0.12776478067282587 | Validation Accuracy: 0.9657
Epoch: 6 | Validation Loss: 0.1063403736966895 | Validation Accuracy: 0.9709
Epoch: 7 | Validation Loss: 0.11310694124647416 | Validation Accuracy: 0.9709
Epoch: 8 | Validation Loss: 0.11099657119425246 | Validation Accuracy: 0.9713
Epoch: 9 | Validation Loss: 0.11936425841017627 | Validation Accuracy: 0.97
Epoch: 10 | Validation Loss: 0.11850827669205027 | Validation Accuracy: 0.9718
