# GPU-accelerated neural network using PyTorch

In [1]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the lab_black extension (presumably formats cells with Black; confirm).
%load_ext lab_black
# Load autoreload and use mode 2, which reloads imported modules before each
# cell runs so that edits to local .py files are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
import random
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchvision
from torchvision.datasets import MNIST
import torchvision.transforms as transforms
from torch.utils.data.sampler import SubsetRandomSampler
from torch.utils.data.dataloader import DataLoader

In [4]:
from sklearn.model_selection import train_test_split

In [7]:
# Hyperparameters and data dimensions shared by the cells below.
BATCH_SIZE = 64  # samples per batch for both data loaders
N_EPOCHS = 100  # number of passes of the training loop
WIDTH = 28  # image width; WIDTH * HEIGHT is the model's input size
HEIGHT = 28  # image height
N_CLASSES = 10  # number of outputs (logits) produced by the model
LEARNING_RATE = 1e-3  # lr passed to the SGD optimiser
HIDDEN_SIZE = 32  # width of the model's hidden layer

In [16]:
# Training split of MNIST, stored under ../data/ (downloaded if needed);
# ToTensor is applied to every image as it is read.
dataset = MNIST(
    root="../data/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

In [19]:
# Hold out part of the training data for validation. The 75/25 split that
# train_test_split applies by default is spelled out, and the seed is fixed so
# the same indices are selected after a kernel restart (Restart & Run All).
train_indices, val_indices = train_test_split(
    np.arange(len(dataset)), test_size=0.25, random_state=42
)

# Both loaders read from the same dataset; each sampler restricts its loader to
# its own index subset and draws those indices in random order.
train_sampler = SubsetRandomSampler(train_indices)
train_loader = DataLoader(dataset, BATCH_SIZE, sampler=train_sampler)

val_sampler = SubsetRandomSampler(val_indices)
val_loader = DataLoader(dataset, BATCH_SIZE, sampler=val_sampler)

In [6]:
class MnistModel(nn.Module):
    """Fully connected classifier with one hidden ReLU layer.

    Args:
        input_size: Number of features per sample once the input is flattened.
        hidden_size: Width of the hidden layer.
        num_classes: Number of outputs (one logit per class).
    """

    def __init__(self, input_size: int, hidden_size: int, num_classes: int):
        super().__init__()
        self.input_size = input_size
        self.layer_1 = nn.Linear(input_size, hidden_size)
        self.layer_2 = nn.Linear(hidden_size, num_classes)

    def forward(self, X):
        """Flatten each sample and return logits of shape (batch, num_classes).

        Args:
            X: Batch tensor whose first dimension is the batch size; all
                remaining dimensions are flattened into one feature axis.

        Returns:
            Unnormalised class scores (no softmax is applied).
        """
        # reshape behaves like view for contiguous tensors but also accepts
        # non-contiguous input (e.g. after transpose/permute), where view raises.
        X = X.reshape(X.size(0), -1)
        hidden = F.relu(self.layer_1(X))
        return self.layer_2(hidden)

In [8]:
# The model flattens each image, so its input size is WIDTH * HEIGHT.
model = MnistModel(
    input_size=WIDTH * HEIGHT,
    hidden_size=HIDDEN_SIZE,
    num_classes=N_CLASSES,
)

In [14]:
def get_default_device():
    """Return the CUDA device when one is available, otherwise the CPU device."""
    device_name = "cuda" if torch.cuda.is_available() else "cpu"
    return torch.device(device_name)


def to_device(data, device):
    """Move a tensor, or a (nested) list/tuple of tensors, to ``device``.

    Lists and tuples are processed element by element and always come back as
    a list; anything else is moved with a non-blocking ``.to`` call.
    """
    if not isinstance(data, (list, tuple)):
        return data.to(device, non_blocking=True)

    moved = []
    for item in data:
        moved.append(to_device(item, device))
    return moved

In [15]:
class DeviceDataLoader:
    """Wrap an iterable of batches so each batch is moved to a device when yielded."""

    def __init__(self, data_loader, device):
        self.data_loader = data_loader
        self.device = device

    def __len__(self):
        """Number of batches, as reported by the wrapped loader."""
        return len(self.data_loader)

    def __iter__(self):
        """Yield the wrapped loader's batches after passing each through to_device."""
        yield from (to_device(batch, self.device) for batch in self.data_loader)

In [20]:
# Resolve the device once so both loaders are guaranteed to use the same one.
device = get_default_device()

# Wrap each loader so its batches are moved to `device` as they are yielded.
# The isinstance guard keeps this cell idempotent: re-running it would otherwise
# wrap an already wrapped loader and pass every batch through to_device twice.
if not isinstance(train_loader, DeviceDataLoader):
    train_loader = DeviceDataLoader(train_loader, device)
if not isinstance(val_loader, DeviceDataLoader):
    val_loader = DeviceDataLoader(val_loader, device)

In [22]:
def accuracy(y_preds: torch.Tensor, y_true: torch.Tensor):
    """Return the fraction of elements of ``y_preds`` that equal ``y_true``."""
    n_correct = (y_preds == y_true).sum().item()
    n_total = y_true.numel()
    return n_correct / n_total

In [None]:
# The wrapped loaders yield batches on the default device, so the model has to
# live there too; otherwise the forward pass fails with a device mismatch when
# a GPU is present. Done before the optimiser is built from the parameters.
device = get_default_device()
model.to(device)

optimiser = torch.optim.SGD(model.parameters(), lr=LEARNING_RATE)
history = {
    "loss": [],
    "acc": [],
    "val_loss": [],
    "val_acc": [],
}
for i in range(N_EPOCHS):
    # Per-batch metrics and batch sizes; the sizes are the weights used for the
    # epoch averages, because the last batch of a loader may be smaller.
    batch_losses, batch_accs, batch_sizes = [], [], []
    val_batch_losses, val_batch_accs, val_batch_sizes = [], [], []

    # Training
    model.train()
    for Xb, yb in train_loader:
        logits = model(Xb)
        loss = F.cross_entropy(logits, yb)
        loss.backward()
        optimiser.step()
        optimiser.zero_grad()
        # Metrics. .item() returns a Python float for CPU and CUDA tensors
        # alike, whereas .numpy() raises for tensors that live on the GPU.
        batch_losses.append(loss.item())
        # softmax is monotonic, so the argmax of the logits is the same class.
        batch_accs.append(accuracy(logits.argmax(dim=1), yb))
        batch_sizes.append(len(Xb))

    # Validation
    model.eval()
    with torch.no_grad():
        for Xb, yb in val_loader:
            logits = model(Xb)
            val_batch_losses.append(F.cross_entropy(logits, yb).item())
            val_batch_accs.append(accuracy(logits.argmax(dim=1), yb))
            val_batch_sizes.append(len(Xb))

    # Size-weighted means so that loss and accuracy are aggregated consistently
    history["loss"].append(float(np.average(batch_losses, weights=batch_sizes)))
    history["acc"].append(float(np.average(batch_accs, weights=batch_sizes)))
    history["val_loss"].append(
        float(np.average(val_batch_losses, weights=val_batch_sizes))
    )
    history["val_acc"].append(
        float(np.average(val_batch_accs, weights=val_batch_sizes))
    )
    # The trailing "\r" with end="" rewrites the same output line every epoch
    print(
        f"Epoch: {i + 1}/{N_EPOCHS}, acc: {history['acc'][-1]:.4f}, loss: {history['loss'][-1]:.4f}, val_acc: {history['val_acc'][-1]:.4f},  val_loss: {history['val_loss'][-1]:.4f}\r",
        end="",
    )

In [None]:
# Which metric to plot: "acc" or "loss" (keys of `history`; the validation
# series is looked up under f"val_{metric}").
metric = "acc"
# Human-readable axis label; falls back to the raw key for unknown metrics.
metric_label = {"acc": "Accuracy", "loss": "Loss"}.get(metric, metric)

# The style must be set before the figure is created, otherwise it is not
# applied to these axes on the first run of the cell.
sns.set_style("whitegrid")
f, ax = plt.subplots(figsize=(12, 6))
ax.plot(np.arange(0, len(history[metric])), history[metric], label=f"{metric}")
ax.plot(
    np.arange(0, len(history[f"val_{metric}"])),
    history[f"val_{metric}"],
    label=f"Validation {metric}",
)
ax.set_xlabel("Epoch")
# Label the y-axis with the plotted metric instead of a hard-coded "Loss"
ax.set_ylabel(metric_label)
ax.set_title(f"Training vs. validation {metric_label.lower()}")
ax.legend()
plt.show()