# Notebook 3: Convolutional Neural Network

In [None]:
from collections import defaultdict

import matplotlib.pyplot as plt
import numpy as np
import torch
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from torchvision import datasets
from torchvision.transforms import ToTensor

In [None]:
# Run on the GPU when CUDA is available; otherwise fall back to the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print(f"Using device: {device}")

In [None]:
# Preview the first five samples: each row of X is drawn as an 8x8 image.
X, y = load_digits(return_X_y=True)
fig, axes = plt.subplots(nrows=1, ncols=5, figsize=(20, 5), sharex=True)
plt.gray()
# zip stops after the five axes, so only the first five rows of X are used.
for axis, pixels in zip(axes, X):
    axis.matshow(pixels.reshape(8, 8))
plt.show()

In [None]:
class CNN(torch.nn.Module):
    """Small two-stage convolutional classifier that emits 10 scores per sample.

    Each stage is a 2x2 convolution followed by ReLU and a 2x2 max-pool
    (1 -> 32 -> 16 channels). The pooled features are flattened and passed
    through a single linear layer.

    Args:
        linear_layer_size: Number of features per sample left after both
            conv/pool stages, i.e. the input width of the linear layer. It
            has to match the input image size (this notebook uses 16 for
            8x8 images and 576 for 28x28 images).
    """

    def __init__(self, linear_layer_size):
        super().__init__()
        self.linear_layer_size = linear_layer_size

        self.conv1 = torch.nn.Conv2d(1, 32, kernel_size=2)
        self.conv2 = torch.nn.Conv2d(32, 16, kernel_size=2)
        self.linear = torch.nn.Linear(linear_layer_size, 10)

    def forward(self, x):
        """Return the 10 class scores for every image in the batch ``x``."""
        n_samples = x.size(0)
        activate = torch.nn.functional.relu
        pool = torch.nn.functional.max_pool2d

        # Both stages share the same conv -> ReLU -> 2x2 max-pool pattern.
        features = x
        for conv in (self.conv1, self.conv2):
            features = pool(activate(conv(features)), kernel_size=2)

        # Flatten to (batch, linear_layer_size); a size mismatch fails here.
        flattened = features.reshape(n_samples, self.linear_layer_size)
        return self.linear(flattened)

In [None]:
def print_model(model):
    """
    Print the total number of trainable parameters of a PyTorch model.

    Only parameters with ``requires_grad`` set are counted.
    """
    parameter_count = 0
    for param in model.parameters():
        if param.requires_grad:
            parameter_count += param.numel()
    print("In total, this network has ", parameter_count, " parameters")

In [None]:
def run_one_epoch(model, optimizer, X, y, train=True):
    """Run one full-batch pass of ``model`` over ``(X, y)``.

    When ``train`` is true the model is put in training mode, gradients are
    reset, and a single optimizer step is taken on the cross-entropy loss.
    Otherwise the model is put in eval mode, no gradients are tracked, and
    ``optimizer`` is not used.

    Args:
        model: Module mapping ``X`` to per-class scores, one row per sample.
        optimizer: Optimizer over ``model``'s parameters (used only when
            ``train`` is true).
        X: Input batch passed straight to ``model``.
        y: 1-D tensor of integer class labels, one per sample.
        train: Whether to update the model or only evaluate it.

    Returns:
        Tuple ``(accuracy, loss)`` of detached 0-dim tensors.
    """
    if train:
        model.train()
        optimizer.zero_grad()
    else:
        model.eval()

    # Autograd bookkeeping is only needed when backward() will be called;
    # skipping it during evaluation saves memory and time.
    with torch.set_grad_enabled(bool(train)):
        output = model(X).squeeze()
        if output.dim() == 1 and y.size(0) == 1:
            # squeeze() also drops a batch dimension of size one; restore it
            # so that argmax(dim=1) and the loss still see (1, n_classes).
            output = output.unsqueeze(0)
        acc = torch.sum(torch.argmax(output, dim=1) == y) / y.size(0)
        loss = torch.nn.CrossEntropyLoss()(output, y)

    if train:
        loss.backward()
        optimizer.step()

    # Detach tells torch to stop tracking a tensor's gradients
    return acc.detach(), loss.detach()

In [None]:
# Load the digits data as tensors on `device`: images become (N, 1, 8, 8)
# float32 (a single channel axis is added), labels become int64.
X, y = load_digits(return_X_y=True)
X = torch.tensor(X.reshape(-1, 1, 8, 8), dtype=torch.float32, device=device)
y = torch.tensor(y, dtype=torch.long, device=device)

# 80/20 split; the result is ordered (X_train, X_test, y_train, y_test).
mnist = train_test_split(X, y, test_size=0.2)
print("train:", mnist[0].shape)
print("test: ", mnist[1].shape)

In [None]:
def demo(model, data, n_epochs=100, verbose=False, device="cpu"):
    """Train ``model`` full-batch with Adam and record metrics for every epoch.

    Each epoch performs one training pass over the whole training set and
    one evaluation pass over the whole test set via ``run_one_epoch``.

    Args:
        model: The ``torch.nn.Module`` to train; it is moved to ``device``.
        data: Tuple ``(X_train, X_test, y_train, y_test)`` of tensors; all
            four are moved to ``device``.
        n_epochs: Number of epochs to run.
        verbose: If true, print train/test loss and accuracy about ten
            times over the run (every epoch when ``n_epochs < 10``).
        device: Device on which training runs.

    Returns:
        A ``defaultdict(list)`` with keys ``"train_acc"``, ``"test_acc"``,
        ``"train_loss"`` and ``"test_loss"``; each list holds one CPU
        tensor per epoch.
    """
    print(f"Using device: {device}")
    X_train, X_test, y_train, y_test = data
    X_train, X_test = X_train.to(device), X_test.to(device)
    y_train, y_test = y_train.to(device), y_test.to(device)
    model = model.to(device)  # Move model to device

    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

    # Clamp to 1: for n_epochs < 10 the integer division yields 0, and the
    # modulo below would raise ZeroDivisionError.
    report_every = max(1, n_epochs // 10)

    results = defaultdict(list)
    for i in range(n_epochs):
        train_acc, train_loss = run_one_epoch(model, optimizer, X_train, y_train, train=True)
        test_acc, test_loss = run_one_epoch(model, optimizer, X_test, y_test, train=False)

        results["train_acc"].append(train_acc.cpu())  # Move results back to CPU for easier handling
        results["test_acc"].append(test_acc.cpu())
        results["train_loss"].append(train_loss.cpu())
        results["test_loss"].append(test_loss.cpu())

        if verbose and (i + 1) % report_every == 0:
            train_stats = f"Train loss: {train_loss:.3f} Train accuracy: {100 * train_acc:4.1f}%"
            test_stats = f"Test loss: {test_loss:.3f} Test accuracy: {100 * test_acc:.1f}%"
            print(f"{i + 1:4d} {train_stats} {test_stats}")

    return results

In [None]:
def plot(results):
    """Show train/test loss and accuracy curves side by side.

    Args:
        results: Mapping with per-epoch sequences under the keys
            ``"train_loss"``, ``"test_loss"``, ``"train_acc"`` and
            ``"test_acc"``.
    """
    fig, (loss_ax, acc_ax) = plt.subplots(
        nrows=1,
        ncols=2,
        figsize=(10, 5),
        constrained_layout=True,
    )

    # Left panel: loss curves.
    loss_ax.set_title("Loss per Epoch")
    loss_epochs = np.arange(len(results["train_loss"]))
    loss_ax.plot(loss_epochs, results["train_loss"], c="r", label="Train Loss")
    loss_ax.plot(loss_epochs, results["test_loss"], c="b", label="Test Loss")
    loss_ax.legend(loc="best")
    # Double the upper y-limit (presumably to leave headroom above the curves).
    lower, upper = loss_ax.get_ylim()
    loss_ax.set_ylim(lower, 2 * upper)

    # Right panel: accuracy curves on a fixed 0-1.1 scale.
    acc_ax.set_title("Accuracy per Epoch")
    acc_epochs = np.arange(len(results["train_acc"]))
    acc_ax.plot(acc_epochs, results["train_acc"], c="r", label="Train Acc")
    acc_ax.plot(acc_epochs, results["test_acc"], c="b", label="Test Acc")
    acc_ax.legend(loc="best")
    acc_ax.set_ylim(0, 1.1)

    plt.show()

In [None]:
# CNN on the 8x8 digits: after both conv/pool stages a 1x1 map with
# 16 channels remains, hence linear_layer_size=16.
model = CNN(linear_layer_size=16)
print_model(model)
results = demo(model, mnist, n_epochs=100, verbose=True, device=device)
plot(results)

In [None]:
class Net(torch.nn.Module):
    """Fully connected classifier: ``input_dim`` -> 32 -> 16 -> 10 scores.

    Args:
        input_dim: Number of features per sample after flattening.
        activation: Callable applied after the first two linear layers
            (``torch.tanh`` by default); the output layer has none.
    """

    def __init__(self, input_dim, activation=torch.tanh):
        super().__init__()

        self.input_dim = input_dim
        self.layer1 = torch.nn.Linear(input_dim, 32)
        self.layer2 = torch.nn.Linear(32, 16)
        self.layer3 = torch.nn.Linear(16, 10)
        self.activation = activation

        # Re-initialise the weights (not the biases) with Xavier-uniform.
        for name in ("layer1", "layer2", "layer3"):
            torch.nn.init.xavier_uniform_(getattr(self, name).weight)

    def forward(self, x):
        """Flatten ``x`` to ``(-1, input_dim)`` and return the 10 class scores."""
        hidden = x.reshape(-1, self.input_dim)
        for layer in (self.layer1, self.layer2):
            hidden = self.activation(layer(hidden))
        return self.layer3(hidden)

In [None]:
# Fully connected baseline on the digits: each 1x8x8 image flattens to 64 inputs.
model = Net(input_dim=64)
print_model(model)
results = demo(model, mnist, n_epochs=100, verbose=True, device=device)
plot(results)

In [None]:
# MNIST via torchvision, stored under ./data; only the training-split call
# passes download=True.
train_data = datasets.MNIST(root="data", train=True, transform=ToTensor(), download=True)
test_data = datasets.MNIST(
    root="data",
    train=False,
    transform=ToTensor(),
)
# Subset sizes used for the 28x28 experiments below (presumably chosen to
# mirror the train/test sizes of the digits split - TODO confirm).
n_train, n_test = 1437, 360

In [None]:
# Take the first n_train / n_test images, move them to `device`, and add a
# channel axis so the images are shaped (N, 1, 28, 28).
# NOTE(review): reading `.data` directly appears to bypass the ToTensor()
# transform, so pixel values are likely unscaled - confirm this is intended.
X_train = (
    train_data.data[:n_train]
    .to(dtype=torch.float, device=device)
    .reshape(n_train, 1, 28, 28)
)
y_train = train_data.targets[:n_train].to(dtype=torch.long, device=device)

X_test = (
    test_data.data[:n_test]
    .to(dtype=torch.float, device=device)
    .reshape(n_test, 1, 28, 28)
)
y_test = test_data.targets[:n_test].to(dtype=torch.long, device=device)

# Same (X_train, X_test, y_train, y_test) ordering that demo() unpacks.
large_mnist = (X_train, X_test, y_train, y_test)
print("train:", large_mnist[0].shape)
print("test: ", large_mnist[1].shape)

In [None]:
# CNN on 28x28 images: after both conv/pool stages a 6x6 map with
# 16 channels remains, hence linear_layer_size=576.
model = CNN(linear_layer_size=576)
print_model(model)
results = demo(model, large_mnist, n_epochs=100, verbose=True, device=device)
plot(results)

In [None]:
# Fully connected baseline on 28x28 images: each flattens to 784 inputs.
model = Net(input_dim=784)
print_model(model)
results = demo(model, large_mnist, n_epochs=100, verbose=True, device=device)
plot(results)