In [None]:
import torch
from torch.nn import functional as F
from torch import nn

from torch.utils.data import DataLoader, random_split
from torchvision.datasets import MNIST
import os
from torchvision import datasets, transforms
from torch.optim import Adam

import utils as utils

In [None]:
class PytorchMNIST(nn.Module):
    """Fully connected MNIST classifier: 784 -> 128 -> 256 -> 10.

    The two hidden layers are followed by ReLU; the output layer is followed
    by a log-softmax, so the network returns log-probabilities suitable for
    ``F.nll_loss``.
    """

    def __init__(self):
        super().__init__()

        # mnist images are (1, 28, 28) (channels, height, width)
        self.layer_1 = nn.Linear(28 * 28, 128)
        self.layer_2 = nn.Linear(128, 256)
        self.layer_3 = nn.Linear(256, 10)

    def forward(self, x):
        """Compute per-class log-probabilities for a batch of images.

        Args:
            x: tensor whose first dimension is the batch and whose remaining
                dimensions hold 28 * 28 = 784 values per sample, e.g.
                (b, 1, 28, 28), (b, 28, 28) or an already flattened (b, 784).

        Returns:
            Tensor of shape (b, 10) with log-probabilities over the labels.
        """
        # (b, ...) -> (b, 784); flatten does not assume a 4-D layout and also
        # copes with non-contiguous inputs, where .view() would raise
        x = torch.flatten(x, start_dim=1)

        # layer 1 (b, 1*28*28) -> (b, 128)
        x = torch.relu(self.layer_1(x))

        # layer 2 (b, 128) -> (b, 256)
        x = torch.relu(self.layer_2(x))

        # layer 3 (b, 256) -> (b, 10)
        x = self.layer_3(x)

        # log-probability distribution over labels
        return torch.log_softmax(x, dim=1)

In [None]:
# Download the data, prepare the train/val/test splits

BATCH_SIZE = 64
SPLIT_SEED = 42  # fixes the train/val partition so runs are comparable

# transforms standard to MNIST: convert the image to a tensor, then normalize
# with mean 0.1307 and std 0.3081
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# downloads into the current working directory on first use
mnist_train = MNIST(os.getcwd(), train=True, download=True, transform=transform)
mnist_test = MNIST(os.getcwd(), train=False, download=True, transform=transform)

# hold out 5000 of the 60000 training images for validation; the seeded
# generator makes the partition identical on every Restart & Run All
mnist_train, mnist_val = random_split(
    mnist_train,
    [55000, 5000],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)

# reshuffle the training samples every epoch; the evaluation loaders keep a
# fixed order
train_dl = DataLoader(mnist_train, batch_size=BATCH_SIZE, shuffle=True)
val_dl = DataLoader(mnist_val, batch_size=BATCH_SIZE)
test_dl = DataLoader(mnist_test, batch_size=BATCH_SIZE)

In [None]:
# Display the data
# label index -> display name for the ten digit classes
classes = {digit: str(digit) for digit in range(10)}

utils.display_grid_data(train_dl, classes, ncols=8)

In [None]:
# Define the train function
def train(log_interval, model, device, train_loader, optimizer, epoch):
    """Run one training epoch and return the mean per-sample loss.

    Args:
        log_interval: print a progress line every ``log_interval`` batches;
            ``0`` or ``None`` disables progress output.
        model: network whose output is fed to ``F.nll_loss``.
        device: device every batch is moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes ``.dataset`` (used for the progress line).
        optimizer: optimizer that updates the model parameters.
        epoch: epoch number, used only in the progress line.

    Returns:
        Mean loss over the samples seen in this epoch as a float, or ``nan``
        when the loader yielded no batches.
    """
    model.train()
    running_loss = 0.0
    n_seen = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = F.nll_loss(output, target)
        loss.backward()
        optimizer.step()

        # weight the batch mean by the batch size so a short final batch does
        # not skew the epoch average; detach keeps the sum out of autograd
        running_loss = running_loss + loss.detach() * len(data)
        n_seen += len(data)

        # a falsy interval turns logging off instead of raising on `% 0`
        if log_interval and batch_idx % log_interval == 0:
            print('Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.format(
                epoch, batch_idx * len(data), len(train_loader.dataset),
                100. * batch_idx / len(train_loader), loss.item()))

    if n_seen == 0:
        return float('nan')
    return float(running_loss) / n_seen

In [None]:
# Define validation function
def val(model, device, val_loader):
    """Evaluate the model on a validation loader and report the mean loss.

    Args:
        model: network whose output is fed to ``F.nll_loss``.
        device: device every batch is moved to before the forward pass.
        val_loader: iterable of ``(batch, labels)`` pairs exposing
            ``.dataset``; its length is the divisor for the mean.

    Returns:
        The summed negative log-likelihood divided by
        ``len(val_loader.dataset)``; the same value is printed.
    """
    model.eval()
    val_loss = 0
    # no gradients are needed for evaluation; skipping autograd avoids
    # building a graph for every batch
    with torch.no_grad():
        for batch, labels in val_loader:
            batch, labels = batch.to(device), labels.to(device)
            log_probs = model(batch)
            # sum (not mean) per batch so the final division gives a true
            # per-sample average even when the last batch is smaller
            val_loss += F.nll_loss(log_probs, labels, reduction='sum').item()

    val_loss /= len(val_loader.dataset)
    print(f'Val loss: {val_loss}')
    return val_loss

In [None]:
# use the GPU when one is available, otherwise fall back to the CPU
device = 'cuda' if torch.cuda.is_available() else 'cpu'

print(f'Using device: {device}')

model = PytorchMNIST().to(device)

# smoke test: one forward pass on a dummy MNIST-shaped batch.
# torch.zeros gives well-defined values, whereas the legacy
# torch.Tensor(1, 1, 28, 28) constructor returns uninitialized memory.
x = torch.zeros(1, 1, 28, 28, device=device)
with torch.no_grad():
    out = model(x)
assert out.shape == (1, 10), f'unexpected output shape: {tuple(out.shape)}'

# define the optimizer
optimizer = Adam(model.parameters(), lr=1e-3)

In [None]:
num_epochs = 5
# epochs are numbered from 1 so the progress output reads 1..num_epochs
for epoch in range(1, num_epochs + 1):
    # one pass over the training data, logging every 100 batches
    train(log_interval=100, model=model, device=device,
          train_loader=train_dl, optimizer=optimizer, epoch=epoch)
    # report the loss on the validation split after every epoch
    val(model=model, device=device, val_loader=val_dl)

In [None]:
# label index -> display name for the ten digit classes
classes = {digit: str(digit) for digit in range(10)}

# move the model to the CPU before handing it to utils.model_predictions
model = model.to('cpu')

labels, predictions = utils.model_predictions(test_dl, model)
df, acc = utils.measure_accuracy(labels, predictions, classes=classes)

In [None]:
# show the first value returned by utils.measure_accuracy
# (presumably a per-class results table -- confirm in utils)
df

In [None]:
# show the second value returned by utils.measure_accuracy
# (presumably the overall test accuracy -- confirm in utils)
acc