# MNIST with PyTorch


In [1]:
# load pytorch
import torch
from torch import nn, optim

# dataloader
from mnist import dataloader

# load numpy
import numpy as np

# load pyplot for displaying images
from matplotlib import pyplot as plt

# show images inline in the notebook
%matplotlib inline

# for loading the training data
import os
import pickle
import gzip

## Load training data

In [2]:
# Fetch MNIST through the project's loader. load_data() returns three
# entries; only the first two (training and validation pairs) are kept.
dl = dataloader.DataLoader()
(x_train, y_train), (x_valid, y_valid), _ = dl.load_data()

In [3]:
# Run both splits through the loader's normalize() helper and rebind the
# results to the same names.
(x_train, y_train), (x_valid, y_valid) = dl.normalize(
    ((x_train, y_train), (x_valid, y_valid))
)

In [4]:
# Wrap every NumPy array as a torch tensor, in the same order as before.
x_train, y_train, x_valid, y_valid = map(
    torch.from_numpy, (x_train, y_train, x_valid, y_valid)
)

## Exploring the data

In [None]:
def show(img, title=None):
    """Draw ``img`` with a grayscale colormap on the current pyplot axes.

    ``title`` is applied as the plot title unless it is ``None``. The figure
    is not displayed here; the caller is expected to call ``plt.show()``.
    """
    plt.imshow(img, cmap="gray")
    if title is None:
        return
    plt.title(title)

In [None]:
# View the validation inputs as a stack of 28x28 images. x_valid is a torch
# tensor at this point, so convert it to a NumPy array explicitly before
# reshaping.
valid_images = np.asarray(x_valid).reshape((-1, 28, 28))

In [None]:
# Preview the first five validation images, each titled with its label.
for img, label in zip(valid_images[:5], y_valid[:5]):
    # .item() unwraps the 0-d tensor so the title reads "7", not "tensor(7)".
    show(img, label.item())
    plt.show()

## Start building the model

In [5]:
def Softmax(x):
    """Row-wise softmax of a 2-D tensor.

    Args:
        x: tensor whose rows (dim 0) are samples and whose columns (dim 1)
           are the scores to normalize.

    Returns:
        Tensor of the same shape as ``x`` where every row is non-negative
        and sums to 1.
    """
    # Subtracting the per-row maximum leaves the result mathematically
    # unchanged but keeps exp() from overflowing to inf (which would turn
    # the ratio into NaN) for large scores.
    shifted = x - x.max(dim=1, keepdim=True)[0]
    exps = torch.exp(shifted)
    return exps / exps.sum(dim=1, keepdim=True)

def NLLLoss(y_pred, y):
    """Negated mean of the entries of ``y_pred`` selected by the labels.

    Args:
        y_pred: 2-D tensor of per-class scores, one row per sample
                (presumably log-probabilities, as with torch's NLL loss).
        y: 1-D integer tensor with one class index per row of ``y_pred``.

    Returns:
        0-d tensor equal to ``-mean(y_pred[i, y[i]])`` over all rows ``i``.
    """
    # gather picks exactly one entry per row; this avoids materializing the
    # N x N matrix that y_pred[:, y] would create only to read its diagonal.
    picked = y_pred.gather(1, y.long().unsqueeze(1)).squeeze(1)
    return -torch.mean(picked)

def CrossEntropyLoss(y_pred, y):
    """Mean negative log of the predicted value at each sample's label.

    Unlike ``torch.nn.CrossEntropyLoss``, this takes the log of ``y_pred``
    directly, so ``y_pred`` should already hold probabilities (e.g. the
    output of a softmax), not raw scores.

    Args:
        y_pred: 2-D tensor with one row of per-class values per sample.
        y: 1-D integer tensor with one class index per row of ``y_pred``.

    Returns:
        0-d tensor equal to ``-mean(log(y_pred[i, y[i]]))`` over rows ``i``.
    """
    # gather reads one entry per row instead of building the N x N matrix
    # y_pred[:, y] and extracting its diagonal.
    picked = y_pred.gather(1, y.long().unsqueeze(1)).squeeze(1)
    return -torch.mean(torch.log(picked))

In [6]:
def get_layer(*dims):
    """Create a trainable parameter of shape ``dims``.

    Values are drawn from a standard normal distribution and divided by the
    first dimension, ``dims[0]``.
    """
    scale = dims[0]
    weights = torch.randn(dims) / scale
    return nn.Parameter(weights)

class Net(nn.Module):
    """One linear layer (28*28 inputs -> 10 outputs) followed by a softmax."""

    def __init__(self):
        super().__init__()
        # Parameters are created in this order: weights first, then bias.
        self.layer1 = get_layer(28*28, 10)
        self.bias = get_layer(10)
        self.softmax = Softmax

    def forward(self, x):
        """Return the softmax of ``x @ layer1 + bias``.

        ``x`` must have 28*28 entries in its last dimension so the matrix
        product with ``layer1`` is defined.
        """
        logits = x @ self.layer1 + self.bias  # linear layer
        return self.softmax(logits)

# Instantiate the model and select the loss function used for training.
net = Net()

criterion = CrossEntropyLoss

In [7]:
# Hyperparameters for plain mini-batch gradient descent.
n_epochs = 3
batch_size = 128
learning_rate = 1e-1

for epoch in range(n_epochs):

    # Walk through the training set in consecutive slices of `batch_size`
    # rows; the last slice is shorter when the set size is not a multiple
    # of `batch_size`.
    for i in range(0, len(x_train), batch_size):

        inputs = x_train[i:i+batch_size]
        target = y_train[i:i+batch_size]

        # forward propagation
        y_pred = net(inputs)

        loss = criterion(y_pred, target)

        # backpropagation
        loss.backward()

        # Manual gradient-descent step. Done under no_grad() rather than
        # through `.data`, so autograd does not record the update while
        # still tracking in-place changes to the parameters correctly.
        with torch.no_grad():
            for param in net.parameters():
                param -= learning_rate * param.grad
                # Gradients accumulate across backward() calls; reset them
                # before the next batch.
                param.grad.zero_()

    # calculate validation loss with random samples (drawn with replacement)
    random_idxs = torch.LongTensor(batch_size).random_(0, len(x_valid))
    valid_target = y_valid[random_idxs]
    # No gradients are needed for evaluation, so skip building the graph.
    with torch.no_grad():
        y_valid_pred = net(x_valid[random_idxs])
        loss_valid = criterion(y_valid_pred, valid_target)

    # calculate accuracy and validation accuracy
    # NOTE: the training accuracy (and the printed loss) refer only to the
    # last mini-batch of the epoch, not to the whole training set.
    accuracy = np.mean(y_pred.detach().numpy().argmax(axis=1) == target.detach().numpy())
    valid_accuracy = np.mean(y_valid_pred.numpy().argmax(axis=1) == valid_target.numpy())

    print(epoch, loss, loss_valid, accuracy, valid_accuracy)
    

0 tensor(0.2503) tensor(0.3576) 0.9125 0.875
1 tensor(0.2154) tensor(0.2795) 0.925 0.890625
2 tensor(0.1991) tensor(0.1984) 0.95 0.9375
