# MNIST with PyTorch


In [1]:
# load pytorch
import torch
from torch import nn, optim
from torch.distributions.binomial import Binomial

# dataloader
from mnist import dataloader

# load numpy
import numpy as np

# load pyplot for displaying images
from matplotlib import pyplot as plt

# show images inline on notebook
%matplotlib inline

# for loading the training data
import os
import pickle
import gzip

## Load training data

In [2]:
# Build the project's MNIST loader and unpack the splits used below.
# The third element returned by load_data() is ignored in this cell
# (presumably the test split -- confirm against the loader).
dl = dataloader.DataLoader()
(x_train, y_train), (x_valid, y_valid), _ = dl.load_data()

In [3]:
# Run the training and validation splits through the loader's normalize()
# helper and rebind the four arrays to the normalized results.
splits = ((x_train, y_train), (x_valid, y_valid))
(x_train, y_train), (x_valid, y_valid) = dl.normalize(splits)

In [4]:
# Wrap each array as a torch tensor, keeping the same variable names.
x_train, y_train, x_valid, y_valid = (
    torch.from_numpy(arr) for arr in (x_train, y_train, x_valid, y_valid)
)

## Exploring the data

In [None]:
def show(img, title=None):
    """Draw ``img`` with pyplot using the grayscale colormap.

    When ``title`` is anything other than ``None`` it is set as the plot
    title. The figure is not displayed here; the caller is expected to
    invoke ``plt.show()``.
    """
    plt.imshow(img, cmap="gray")
    if title is None:
        return
    plt.title(title)

In [None]:
# Reshape the validation inputs into a stack of 28x28 images for plotting.
# x_valid is a torch tensor by this point, so convert it to a NumPy array
# explicitly instead of relying on np.reshape's implicit fallback handling
# of non-ndarray inputs.
valid_images = np.asarray(x_valid).reshape((-1, 28, 28))

In [None]:
# Preview the first five validation images, one figure per image.
for i in range(5):
    # int(...) turns the single-element label into a plain number so the
    # title shows the digit itself rather than the tensor's repr.
    show(valid_images[i], int(y_valid[i]))
    plt.show()

## Start building the model

In [5]:
def Softmax(x):
    """Return the softmax of ``x`` taken along dimension 1.

    Args:
        x: tensor with at least two dimensions; each slice along dim 1 is
            normalized independently.

    Returns:
        Tensor of the same shape as ``x`` whose entries along dim 1 are
        non-negative and sum to 1.

    The per-row maximum is subtracted before exponentiating. Softmax is
    invariant to such a shift, but without it ``exp`` overflows to ``inf``
    for large inputs and the division yields NaN.
    """
    # The shift is a constant for gradient purposes, hence detach().
    row_max = x.max(dim=1, keepdim=True)[0].detach()
    exps = torch.exp(x - row_max)
    return exps / exps.sum(dim=1, keepdim=True)

def NLLLoss(y_pred, y):
    """Return the negated mean of the entries of ``y_pred`` selected by ``y``.

    Args:
        y_pred: 2-D tensor with one row per sample and one column per class
            (presumably log-probabilities, given the name -- the function
            itself applies no log).
        y: integer class index for each row of ``y_pred``.

    Returns:
        0-dim tensor ``-mean(y_pred[i, y[i]])``.
    """
    # Pick y_pred[i, y[i]] directly; building the full N x N matrix
    # y_pred[:, y] just to read its diagonal costs O(N^2) memory.
    rows = torch.arange(y_pred.shape[0], dtype=torch.long, device=y_pred.device)
    picked = y_pred[rows, y]
    return -torch.mean(picked)

def CrossEntropyLoss(y_pred, y):
    """Return the mean negative log of the entries of ``y_pred`` selected by ``y``.

    The log is taken here, so ``y_pred`` should contain probabilities
    (e.g. a softmax output), not logits or log-probabilities.

    Args:
        y_pred: 2-D tensor with one row per sample and one column per class.
        y: integer class index for each row of ``y_pred``.

    Returns:
        0-dim tensor ``-mean(log(y_pred[i, y[i]]))``.
    """
    # Pick y_pred[i, y[i]] directly; building the full N x N matrix
    # y_pred[:, y] just to read its diagonal costs O(N^2) memory.
    rows = torch.arange(y_pred.shape[0], dtype=torch.long, device=y_pred.device)
    picked = y_pred[rows, y]
    return -torch.mean(torch.log(picked))

def CrossEntropLossNew(y_pred, y):
    """Return the mean negative log of the entries of ``y_pred`` selected by ``y``.

    The log is taken here, so ``y_pred`` should contain probabilities.

    Args:
        y_pred: 2-D tensor with one row per sample and one column per class.
        y: integer class index for each row of ``y_pred``.

    Returns:
        0-dim tensor ``sum(-log(y_pred[i, y[i]])) / k`` where ``k`` is the
        number of rows in ``y_pred``.
    """
    # Take the batch size from the argument itself; the previous version
    # read a global named `inputs` that only exists while training runs.
    k = y_pred.shape[0]
    rows = torch.arange(k, dtype=torch.long, device=y_pred.device)
    # Use torch ops (not NumPy) so the result stays in the autograd graph.
    logprobs = -torch.log(y_pred[rows, y])
    loss = torch.sum(logprobs) / k
    return loss

def ReLU(x):
    """Return ``max(x, 0)`` element-wise, preserving the shape of ``x``.

    The previous implementation used boolean-mask indexing (``x[x > 0]``),
    which drops the non-positive entries and flattens the result instead of
    replacing them with zero.
    """
    out = torch.clamp(x, min=0)
    return out

# def Dropout(x, p=0.5):
#     mask = Binomial.sample(sample_shape=x.)
#     return x * mask


In [8]:
# Seed torch's global RNG so the random draws that follow are repeatable.
torch.manual_seed(1)

def get_layer(*dims):
    """Create a trainable parameter tensor of shape ``dims``.

    Values are sampled from a standard normal distribution and then divided
    by the first dimension, ``dims[0]``.
    """
    scale = dims[0]
    weights = torch.randn(*dims)
    return nn.Parameter(weights / scale)

class Net(nn.Module):
    """Fully connected classifier with one hidden layer and dropout.

    The forward pass is: dropout -> affine -> ReLU -> dropout -> affine ->
    softmax, so the module outputs class probabilities rather than logits.

    Args:
        n_inputs: size of the last dimension of the input (default 28*28).
        n_hidden1: number of hidden units (default 256).
        n_outputs: number of output classes (default 10).
        p_drop_input: dropout probability applied to the input (default 0.1).
        p_drop_hidden: dropout probability applied to the hidden
            activations (default 0.5).

    The defaults reproduce the previously hard-coded architecture.
    """

    def __init__(self, n_inputs=28*28, n_hidden1=256, n_outputs=10,
                 p_drop_input=0.1, p_drop_hidden=0.5):
        super().__init__()
        self.n_hidden1 = n_hidden1

        # input -> hidden
        self.dropout1 = nn.Dropout(p_drop_input)
        self.layer1 = get_layer(n_inputs, self.n_hidden1)
        self.bias1 = get_layer(self.n_hidden1)

        # hidden -> output
        self.dropout2 = nn.Dropout(p_drop_hidden)
        self.layer2 = get_layer(self.n_hidden1, n_outputs)
        self.bias2 = get_layer(n_outputs)

        self.softmax = Softmax
        self.relu = nn.ReLU()

    def forward(self, x):
        """Map a batch ``x`` (last dimension ``n_inputs``) to class probabilities."""
        x = self.dropout1(x)
        x = (x @ self.layer1) + self.bias1  # hidden layer (affine)
        x = self.relu(x)
        x = self.dropout2(x)
        x = (x @ self.layer2) + self.bias2  # output layer (affine)
        x = self.softmax(x)
        return x

# Loss function used by the training loop. CrossEntropyLoss applies the log
# itself, which matches Net.forward ending in a softmax.
criterion = CrossEntropyLoss

# Instantiate the model with its default configuration.
net = Net()

In [9]:
n_epochs = 20
batch_size = 256
learning_rate = 1e-1

n = batch_size

for epoch in range(n_epochs):

    # Make sure dropout is active for training even if an earlier cell left
    # the network in eval mode.
    net.train()

    for i in range(0, len(x_train), batch_size):

        inputs = x_train[i:i+n]
        target = y_train[i:i+n]

        # forward propagation
        y_pred = net(inputs)

        loss = criterion(y_pred, target)

        # backpropagation
        loss.backward()

        # Plain SGD step over every parameter of the network, followed by
        # clearing the gradient so it does not accumulate into the next
        # batch. no_grad() keeps the in-place update out of the autograd graph.
        with torch.no_grad():
            for param in net.parameters():
                param -= learning_rate * param.grad
                param.grad.zero_()

    # calculate validation loss with random samples
    # (eval mode disables dropout; no_grad avoids building an unused graph)
    net.eval()
    with torch.no_grad():
        random_idxs = torch.LongTensor(batch_size).random_(0, len(x_valid))
        y_valid_pred = net(x_valid[random_idxs])
        loss_valid = criterion(y_valid_pred, y_valid[random_idxs])
    net.train()

    # calculate accuracy and validation accuracy
    # NOTE: `accuracy` is measured on the last training mini-batch of the
    # epoch only (y_pred/target from the final inner-loop iteration).
    accuracy = np.mean(y_pred.detach().numpy().argmax(axis=1) == target.detach().numpy())
    valid_accuracy = np.mean(y_valid_pred.detach().numpy().argmax(axis=1) == y_valid[random_idxs].detach().numpy())

    print(epoch, loss, loss_valid, accuracy, valid_accuracy)
    

0 tensor(0.4166) tensor(0.2757) 0.9 0.91796875
1 tensor(0.2153) tensor(0.1834) 0.9625 0.9453125
2 tensor(0.1958) tensor(0.1862) 0.95 0.93359375
3 tensor(0.1724) tensor(0.1251) 0.9375 0.96875
4 tensor(0.1421) tensor(0.1475) 0.9875 0.95703125
5 tensor(0.1576) tensor(0.1629) 0.9625 0.953125
6 tensor(0.1013) tensor(0.1630) 0.9625 0.96875
7 tensor(1.00000e-02 *
       7.6547) tensor(0.1347) 1.0 0.9609375
8 tensor(0.1010) tensor(0.1034) 0.9625 0.96875
9 tensor(1.00000e-02 *
       6.9674) tensor(1.00000e-02 *
       8.4142) 0.9875 0.96875
10 tensor(1.00000e-02 *
       7.7720) tensor(0.1411) 0.9875 0.97265625
11 tensor(1.00000e-02 *
       6.6308) tensor(0.1641) 0.9875 0.96484375
12 tensor(1.00000e-02 *
       7.5929) tensor(1.00000e-02 *
       7.7253) 1.0 0.96484375
13 tensor(1.00000e-02 *
       6.5709) tensor(1.00000e-02 *
       6.6783) 0.9875 0.984375
14 tensor(1.00000e-02 *
       6.1779) tensor(1.00000e-02 *
       9.1033) 0.9875 0.98046875
15 tensor(1.00000e-02 *
       3.5257) tens

In [None]:
# get some random examples from validation data,
# compare predictions with actual values
n_samples = 10

# The training loop finishes each epoch with net.train(), so dropout would
# still be active here. Run the forward pass in eval mode (without building
# an autograd graph) and then restore whatever mode the net was in.
was_training = net.training
net.eval()
with torch.no_grad():
    valid_preds = net(x_valid)
net.train(was_training)

random_idxs = torch.LongTensor(n_samples).random_(0, len(x_valid))
sample_idxs = random_idxs.numpy()  # NumPy indices for indexing NumPy arrays
preds = np.argmax(valid_preds.detach().numpy(), axis=1)[sample_idxs]
correct = y_valid.detach().numpy()[sample_idxs]
print("random_idxs", random_idxs)
print("preds  ", preds)
print("correct", correct)
# NOTE: despite its name, this mask is True where the prediction is WRONG;
# summing it therefore counts the errors.
correct_preds = preds != correct
errors = np.sum(correct_preds)
print("errors:", errors)

In [None]:
# Reshape the test inputs to 28x28 images and display the one at index 239.
# NOTE(review): x_test is not assigned by any cell visible above -- the
# loading cell discards the third split and the only assignment of x_test
# is commented out further down. Confirm x_test is defined before running.
test_images = np.reshape(x_test, (-1,28,28))
show(test_images[239])

In [None]:
# complete validation set accuracy
# Evaluate in eval mode so dropout is disabled (the training loop leaves the
# net in train mode), skip autograd bookkeeping, then restore the prior mode.
was_training = net.training
net.eval()
with torch.no_grad():
    valid_preds = net(x_valid)
net.train(was_training)

# Fraction of validation samples whose arg-max prediction equals the label.
# (The unused random `all_idxs` tensor from the earlier version was dropped.)
accuracy = np.mean(valid_preds.detach().numpy().argmax(axis=1) == y_valid.detach().numpy())
accuracy

In [None]:
# complete _test_ set accuracy
# (_, _, (x_test, y_test)) = dl.load_data()
# x_test = torch.from_numpy(x_test)
# y_test = torch.from_numpy(y_test)

# test_preds = net(x_test)

# all_idxs = torch.LongTensor(len(x_test)).random_(0, len(x_valid))
# accuracy = np.mean(test_preds.detach().numpy().argmax(axis=1) == y_test.detach().numpy())
# accuracy