# MNIST from scratch with numpy

In [1]:
# load changed modules automatically
%reload_ext autoreload
%autoreload 2

In [2]:
# load numpy
import numpy as np

# load dataloaders and required layers
from mnist import dataloader
from mnist.layers import Softmax, Linear, Dropout, ReLU
from mnist.losses import CrossEntropy

# load pyplot for displaying images
from matplotlib import pyplot as plt

# show images inline on notebook
%matplotlib inline

# debugging
import pdb

## Load training data

In [3]:
dl = dataloader.DataLoader()
((x_train, y_train), (x_valid, y_valid), (x_test, (y_test))) = dl.load_data()

In [4]:
x_train.shape, y_train.shape, x_valid.shape, y_valid.shape

((50000, 784), (50000,), (10000, 784), (10000,))

## Normalize data

In [5]:
(x_train, y_train), (x_valid, y_valid) = dl.normalize(((x_train, y_train), (x_valid, y_valid)))

## Exploring the data

In [None]:
def show(img):
    plt.imshow(img, cmap="gray")

In [None]:
valid_images = np.reshape(x_valid, (-1,28,28))

In [None]:
show(valid_images[0])

## Start building the model

In [6]:
def shuffle(arr1, arr2):
    random_idxs = np.arange(len(arr1))
    np.random.shuffle(random_idxs)
    return x_train[random_idxs], y_train[random_idxs]

In [9]:
# always reproduce the same weights
np.random.seed(1)

class Net(object):
    def __init__(self):
        self.train = True
        self.input_layer = Linear(28*28, 10) # linear layer with bias
        self.softmax = Softmax()
        self.dropout = Dropout(0.7)
        self.cross_entropy = CrossEntropy()
    
    def forward(self, x):
        if self.train:
            x = self.dropout.forward(x)

        x = self.input_layer.forward(x)
        x = self.softmax.forward(x)
        return x
    
    def backward(self, targets):
        grad_loss = net.softmax.backward(targets)
        _, grad_input_layer, grad_bias = net.input_layer.backward(grad_loss)
        return grad_input_layer, grad_bias
    
    def loss(self, y_pred, y):
        return self.cross_entropy.loss(y_pred, y)

net = Net()

In [None]:
debug = False

if debug:
    n_epochs = 1
    batch_size = 3
else:
    n_epochs = 50
    batch_size = 512
    
learning_rate = 1e-2

for epoch in range(n_epochs):
    
    for i in range(0, len(x_train), batch_size):

        inputs = x_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]
        
        inputs, targets = shuffle(inputs, targets)

        if debug:
            print("inputs.shape", inputs.shape)
            print("targets.shape", targets.shape)

        # forward propagation
        y_pred = net.forward(inputs)
        predictions = y_pred.copy()

        if debug:
            print("y_pred.shape:", y_pred.shape)
            print("predictions.shape", predictions.shape)
            
        # calculate cross-entropy loss
        loss = net.loss(predictions, targets)
        
        if debug:
            print(epoch, loss)
        
        # backpropagation
        grad_input_layer, grad_bias = net.backward(targets)
        
        if debug:
            print("grad_input_layer.shape", grad_input_layer.shape)
            print("net.input_layer.weights.shape", net.input_layer.weights.shape)
            print("net.input_layer.bias.shape", net.input_layer.bias.shape)
        
        net.input_layer.weights -= learning_rate * grad_input_layer
        net.input_layer.bias -= learning_rate * grad_bias
        
        if debug:
            break
        
    # calculate validation loss for some random indices
    net.train = False
    random_idxs = np.random.randint(0, len(x_valid), batch_size)
    y_valid_pred = net.forward(x_valid[random_idxs])
    loss_valid = net.loss(y_valid_pred, y_valid[random_idxs])
    net.train = True
    
    #calculate accuracy and validation accuracy
    accuracy = np.mean(y_pred.argmax(axis=1) == targets)
    valid_accuracy = np.mean(y_valid_pred.argmax(axis=1) == y_valid[random_idxs])
    
    print(epoch, loss, loss_valid, accuracy, valid_accuracy)


0 0.5394824091541721 0.6910538516122477 0.8720238095238095 0.814453125
1 0.4174654855262794 0.5461995047442303 0.8958333333333334 0.822265625
2 0.38805430993767315 0.5323003747201825 0.8869047619047619 0.8203125
3 0.36190649005957926 0.41800528465176945 0.9017857142857143 0.88671875
4 0.33190516304940043 0.47453546980488276 0.9136904761904762 0.865234375
5 0.3527439663648609 0.41713942475530386 0.8869047619047619 0.8828125
6 0.3077765987682525 0.4526116126184475 0.9315476190476191 0.876953125
7 0.3058923074747741 0.5035840627165902 0.9077380952380952 0.84375
8 0.29052231244484644 0.5003601816389651 0.9017857142857143 0.849609375
9 0.25314776921203763 0.5094530495957906 0.9375 0.83984375
10 0.25548083162702695 0.4219635189079232 0.9255952380952381 0.857421875
11 0.2552913826929397 0.4545912315986147 0.9255952380952381 0.84375
12 0.23288145100753124 0.5024954138355286 0.9375 0.84765625
13 0.24670659362628983 0.5159597887466021 0.9315476190476191 0.8515625
14 0.22228903693179558 0.4052670

## Check results

In [None]:
# get some random examples from validation data,
# compare predictions with actual values
valid_preds = net.forward(x_valid)
random_idxs = np.random.randint(0, len(x_valid), 10)
np.argmax(valid_preds, axis=1)[random_idxs], y_valid[random_idxs]

In [None]:
# display a random image from validation data with
# prediction and correct value
valid_images = np.reshape(x_valid, (-1,28,28))
valid_preds = net.forward(x_valid)
random_idx = np.random.randint(0, len(x_valid))
prediction = np.argmax(valid_preds, axis=1)[random_idx]
correct = y_valid[random_idx]
print("prediction:", prediction, "correct:", correct)
show(valid_images[random_idx])