# MNIST from scratch with numpy

In [1]:
# load changed modules automatically
%reload_ext autoreload
%autoreload 2

In [2]:
# load numpy
import numpy as np

# load dataloaders and required layers
from mnist import dataloader
from mnist import layers
from mnist.layers import Softmax, Linear

# load pyplot for displaying images
from matplotlib import pyplot as plt

# show images inline on notebook
%matplotlib inline

# debugging
import pdb

## Load training data

In [3]:
dl = dataloader.DataLoader()
((x_train, y_train), (x_valid, y_valid), _) = dl.load_data()

In [4]:
x_train.shape, y_train.shape, x_valid.shape, y_valid.shape

((50000, 784), (50000,), (10000, 784), (10000,))

## Normalize data

In [5]:
(x_train, y_train), (x_valid, y_valid) = dl.normalize(((x_train, y_train), (x_valid, y_valid)))

## Exploring the data

In [None]:
def show(img):
    plt.imshow(img, cmap="gray")

In [None]:
valid_images = np.reshape(x_valid, (-1,28,28))

In [None]:
show(valid_images[0])

## Start building the model

In [6]:
def shuffle(arr1, arr2):
    random_idxs = np.arange(len(arr1))
    np.random.shuffle(random_idxs)
    return x_train[random_idxs], y_train[random_idxs]

In [23]:
# always reproduce the same weights
#np.random.seed(1)

class Net():
    def __init__(self):
        self.input_layer = Linear(28*28, 10) # linear layer with bias
        self.softmax = Softmax()
    
    def forward(self, x):
        x = self.input_layer.forward(x)
        x = self.softmax.forward(x)
        return x

net = Net()

In [24]:
debug = False

if debug:
    n_epochs = 1
    batch_size = 8
else:
    n_epochs = 50
    batch_size = 512
    
learning_rate = 1e-3
prev_loss = None

for epoch in range(n_epochs):
    
    for i in range(0, len(x_train), batch_size):

        inputs = x_train[i:i+batch_size]
        targets = y_train[i:i+batch_size]
        
        inputs, targets = shuffle(inputs, targets)
            
        if debug:
            print("inputs.shape", inputs.shape)
            print("targets.shape", targets.shape)

        # forward propagation
        y_pred = net.forward(inputs)
        predictions = net.softmax.forward(y_pred)

        if debug:
            print("y_pred.shape:", y_pred.shape)
            print("predictions.shape", predictions.shape)
            
        # calculate cross-entropy loss
        loss = net.softmax.cross_entropy(predictions, targets)
        
        if debug:
            print(epoch, loss)
        
        # backpropagation        
        grad_loss = net.softmax.backward(predictions, targets)
        grad_input_layer, grad_bias = net.input_layer.backward(inputs, grad_loss)
        
        if debug:
            print("grad_input_layer.shape", grad_input_layer.shape)
            print("net.input_layer.weights.shape", net.input_layer.weights.shape)
            print("net.input_layer.bias.shape", net.input_layer.bias.shape)
        
        net.input_layer.weights -= learning_rate * grad_input_layer.T
        net.input_layer.bias -= learning_rate * grad_bias
        
        if debug:
            break
        
    # calculate validation loss for some random indices
    random_idxs = np.random.randint(0, len(x_valid), batch_size)
    y_valid_pred = net.forward(x_valid[random_idxs])
    loss_valid = net.softmax.cross_entropy(y_valid_pred, y_valid[random_idxs])
    
    #calculate accuracy and validation accuracy
    accuracy = np.mean(y_pred.argmax(axis=1) == targets)
    valid_accuracy = np.mean(y_valid_pred.argmax(axis=1) == y_valid[random_idxs])
    
    print(epoch, loss, loss_valid, accuracy, valid_accuracy)

    if prev_loss is None:
        prev_loss = loss
    else:
        if loss > prev_loss:
            print("EPOCH", epoch, "ITS BIGGER")
            #pdb.set_trace()


0 2.076543342103366 1.6804543203751925 0.6101190476190477 0.49609375
1 1.9110294050639973 1.5860954056228453 0.6875 0.578125
2 1.831531643722002 1.4208547872320407 0.6964285714285714 0.650390625
3 1.7929787882997836 1.7427525296464241 0.7172619047619048 0.6171875
4 1.7722957456274895 1.7972089727254121 0.7232142857142857 0.61328125
5 1.76007463343865 1.8872505570209188 0.7232142857142857 0.634765625
6 1.7522785946222765 2.0790246460075528 0.7261904761904762 0.6484375
7 1.747018177700156 2.010945473784499 0.7261904761904762 0.6640625
8 1.7433241831559356 2.4057902377220644 0.7261904761904762 0.64453125
9 1.740653141903805 2.921471892649608 0.7261904761904762 0.59765625
10 1.738676820059311 3.199908242079837 0.7261904761904762 0.623046875
11 1.7371859206723423 3.53180765122415 0.7261904761904762 0.6171875
12 1.73604184339761 3.3357648507637627 0.7261904761904762 0.642578125
13 1.7351502663861553 3.507613658283627 0.7261904761904762 0.6171875
14 1.7344456216911586 3.81612945485708 0.72321