In [None]:
import sys
sys.path.append('..')

import os, requests, gzip, hashlib
import numpy as np
import matplotlib.pyplot as plt

from autograd.engine import Value

In [None]:
def load_mnist(base_url='http://yann.lecun.com/exdb/mnist/'):
    """Load the MNIST digits as normalised image arrays plus raw labels.

    The four gzip archives are obtained through ``fetch``, which caches each
    download on disk, so only the first call needs network access.

    Parameters
    ----------
    base_url : str, optional
        URL prefix the four ``*-ubyte.gz`` archives are fetched from. The
        default is the original host; pass a mirror URL if it is unreachable.

    Returns
    -------
    tuple
        ``(x_train, y_train), (x_test, y_test), class_names`` where the
        ``x_*`` arrays are float32 with shape ``(n, 28, 28, 1)`` and values
        scaled to ``[0, 1]``, the ``y_*`` arrays are the uint8 label bytes,
        and ``class_names`` is the list ``['0', ..., '9']``.

    Raises
    ------
    ValueError
        If a split contains a different number of images and labels, or if
        the pixel data is not a whole number of 28x28 images.
    """
    class_names = [str(digit) for digit in range(10)]
    prefix = base_url.rstrip('/') + '/'

    def _images(filename):
        # Skip the 16-byte header of the image file; the rest is raw pixels.
        pixels = fetch(prefix + filename)[0x10:]
        scaled = pixels.astype('float32') / 255.0
        return scaled.reshape(-1, 28, 28, 1)

    def _labels(filename):
        # Skip the 8-byte header of the label file; the rest is one byte per label.
        return fetch(prefix + filename)[8:]

    x_train = _images('train-images-idx3-ubyte.gz')
    y_train = _labels('train-labels-idx1-ubyte.gz')
    x_test = _images('t10k-images-idx3-ubyte.gz')
    y_test = _labels('t10k-labels-idx1-ubyte.gz')

    # A truncated or mismatched download would otherwise only surface much
    # later as a confusing shape error during training.
    for split, images, labels in (('train', x_train, y_train), ('test', x_test, y_test)):
        if len(images) != len(labels):
            raise ValueError(
                f"MNIST {split} split is inconsistent: "
                f"{len(images)} images but {len(labels)} labels"
            )

    return (x_train, y_train), (x_test, y_test), class_names

def fetch(url):
    """Return the gzip-decompressed bytes behind ``url`` as a uint8 array.

    The compressed payload is cached in ``/tmp`` under the MD5 hex digest of
    the URL, so repeated calls for the same URL do not hit the network.

    Parameters
    ----------
    url : str
        Location of a gzip-compressed file.

    Returns
    -------
    numpy.ndarray
        Writable 1-D ``uint8`` array holding the decompressed bytes.

    Raises
    ------
    requests.HTTPError
        If the server answers with an error status.
    OSError, EOFError
        If the payload is not valid gzip data.
    """
    filepath = os.path.join("/tmp", hashlib.md5(url.encode('utf-8')).hexdigest())

    # A zero-length cache entry is what an interrupted download used to leave
    # behind; treat it as missing so it gets refreshed instead of reused.
    if os.path.isfile(filepath) and os.path.getsize(filepath) > 0:
        with open(filepath, 'rb') as f:
            data = f.read()
        return np.frombuffer(gzip.decompress(data), dtype=np.uint8).copy()

    # Download and validate BEFORE touching the cache, so an HTTP error page
    # or a broken archive is never stored under the cache key.
    response = requests.get(url, timeout=60)
    response.raise_for_status()
    data = response.content
    decoded = gzip.decompress(data)

    # Write to a sibling file and rename it into place: readers either see the
    # complete archive or no cache entry at all.
    partial_path = f"{filepath}.{os.getpid()}.part"
    try:
        with open(partial_path, 'wb') as f:
            f.write(data)
        os.replace(partial_path, filepath)
    finally:
        if os.path.exists(partial_path):
            os.remove(partial_path)

    # np.frombuffer yields a read-only view; copy so callers may modify it.
    return np.frombuffer(decoded, dtype=np.uint8).copy()

In [None]:
# Load the dataset once; the arrays are reused by the cells below.
(x_train, y_train), (x_test, y_test), class_names = load_mnist()

# Report the size of every split as a quick sanity check.
print('Training Dataset Shape:', x_train.shape)
print('Number of training images:', len(x_train))
print("Number of training labels:", len(y_train))
print('Number of test images:', len(x_test))
print("Number of test labels:", len(y_test))

In [None]:
def calculate_loss(X,Y,W):
    """Mean softmax cross-entropy of the linear model ``X @ W``.

    Parameters
    ----------
    X : numpy.ndarray
        Inputs with one sample per row, shape ``(n_samples, n_features)``.
    Y : numpy.ndarray
        Targets with the same shape as the logits ``X @ W``; each row weights
        the log-probabilities of that sample (one-hot rows select the true class).
    W : numpy.ndarray
        Weight matrix, shape ``(n_features, n_classes)``.

    Returns
    -------
    float
        ``-(1 / n_samples) * sum(Y * log(softmax(X @ W)))``.
    """
    logits = np.matmul(X, W)
    # Subtracting the row maximum leaves the softmax unchanged but keeps
    # exp() from overflowing, which would turn the loss into NaN.
    shifted = logits - np.max(logits, axis=1, keepdims=True)
    log_probs = shifted - np.log(np.sum(np.exp(shifted), axis=1, keepdims=True))
    loss = -(1 / X.shape[0]) * np.sum(Y * log_probs)
    return loss

In [None]:
def to_categorical(y, num_classes):
    """One-hot encode integer class labels.

    Each label selects the matching row of a ``num_classes`` x ``num_classes``
    identity matrix, so the result has one trailing axis of length
    ``num_classes`` and dtype ``uint8``.
    """
    label_indices = np.asarray(y, dtype="int")
    one_hot_table = np.eye(num_classes, dtype='uint8')
    return one_hot_table[label_indices]

In [None]:
# Flatten every image into a single row of pixel features. The row count is
# read from the arrays themselves rather than hard-coded, so the cell also
# works when only part of the dataset is loaded.
train_images = x_train.reshape(x_train.shape[0], -1)
test_images = x_test.reshape(x_test.shape[0], -1)
# One-hot encode the training labels, one column per class.
train_idx = to_categorical(y_train, len(class_names))

In [None]:
# One weight per (pixel, class) pair, drawn from a standard normal distribution.
n_inputs = x_train[0].shape[0] * x_train[0].shape[1]
n_outputs = len(class_names)
Wb = Value(np.random.randn(n_inputs, n_outputs))
Wb.shape()

In [None]:
# Mini-batch training of the linear softmax classifier held in `Wb`.
# NOTE(review): `epochs` counts single mini-batch updates, not passes over the dataset.
batch_size = 32
epochs = 20000

for i in range(epochs):
    # Shuffle all row indices and keep the first `batch_size`: a batch of distinct samples.
    ri = np.random.permutation(train_images.shape[0])[:batch_size]
    Xb, Yb = Value(train_images[ri]), Value(train_idx[ri])

    # Forward pass: class scores for the batch.
    y_pred = Xb.matmul(Wb)
    
    # presumably a row-wise softmax followed by an element-wise log -- confirm against autograd.engine.Value
    probs = y_pred.softmax()
    log_probs = probs.log()

    # The one-hot rows of Yb keep only the log-probability of each sample's true class.
    Zb = Yb * log_probs

    cost = Zb.reduce_sum(axis=1).reduce_sum()  # sum of Y*log(p): the batch log-likelihood, i.e. the NEGATIVE of the cross-entropy (assuming reduce_sum sums)
    cost.backward()

    if i % 1000 == 0:
        # Every 1000 updates: loss over the full training set, computed with plain NumPy
        # from the weights as they are BEFORE this iteration's update.
        loss = calculate_loss(train_images, train_idx, Wb.data)
        print(f'Epoch: {i} | Loss: {loss:.5f}')

    # `cost` is maximised, hence the `+`: a gradient-ascent step with step size 0.01.
    Wb.data = Wb.data + 0.01 * Wb.grad
    # Clear the gradient for the next iteration -- presumably backward() accumulates into .grad; TODO confirm
    Wb.grad = 0

In [None]:
def predict(x_in, W):
    """Return the index of the highest-scoring class for ``x_in``.

    Parameters
    ----------
    x_in : Value
        A single flattened example, or a batch with one example per row.
    W : Value
        Weight matrix with one column per class.

    Returns
    -------
    The class index for a single example, or an array with one class index
    per row for a batch.
    """
    scores = x_in.matmul(W)
    # Take the argmax over the last (class) axis. For a single example this
    # equals the flat argmax; for a batch it yields one prediction per row
    # instead of an index into the flattened score matrix.
    return scores.data.argmax(axis=-1)

In [None]:
# Compare the model's prediction with the true label for one test image.
index = 123
sample_image, sample_label = x_test[index], y_test[index]
print("Ground truth:", sample_label)
plt.imshow(sample_image)

sample_input = Value(test_images[index])
print("Prediction:", predict(sample_input, Wb))