In [None]:
import numpy as np
from tqdm import trange
import gzip
from pathlib import Path
import urllib.request
import shutil

In [None]:
def parse(file):
    """Read a gzipped MNIST file into a uint8 array, downloading it if missing.

    Args:
        file: Local path (``str`` or ``Path``) of the ``.gz`` file. When the
            file does not exist, its base name is appended to the MNIST base
            URL and the result is downloaded to this path.

    Returns:
        A writable 1-D ``np.uint8`` array with the complete decompressed
        content of the file (header bytes included).
    """
    file = Path(file)  # accept plain strings as well as Path objects

    if not file.is_file():
        url = "http://yann.lecun.com/exdb/mnist/" + file.name
        file.parent.mkdir(parents=True, exist_ok=True)

        # Download to a temporary name first so an interrupted transfer never
        # leaves a truncated archive that later calls would treat as valid.
        partial = file.with_name(file.name + ".part")
        try:
            with urllib.request.urlopen(url) as response, open(partial, 'wb') as out_file:
                shutil.copyfileobj(response, out_file)
            partial.replace(file)
        finally:
            partial.unlink(missing_ok=True)

    # Close the gzip handle deterministically instead of leaking it.
    with gzip.open(file) as archive:
        raw = archive.read()

    # frombuffer() yields a read-only view on `raw`; copy to get a writable array.
    return np.frombuffer(raw, dtype=np.uint8).copy()


def fetch_mnist():
    """Load the MNIST train and test splits from the local ``mnist`` folder.

    The folder is looked up next to this source file; when ``__file__`` is
    not defined (as in a notebook or interactive session) the current
    working directory is used instead. Each file is read through ``parse``.

    Returns:
        Tuple ``(X_train, Y_train, X_test, Y_test)``. The image arrays are
        ``float32`` with one flattened 28*28 image per row; the label arrays
        are the ``uint8`` content of the label files after their first 8 bytes.
    """
    try:
        dirname = Path(__file__).parent.resolve()
    except NameError:
        # Notebook / interactive namespaces have no __file__.
        dirname = Path.cwd()

    def load_images(name):
        # Skip the first 0x10 bytes of the image file, then one row per image.
        return parse(dirname / "mnist" / name)[0x10:].reshape((-1, 28*28)).astype(np.float32)

    def load_labels(name):
        # Skip the first 8 bytes of the label file.
        return parse(dirname / "mnist" / name)[8:]

    X_train = load_images("train-images-idx3-ubyte.gz")
    Y_train = load_labels("train-labels-idx1-ubyte.gz")
    X_test = load_images("t10k-images-idx3-ubyte.gz")
    Y_test = load_labels("t10k-labels-idx1-ubyte.gz")
    print(X_train.shape, Y_train.shape, X_test.shape, Y_test.shape)

    return X_train, Y_train, X_test, Y_test


def one_hot_encoding(Y, num_classes=10):
    """Convert integer class labels into one-hot rows.

    Args:
        Y: Sequence or 1-D array of integer labels, each in
            ``[0, num_classes)``.
        num_classes: Width of the encoding. Defaults to 10, the value that
            was previously hard-coded.

    Returns:
        Float array of shape ``(len(Y), num_classes)`` with a single 1 per
        row at the column given by the label.
    """
    n = len(Y)
    Y_one_hot = np.zeros((n, num_classes))
    # Pair row k with column Y[k] and set that single cell.
    Y_one_hot[np.arange(n), Y] = 1
    return Y_one_hot


def softmax(Z):
    """Row-wise softmax of a 2-D array of scores.

    Args:
        Z: Array of shape ``(rows, classes)``. It is not modified.

    Returns:
        Array of the same shape whose rows are non-negative and sum to 1.
    """
    # Subtract the row maximum for numerical stability. Build a new array
    # rather than using `-=`, which would overwrite the caller's data.
    shifted = Z - np.max(Z, axis=1, keepdims=True)
    e_Z = np.exp(shifted)
    A = e_Z / e_Z.sum(axis=1, keepdims=True)
    return A


def cross_entropy_loss(Y, Y_hat):
    """Cross-entropy between targets ``Y`` and predictions ``Y_hat``.

    The element-wise products ``Y * log(Y_hat + 1e-10)`` are summed over the
    whole array, negated, and divided by ``len(Y)``. The small constant keeps
    the logarithm finite when a predicted value is exactly zero.
    """
    tiny = 1e-10
    log_predictions = np.log(Y_hat + tiny)
    weighted_total = np.sum(Y * log_predictions)
    return -weighted_total / len(Y)


def relu(Z):
    """Element-wise rectifier: each entry of ``Z`` is replaced by ``max(entry, 0)``."""
    floor = 0
    rectified = np.maximum(Z, floor)
    return rectified

In [None]:
# Load the four MNIST arrays (train/test images and labels) into notebook
# globals; fetch_mnist() also prints their shapes as a quick sanity check.
X_train, Y_train, X_test, Y_test = fetch_mnist()

In [None]:
class Net:
    """Two-layer fully connected classifier: linear -> ReLU -> linear -> softmax.

    Parameters live in ``W1``/``b1`` (input -> hidden) and ``W2``/``b2``
    (hidden -> output) and are updated in place by :meth:`backprop`.
    Every method accepts either a batch of shape ``(m, input_size)`` or a
    single 1-D sample, which is treated as a batch of one row.
    """

    def __init__(self, input_size, hidden_size, output_size):
        """Draw the weights from a standard normal and set the biases to zero."""
        self.W1 = np.random.randn(input_size, hidden_size)
        self.b1 = np.zeros((1, hidden_size))
        self.W2 = np.random.randn(hidden_size, output_size)
        self.b2 = np.zeros((1, output_size))

    def _forward_pass(self, X):
        """Run the network once and return the intermediates backprop needs.

        Returns:
            Tuple ``(X, Z1, A1, A2)`` where ``X`` is the input promoted to
            2-D, ``Z1``/``A1`` are the hidden pre-activation/activation and
            ``A2`` holds the softmax class probabilities.
        """
        X = np.atleast_2d(X)
        Z1 = X.dot(self.W1) + self.b1
        A1 = relu(Z1)
        Z2 = A1.dot(self.W2) + self.b2
        A2 = softmax(Z2)
        return X, Z1, A1, A2

    def forward(self, X):
        """Return class probabilities of shape ``(m, output_size)`` for ``X``."""
        return self._forward_pass(X)[-1]

    def compute_loss(self, X, Y):
        """Return the cross-entropy between one-hot targets ``Y`` and the predictions for ``X``."""
        Y_hat = self.forward(X)
        loss = cross_entropy_loss(Y, Y_hat)
        return loss

    def predict(self, X):
        """Return the index of the most probable class for every row of ``X``."""
        Y_hat = self.forward(X)
        return np.argmax(Y_hat, axis=1)

    def compute_accuracy(self, X, Y):
        """Return the fraction of rows whose predicted class matches one-hot ``Y``."""
        predictions = self.predict(X)
        correct = np.sum(predictions == np.argmax(Y, axis=1))
        accuracy = correct / len(Y)
        return accuracy

    def backprop(self, X, Y, learning_rate=0.01):
        """Take one gradient-descent step on the mean cross-entropy loss.

        Args:
            X: Inputs, ``(m, input_size)`` or a single 1-D sample.
            Y: One-hot targets with one row per sample.
            learning_rate: Step size applied to every parameter.
        """
        X, Z1, A1, A2 = self._forward_pass(X)

        # Average over the number of samples (rows). Using len() on a 1-D
        # sample would return its feature count and shrink the gradient.
        m = X.shape[0]

        # Output layer: gradient of softmax + cross-entropy w.r.t. the logits.
        E2 = (A2 - Y) / m
        dW2 = np.dot(A1.T, E2)
        db2 = np.sum(E2, axis=0, keepdims=True)

        # Hidden layer: propagate back and zero the units ReLU switched off.
        E1 = np.dot(E2, self.W2.T)
        E1[Z1 <= 0] = 0
        dW1 = np.dot(X.T, E1)
        db1 = np.sum(E1, axis=0, keepdims=True)

        self.W1 -= learning_rate * dW1
        self.b1 -= learning_rate * db1
        self.W2 -= learning_rate * dW2
        self.b2 -= learning_rate * db2

In [None]:
# Hyperparameters of the training run, gathered in one place for easy tuning.
SEED = 0
HIDDEN_SIZE = 100
EPOCHS = 10
LEARNING_RATE = 0.001

# Seed NumPy's global RNG so the random weight initialisation is reproducible.
np.random.seed(SEED)
net = Net(784, HIDDEN_SIZE, 10)


# Plain SGD: one parameter update per training sample, EPOCHS passes over the data.
for epoch in (t := trange(EPOCHS)):
    running_loss = 0.0
    total_accuracy = 0.0

    for i, (data, label) in enumerate(zip(X_train, Y_train)):
        X, Y = data, one_hot_encoding([label])

        # Score the sample BEFORE updating on it, so loss and accuracy both
        # describe the same model state and accuracy is not inflated by
        # re-testing a sample the network has just been fitted to.
        loss = net.compute_loss(X, Y)
        accuracy = net.compute_accuracy(X, Y)
        running_loss += loss
        total_accuracy += accuracy

        net.backprop(X, Y, learning_rate=LEARNING_RATE)

    average_accuracy = total_accuracy / len(X_train)

    t.set_description(
        f'epoch: {epoch+1}, loss: {running_loss/len(X_train):.3f}, accuracy: {average_accuracy:.3f}')

In [None]:
# Final evaluation on the held-out test split.
# The previous loop walked the training images and looked labels up with the
# index `i` left over from the training cell, so every prediction was compared
# against one and the same label. Pair each test image with its own label
# instead; predict() handles the whole batch in a single call.
predictions = net.predict(X_test)
correct = np.sum(predictions == Y_test)
total = len(Y_test)

print('accuracy: %.3f' % (correct/total))