In [1]:
import numpy as np
from PIL import Image
import random
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F

In [2]:
def int_of_bytes(bytes):
    """Interpret a byte string as an unsigned big-endian integer.

    An empty byte string decodes to 0.
    """
    return int.from_bytes(bytes, byteorder="big", signed=False)


def open_image(path):
    """Load an IDX3 image file into a uint8 array.

    The file starts with a 16-byte header of four big-endian 32-bit
    integers: a magic number (skipped, not validated), the number of
    images, the number of rows and the number of columns. Every byte after
    the header is treated as one pixel.

    Args:
        path: Path of the file to read.

    Returns:
        A writable ``uint8`` array of shape ``(num_images, rows, columns)``.

    Raises:
        ValueError: If the number of pixel bytes does not match the
            dimensions announced in the header (raised by ``reshape``).
    """
    # The context manager closes the handle even if a read fails.
    with open(path, "rb") as f:
        f.read(4)  # magic number
        num_images = int.from_bytes(f.read(4), "big")
        rows = int.from_bytes(f.read(4), "big")
        columns = int.from_bytes(f.read(4), "big")
        # The handle now sits just past the header, so this reads the pixel
        # payload in the same pass instead of re-reading the whole file.
        data = np.fromfile(f, dtype="uint8")
    return data.reshape((num_images, rows, columns))


def display_image(np_array):
    """Show a NumPy array as an 8-bit grayscale image.

    The array is wrapped in a PIL image, converted to mode ``"L"`` and
    handed to the notebook's ``display``.
    """
    grayscale = Image.fromarray(np_array).convert("L")
    display(grayscale)


def open_label(path):
    """Load an IDX1 label file into a uint8 array.

    The file starts with an 8-byte header of two big-endian 32-bit
    integers: a magic number (skipped, not validated) and the number of
    labels. Every byte after the header is treated as one label.

    Args:
        path: Path of the file to read.

    Returns:
        A writable ``uint8`` array of shape ``(num_images,)``.

    Raises:
        ValueError: If the number of label bytes does not match the count
            announced in the header (raised by ``reshape``).
    """
    # The context manager closes the handle even if a read fails.
    with open(path, "rb") as f:
        f.read(4)  # magic number
        num_images = int.from_bytes(f.read(4), "big")
        # The handle now sits just past the header, so this reads the label
        # payload in the same pass instead of re-reading the whole file.
        data = np.fromfile(f, dtype="uint8")
    return data.reshape(num_images)


In [3]:
# Read the IDX-format image and label files for the training and test
# splits; the paths are relative to the notebook's working directory.
images_train, labels_train = (
    open_image("train-images-idx3-ubyte"),
    open_label("train-labels-idx1-ubyte"),
)
images_test, labels_test = (
    open_image("t10k-images-idx3-ubyte"),
    open_label("t10k-labels-idx1-ubyte"),
)


In [4]:
# Two-layer network: 784 inputs -> hidden_size hidden units -> 10 outputs.
hidden_size = 200

# Fixed seed so the initial parameters are the same on every run.
g = torch.Generator().manual_seed(2147483647)
# Every parameter is a standard-normal draw scaled by 0.1. The draw order
# (W1, b1, W2, b2) fixes the values, so it must not be rearranged.
W1 = 0.1 * torch.randn(784, hidden_size, generator=g)
b1 = 0.1 * torch.randn(hidden_size, generator=g)
W2 = 0.1 * torch.randn(hidden_size, 10, generator=g)
b2 = 0.1 * torch.randn(10, generator=g)

In [5]:
# Gather every trainable tensor and turn on autograd tracking for each,
# so that loss.backward() populates their .grad fields.
parameters = [W1, b1, W2, b2]
for p in parameters:
    p.requires_grad_(True)

In [6]:
num_train = len(images_train)

# Flatten every image into a single row of floats: (num_train, rows*columns).
xTrain = torch.from_numpy(images_train).view((num_train, -1)).float()
# One-hot targets. num_classes is pinned to 10 so the target width always
# matches the 10 outputs of W2/b2; without it one_hot infers the width from
# the largest label present, which silently shrinks on a subset of the data.
yTrain = F.one_hot(torch.from_numpy(labels_train).to(torch.int64), num_classes=10)

In [7]:
# Take the first training example (input row and its one-hot target) and
# show the shape of the input as a sanity check.
x, y = xTrain[0], yTrain[0]
x.shape

torch.Size([784])

In [8]:
# Forward pass on the single example: sigmoid hidden layer, sigmoid output
# layer, then half the mean squared error against the one-hot target.
h = torch.sigmoid(x @ W1 + b1)
ypredict = torch.sigmoid(h @ W2 + b2)
e1 = ypredict - y
loss = torch.mean(e1 * e1) / 2
loss

tensor(0.1805, grad_fn=<DivBackward0>)

In [9]:
batch_size = 32
num_steps = 100000  # total SGD updates
log_every = 10000   # print the minibatch loss every this many steps

# Minibatch SGD on the two-layer sigmoid network with a mean-squared-error
# loss. The generator is re-seeded so the batch sequence is reproducible.
g = torch.Generator().manual_seed(2147483647)
for i in range(num_steps):
    # sample batch_size row indices uniformly, with replacement
    ix = torch.randint(0, xTrain.shape[0], (batch_size,), generator=g)
    xBatch, yBatch = xTrain[ix], yTrain[ix]
    # forward pass
    h = (xBatch @ W1 + b1).sigmoid()
    ypredict = (h @ W2 + b2).sigmoid()
    e1 = ypredict - yBatch
    loss = (e1*e1).mean()
    # backward pass: clear stale gradients, then backpropagate
    for p in parameters:
        p.grad = None
    loss.backward()
    # NOTE(review): the learning rate goes UP (0.01 -> 0.1) after step 10000;
    # confirm this warm-up is intended rather than an inverted decay.
    lr = 0.1 if i > 10000 else 0.01
    # Update in place under no_grad instead of writing through .data, so the
    # step is excluded from the graph without bypassing autograd's checks.
    with torch.no_grad():
        for p in parameters:
            p -= lr * p.grad
    if i % log_every == 0:
        print(loss.item())
    

0.2926088869571686
0.05402306839823723
0.021857889369130135
0.011928318068385124
0.011884871870279312
0.010624482296407223
0.00790864322334528
0.005835279356688261
0.006550537887960672
0.006806498859077692


In [12]:
# Re-check the loss on a freshly sampled minibatch after training.
g = torch.Generator().manual_seed(2147483647)
ix = torch.randint(0, xTrain.shape[0], (batch_size,), generator=g)
# Index with the sampled ix; otherwise xBatch/yBatch would silently be
# whatever the last training iteration left behind in the kernel.
xBatch, yBatch = xTrain[ix], yTrain[ix]
h = (xBatch @ W1 + b1).sigmoid()
ypredict = (h @ W2 + b2).sigmoid()
e1 = ypredict - yBatch
loss = (e1*e1).mean()/2
loss

tensor(0.0030, grad_fn=<DivBackward0>)

In [13]:
# Inspect the shape of the current minibatch of one-hot targets.
yBatch.shape

torch.Size([32, 10])

In [14]:
num_test = len(images_test)

# Keep the test inputs under their own name. Binding them to xTrain would
# clobber the training inputs, so re-running any training cell afterwards
# would pair test images with training labels.
xTest = torch.from_numpy(images_test).view((num_test, -1)).float()
yTest = torch.from_numpy(labels_test)
total = len(yTest)

# Score the whole test set in one batched forward pass; no_grad skips
# building an autograd graph that evaluation never uses.
with torch.no_grad():
    h = (xTest @ W1 + b1).sigmoid()
    ypredict = (h @ W2 + b2).sigmoid()
# The predicted class is the index of the largest output in each row.
correct = (ypredict.argmax(dim=1) == yTest).sum().item()
print(f"Accuracy = {correct}/{total}")

Accuracy = 9513/10000
