In [1]:
from engine import Vector
import nn
import functional as F

In [2]:
# use torch to get mnist data
import torch
import torchvision 
from torchvision import transforms

MLP definition

In [3]:
class MLP(nn.Module):
    """Multi-layer perceptron: linear layers with a sigmoid between each pair.

    Every ``nn.Linear`` except the last one is followed by an ``nn.Sigmoid``;
    the output of the final linear layer is returned without an activation.
    """

    def __init__(self, nin, nouts):
        """Build the layer stack.

        Args:
            nin: Size of the model input.
            nouts: Output size of each linear layer, in order. Any iterable
                of sizes is accepted (list, tuple, ...); the last entry is
                the size of the model output.
        """
        # Unpack instead of `[nin] + nouts` so non-list sequences work too.
        widths = [nin, *nouts]
        last = len(widths) - 2  # index of the final linear layer
        self.layers = []
        for i, (fan_in, fan_out) in enumerate(zip(widths, widths[1:])):
            self.layers.append(nn.Linear(fan_in, fan_out))
            # No activation after the final linear layer.
            if i != last:
                self.layers.append(nn.Sigmoid())

    def __repr__(self):
        """Return a one-line summary listing every layer in order."""
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

    def __call__(self, x):
        """Feed ``x`` through every layer in order and return the result."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return one flat list with the parameters of every layer, in layer order."""
        return [p for layer in self.layers for p in layer.parameters()]

Training

In [4]:
# Preprocessing applied to every training image: convert to a tensor, then
# normalize with the given mean / std.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST training split, stored under ./data.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=train_transform,
    download=True,
)

In [5]:
# Walk the training dataset once, then split the (image, label) pairs into
# two parallel lists for preprocessing.
train_samples = list(train_dataset)
xs = [image for image, _ in train_samples]
ys = [label for _, label in train_samples]
del train_samples  # temporary only; xs / ys hold the data from here on

len(xs), len(ys)

(60000, 60000)

In [6]:
# Flatten each image tensor, turn it into a plain Python list, and wrap the
# list in a Vector. Note: this rebinds `xs`, so the cell is meant to run once
# per fresh `xs`.
xs = [Vector(torch.flatten(image).tolist()) for image in xs]

In [7]:
# Hyperparameters of the training loop below.
LEARNING_RATE = 0.1  # step size of each gradient-descent update
LOG_EVERY = 1000     # print the current sample's loss every LOG_EVERY iterations

# Layer sizes: 784 -> 16 -> 16 -> 10.
model = MLP(784, [16, 16, 10])

# training loop: one pass over the training lists, one update per sample
for it, (x, y) in enumerate(zip(xs, ys)):
    # forward pass
    ypred = model(x)
    loss = F.cross_entropy(ypred, y)

    # backward pass: reset every gradient entry to zero, then backpropagate
    for p in model.parameters():
        for i in range(len(p.grad)):
            p.grad[i] = 0.0
    loss.backward()

    # update: move each parameter entry against its gradient
    for p in model.parameters():
        for i in range(len(p.grad)):
            p.data[i] += -LEARNING_RATE * p.grad[i]

    # The logged value is the loss of this single sample, computed before the update.
    if it % LOG_EVERY == 0:
        print(f"it: {it}, loss: {loss}")

it: 0, loss: Vector(data=[3.7758259746949765])
it: 1000, loss: Vector(data=[0.7096865254091745])
it: 2000, loss: Vector(data=[2.1755073925857094])
it: 3000, loss: Vector(data=[2.937726925792173])
it: 4000, loss: Vector(data=[0.8237005189188523])
it: 5000, loss: Vector(data=[1.2924954477194541])
it: 6000, loss: Vector(data=[0.02849247329047058])
it: 7000, loss: Vector(data=[0.4270636586225201])
it: 8000, loss: Vector(data=[0.007921347504474081])
it: 9000, loss: Vector(data=[0.19259613528648062])
it: 10000, loss: Vector(data=[0.1986309883710945])
it: 11000, loss: Vector(data=[0.5966477250787485])
it: 12000, loss: Vector(data=[0.7467733562488219])
it: 13000, loss: Vector(data=[0.15635395665137808])
it: 14000, loss: Vector(data=[0.0008652280179537767])
it: 15000, loss: Vector(data=[0.11089688623004268])
it: 16000, loss: Vector(data=[0.17589293352049917])
it: 17000, loss: Vector(data=[0.06367646974913856])
it: 18000, loss: Vector(data=[0.1591280027737671])
it: 19000, loss: Vector(data=[0.15

Testing

In [8]:
# Preprocessing applied to every test image: convert to a tensor, then
# normalize with the given mean / std.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST test split, stored under ./data.
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=test_transform,
    download=True,
)

In [9]:
# Walk the test dataset once, then split the (image, label) pairs into two
# parallel lists for preprocessing.
test_samples = list(test_dataset)
test_xs = [image for image, _ in test_samples]
test_ys = [label for _, label in test_samples]
del test_samples  # temporary only; test_xs / test_ys hold the data from here on

len(test_xs), len(test_ys)

(10000, 10000)

In [10]:
# Flatten each test image tensor, turn it into a plain Python list, and wrap
# the list in a Vector. Note: this rebinds `test_xs`, so the cell is meant to
# run once per fresh `test_xs`.
test_xs = [Vector(torch.flatten(image).tolist()) for image in test_xs]

In [11]:
def argmax(input: "Vector"):
    """Return the index of the largest entry of ``input.data``.

    Ties resolve to the first (lowest) index, the same convention used by
    ``torch.argmax`` and ``numpy.argmax``. Entries that compare false against
    every value (NaN) are never selected.

    Args:
        input: Object exposing its values as the sequence ``input.data``.

    Returns:
        Index of the maximum, or ``None`` when ``input.data`` is empty or
        contains no comparable value.
    """
    res = None
    max_val = float('-inf')
    for i, val in enumerate(input.data):
        # Strict '>' keeps the earliest maximum on ties. The second clause
        # lets the first comparable entry win even when it equals -inf, so an
        # all -inf input still yields index 0 instead of None.
        if val > max_val or (res is None and val >= max_val):
            res = i
            max_val = val
    return res

In [12]:
# Score the model on the test set: one boolean per sample, True when the
# index of the largest logit equals the label.
hits = [
    label == argmax(model(sample))
    for sample, label in zip(test_xs, test_ys)
]
correct = sum(hits)
total = len(hits)

print(f"accuracy: {correct / total}")

accuracy: 0.8571
