# Binary MNIST classification

In [13]:
import random
from micrograd.engine import Value
import micrograd.nn as nn
import micrograd.functional as F

# using torch to download MNIST dataset
import torch
import torchvision 
from torchvision import transforms

# MLP definition

In [2]:
class MLP(nn.Module):
    """Multi-layer perceptron: `nn.Linear` layers, each followed by `nn.Sigmoid`.

    Args:
        nin: number of input features.
        nouts: sequence giving the output width of each linear layer, in
            order; its last entry is the width of the network's output.
    """

    def __init__(self, nin, nouts):
        # list(...) accepts any sequence (e.g. a tuple), not only a list.
        sz = [nin] + list(nouts)
        self.layers = []
        # Consecutive entries of sz are the (input, output) widths of one layer.
        for fan_in, fan_out in zip(sz, sz[1:]):
            self.layers.append(nn.Linear(fan_in, fan_out))
            self.layers.append(nn.Sigmoid())

    def __call__(self, x):
        """Feed `x` through every layer in order and return the final result."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return one flat list with the parameters reported by every layer."""
        return [p for layer in self.layers for p in layer.parameters()]

# Training

In [12]:
# Per-image preprocessing: convert to a tensor, then normalize with the
# given mean and std.
train_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST training split, stored under ./data.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    transform=train_transform,
    download=True,
)

In [4]:
# Keep only the samples labelled 0 or 1, in a single pass over the dataset.
binary_train = [(image, label) for image, label in train_dataset if label in (0, 1)]
xs = [image for image, _ in binary_train]
ys = [label for _, label in binary_train]

len(xs), len(ys)

(12665, 12665)

In [5]:
# Convert every image to a flat (1-D) Python list.
# torch.as_tensor keeps this cell safe to re-run: after the first run the
# entries are already lists, which torch.flatten would reject.
xs = [torch.as_tensor(x).flatten().tolist() for x in xs]

In [6]:
# 784 inputs (expected to match the length of each flattened image list),
# then linear layers of width 16, 16 and 1 -- a single output value.
model = MLP(nin=784, nouts=[16, 16, 1])

In [7]:
# SGD with one training example per update. Raising N_UPDATES trades time
# for accuracy; it must not exceed len(xs) because sample k is used at step k.
N_UPDATES = 150
LEARNING_RATE = 0.1
LOG_EVERY = 10

for k in range(N_UPDATES):
    x = xs[k]
    y = ys[k]

    # forward pass: the model output is indexable; element 0 is the prediction
    ypred = model(x)[0]
    loss = F.binary_cross_entropy(ypred, y)

    # backward pass: zero every gradient before backpropagating this loss
    for p in model.parameters():
        p.grad = 0.0
    loss.backward()

    # update: move each parameter against its gradient
    for p in model.parameters():
        p.data += -LEARNING_RATE * p.grad

    if k % LOG_EVERY == 0:
        print(k, loss.data)


0 0.9462545454928128
10 0.81577553791333
20 1.2227966637286534
30 0.8080784827888361
40 0.41931684464144864
50 0.8000838193974728
60 0.3056201762486865
70 0.5029754654018131
80 0.3117249886614138
90 1.0913402552878135
100 0.24426671324323307
110 0.4129608168254022
120 0.7716425367793331
130 0.17946794367326155
140 0.281612965261799


# Testing

In [8]:
# Per-image preprocessing: convert to a tensor, then normalize with the
# given mean and std.
test_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST test split (train=False), stored under ./data.
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    transform=test_transform,
    download=True,
)

In [9]:
# Keep only the test samples labelled 0 or 1, in a single pass.
binary_test = [(image, label) for image, label in test_dataset if label in (0, 1)]
test_xs = [image for image, _ in binary_test]
test_ys = [label for _, label in binary_test]

len(test_xs), len(test_ys)

(2115, 2115)

In [10]:
# Convert every test image to a flat (1-D) Python list.
# torch.as_tensor keeps this cell safe to re-run: after the first run the
# entries are already lists, which torch.flatten would reject.
test_xs = [torch.as_tensor(x).flatten().tolist() for x in test_xs]

In [11]:
# Evaluate on the first 50 filtered test samples only.
eval_samples = list(zip(test_xs[:50], test_ys[:50]))

# A sample counts as correct when thresholding the model output at 0.5
# reproduces its label.
correct = sum(
    label == (1 if model(pixels)[0].data >= 0.5 else 0)
    for pixels, label in eval_samples
)
total = len(eval_samples)

print(f"accuracy: {correct / total}")

accuracy: 1.0
