In [1]:
from micrograd.engine import Tensor
import micrograd.nn as nn
import micrograd.functional as F
from micrograd.optim import SGD
import numpy as np
from tqdm import tqdm

In [10]:
# Use torchvision to download and load the MNIST data
import torch
from torch.utils.data import DataLoader
import torchvision 
from torchvision import transforms

MLP definition

In [3]:
class MLP(nn.Module):
    """Multi-layer perceptron built from ``nn.Linear`` and ``nn.Sigmoid`` layers.

    A ``nn.Sigmoid`` is inserted after every linear layer except the last one,
    so the network returns the raw output of the final ``nn.Linear``.
    """

    def __init__(self, nin, nouts):
        """Create the layer stack.

        Args:
            nin: input size handed to the first ``nn.Linear``.
            nouts: output sizes, one per linear layer. Any iterable is
                accepted (list, tuple, ...); it is copied into a list.
        """
        # Materialise once: a tuple (or other non-list sequence) cannot be
        # concatenated to a list, and a one-shot iterable has no len().
        nouts = list(nouts)
        sizes = [nin] + nouts
        last = len(nouts) - 1

        self.layers = []
        # Consecutive size pairs give each linear layer its (in, out) shape.
        for i, (fan_in, fan_out) in enumerate(zip(sizes, sizes[1:])):
            self.layers.append(nn.Linear(fan_in, fan_out))
            if i != last:
                # No activation after the final linear layer.
                self.layers.append(nn.Sigmoid())

    def __repr__(self):
        """Return a one-line summary listing every layer in order."""
        return f"MLP of [{', '.join(str(layer) for layer in self.layers)}]"

    def __call__(self, x):
        """Feed ``x`` through each layer in order and return the final result."""
        for layer in self.layers:
            x = layer(x)
        return x

    def parameters(self):
        """Return a flat list of the parameters of all layers, in layer order.

        Relies on every layer (including the activations) providing a
        ``parameters()`` method.
        """
        return [p for layer in self.layers for p in layer.parameters()]

Training

In [4]:
# MNIST training split, stored under ./data (download=True asks torchvision to
# fetch the files into that directory).
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transforms.Compose([
        # Convert each image to a tensor, then normalise with the fixed
        # mean/std constants used for both splits in this notebook.
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]),
)

In [5]:
# Mini-batches of 32 training samples, reshuffled by the loader.
train_loader = DataLoader(
    dataset=train_dataset,
    shuffle=True,
    batch_size=32,
)

In [6]:
# Network with 784 inputs and linear layers of width 16, 16 and 10,
# optimised with plain SGD.
model = MLP(784, [16, 16, 10])
optimizer = SGD(model.parameters(), lr=0.1)

# training loop
max_epochs = 1
for epoch in range(max_epochs):
    progress = tqdm(enumerate(train_loader), total=len(train_loader))
    for it, (images, labels) in progress:
        # Flatten each image to a row of 28*28 values and wrap the numpy
        # array in a micrograd Tensor; the labels stay a plain numpy array.
        inputs = Tensor(images.reshape(-1, 28*28).numpy())
        targets = labels.numpy()

        # forward pass: model output fed straight into the batched loss
        loss = F.batched_cross_entropy(model(inputs), targets)

        # backward pass: clear the previous gradients, then backpropagate
        optimizer.zero_grad()
        loss.backward()

        # parameter update
        optimizer.step()

        # Show the current loss in the progress bar.
        progress.set_description(f"epoch: {epoch}, it: {it}, loss: {loss.data.item():.5f}")

epoch: 0, it: 1874, loss: 0.43321: 100%|████| 1875/1875 [00:24<00:00, 77.65it/s]


Testing

In [7]:
# MNIST test split (train=False), stored under ./data and preprocessed with
# the same ToTensor + Normalize pipeline as the training split.
test_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=False,
    download=True,
    transform=transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
    ]),
)

In [8]:
# Batches of 128 test samples; no shuffle argument is passed, so the
# DataLoader default ordering applies.
test_loader = DataLoader(
    dataset=test_dataset,
    batch_size=128,
)

In [9]:
# evaluate accuracy on test set
correct = 0
total = 0
for images, labels in test_loader:
    # Same preprocessing as in training: flatten to 28*28 and wrap in a Tensor.
    inputs = Tensor(images.reshape(-1, 28*28).numpy())
    targets = labels.numpy()
    # Predicted class is the index of the largest model output on the last axis.
    pred = np.argmax(model(inputs).data, axis=-1)
    # Accumulate the number of matches and the number of samples seen.
    correct += (targets == pred).sum()
    total += inputs.data.shape[0]

print(f"accuracy: {correct / total}")

accuracy: 0.8797
