A rough PyTorch copy of https://blog.paperspace.com/writing-lenet5-from-scratch-in-python/ that trains a LeNet-style CNN on MNIST.

In [1]:
import numpy as np
import torch
from torch import nn, optim
from datasets import load_dataset
from helpers import get_device, train, evaluate

In [2]:
# Seed both NumPy's and PyTorch's global RNGs so weight initialisation and any
# random sampling start from the same state on every fresh run.
np.random.seed(0)
torch.manual_seed(0)

# Device that the model and its inputs are placed on (chosen by the project helper).
device = get_device()

In [3]:
# Fetch MNIST (or reuse the locally cached copy) via Hugging Face `datasets`.
dataset = load_dataset("mnist")

# Training split: images become one float32 array scaled by 1/255,
# labels become a plain NumPy array.
X_train = np.array(
    [np.array(picture) for picture in dataset["train"]["image"]],
    dtype=np.float32,
) / 255.0
Y_train = np.array(dataset["train"]["label"])

# Test split: identical conversion.
X_test = np.array(
    [np.array(picture) for picture in dataset["test"]["image"]],
    dtype=np.float32,
) / 255.0
Y_test = np.array(dataset["test"]["label"])

Using the latest cached version of the dataset since mnist couldn't be found on the Hugging Face Hub


Found the latest cached dataset configuration 'mnist' at /Users/jacky/.cache/huggingface/datasets/mnist/mnist/0.0.0/77f3279092a1c1579b2250db8eafed0ad422088c (last modified on Wed Jan  1 12:48:49 2025).


In [4]:
class LeNet(nn.Module):
    """LeNet-style CNN that maps a batch of 28x28 single-channel images to 10 logits.

    Two ``conv -> batch-norm -> max-pool -> ReLU`` stages are followed by three
    fully connected layers. With 28x28 inputs the second pooling stage produces
    16 x 4 x 4 = 256 features per sample, which is the input width of ``l1``.
    """

    def __init__(self):
        super().__init__()
        # Stage 1: 1 -> 6 channels, 5x5 kernel, then 2x2 max-pooling.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.bn1 = nn.BatchNorm2d(6)
        self.max_pool1 = nn.MaxPool2d(2)
        # Stage 2: 6 -> 16 channels, 5x5 kernel, then 2x2 max-pooling.
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.bn2 = nn.BatchNorm2d(16)
        self.max_pool2 = nn.MaxPool2d(2)
        # Classifier head: 256 -> 128 -> 64 -> 10.
        self.l1 = nn.Linear(256, 128)
        self.l2 = nn.Linear(128, 64)
        self.l3 = nn.Linear(64, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Implemented as ``forward`` (not ``__call__``) so that invoking the
        module as ``model(x)`` goes through ``nn.Module.__call__`` and
        registered forward hooks are honoured.

        Args:
            x: Tensor of shape ``(batch, 28, 28)`` without a channel axis.

        Returns:
            Tensor of shape ``(batch, 10)`` holding the output of the last
            linear layer (no softmax is applied).
        """
        # Insert the channel axis expected by Conv2d: (N, H, W) -> (N, 1, H, W).
        x = torch.unsqueeze(x, 1)
        x = self.bn1(self.conv1(x))
        x = self.max_pool1(x).relu()
        x = self.bn2(self.conv2(x))
        x = self.max_pool2(x).relu()
        # Flatten everything except the batch axis. Unlike ``view(-1, 256)``,
        # this cannot silently fold a wrong-sized input into the batch
        # dimension; a size mismatch surfaces as an error in ``l1``.
        x = torch.flatten(x, 1)
        x = self.l1(x).relu()
        x = self.l2(x).relu()
        x = self.l3(x)
        return x

In [5]:
# Build the network, then move its parameters onto the selected device.
model = LeNet()
model = model.to(device)

# Smoke test: push one random 28x28 "image" through the network and display the logits.
# NOTE(review): the module is presumably still in training mode here, so this call
# may also update the BatchNorm running statistics - confirm this is acceptable.
model(torch.rand((1, 28, 28), device=device))

tensor([[ 0.0708, -0.1283, -0.0492,  0.0211,  0.0976, -0.0847,  0.0503,  0.0386,
         -0.0595, -0.1223]], device='mps:0', grad_fn=<LinearBackward0>)

In [6]:
# Training hyperparameters, consumed by the training cell that follows.
epochs = 2       # how many times the training helper is run over X_train
batch_size = 32  # used to derive the number of steps per epoch
lr = 0.004       # learning rate handed to the Adam optimizer

In [7]:
# Optimise all model parameters with Adam at the configured learning rate.
optimizer = optim.Adam(model.parameters(), lr=lr)
# Number of whole batches per epoch; a trailing partial batch is dropped by the floor division.
train_steps = len(X_train) // batch_size
# NOTE(review): test_steps is not used anywhere in the visible code, and the recorded
# run shows 79 evaluation steps, so evaluate() appears to choose its own batching - confirm.
test_steps = len(X_test) // batch_size

# One call to the project's train() helper per epoch, each running train_steps steps.
# NOTE(review): batch_size is not passed to train(); presumably the helper's own
# default matches it - verify against helpers.train.
for epoch in range(epochs):
    train(model, X_train, Y_train, optimizer, train_steps, device=device)

# Report accuracy on the held-out test split.
evaluate(model, X_test, Y_test, device=device)

loss 0.11 accuracy 0.98: 100%|██████████| 1875/1875 [00:19<00:00, 94.63it/s] 
loss 0.01 accuracy 0.99: 100%|██████████| 1875/1875 [00:19<00:00, 95.37it/s] 
100%|██████████| 79/79 [00:00<00:00, 439.65it/s]

test set accuracy is 0.9902



