In [None]:
import numpy as np
import torch
from torch import nn, optim
from datasets import load_dataset
from helpers import get_device, train, evaluate

In [2]:
# Seed both random number generators with the same fixed value before any
# model or data work happens, so repeated runs start from the same state.
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

# Device selection is delegated to the project helper; the rest of the
# notebook passes this handle to the model, the training and the evaluation.
device = get_device()

In [3]:
def _split_to_arrays(split):
    """Turn one dataset split into an ``(images, labels)`` pair of NumPy arrays.

    Every entry of ``split["image"]`` is converted with ``np.array`` and the
    stack is stored as float32 divided by 255.0 (presumably 8-bit pixel
    values, which would land in [0, 1] -- TODO confirm). Labels are copied
    into an array unchanged.
    """
    pixels = np.array([np.array(img) for img in split["image"]], dtype=np.float32)
    labels = np.array(split["label"])
    return pixels / 255.0, labels


dataset = load_dataset("mnist")
X_train, Y_train = _split_to_arrays(dataset["train"])
X_test, Y_test = _split_to_arrays(dataset["test"])

In [4]:
class ConvNet(nn.Module):
    """Small two-stage convolutional classifier for single-channel images.

    Layer stack: 5x5 conv (1 -> 6 channels), ReLU, 2x2 max-pool,
    5x5 conv (6 -> 16 channels), ReLU, 2x2 max-pool, flatten,
    linear (256 -> 10).

    For a 28x28 input the spatial size goes 28 -> 24 -> 12 -> 8 -> 4, so the
    flattened feature vector has 16 * 4 * 4 = 256 entries, which is what the
    final linear layer expects.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool1 = nn.MaxPool2d(2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.pool2 = nn.MaxPool2d(2)
        self.linear = nn.Linear(256, 10)

    def forward(self, x):
        """Compute the 10 output scores for a batch of images.

        Args:
            x: tensor of shape (batch, height, width) without a channel
                axis; the channel axis is inserted here. Height and width
                must be 28 for the flattened features to match the linear
                layer.

        Returns:
            Tensor of shape (batch, 10) holding the raw outputs of the final
            linear layer (no softmax is applied).

        Raises:
            RuntimeError: if the spatial size of ``x`` does not yield 256
                features per sample.
        """
        # Defined as ``forward`` rather than ``__call__`` so that
        # ``nn.Module.__call__`` stays in charge and registered hooks run.
        x = torch.unsqueeze(x, 1)  # (batch, H, W) -> (batch, 1, H, W)
        x = self.conv1(x).relu()
        x = self.pool1(x)
        x = self.conv2(x).relu()
        x = self.pool2(x)
        # Flatten everything except the batch axis. Unlike ``view(-1, 256)``
        # this cannot silently fold a wrong-sized input into a different
        # batch size; the linear layer raises on a feature-count mismatch.
        x = x.flatten(start_dim=1)
        return self.linear(x)

In [5]:
# Build the network, then move its parameters to the selected device.
model = ConvNet()
model = model.to(device)

# Smoke test: push one random 28x28 image through the freshly initialised
# model; the resulting tensor is shown as the cell output.
sample = torch.rand(1, 28, 28, device=device)
model(sample)

tensor([[ 0.0561,  0.1196, -0.0440, -0.1433,  0.0334,  0.0502,  0.0142, -0.1147,
         -0.1023, -0.0106]], device='mps:0', grad_fn=<LinearBackward0>)

In [6]:
# Hyperparameters read by the training cell.
batch_size = 32  # divisor used to derive the number of steps per epoch
epochs = 1       # how many times the training helper is invoked
lr = 0.005       # learning rate handed to the Adam optimizer

In [7]:
# Steps per pass: whole batches only, any remainder is dropped by the
# integer division.
train_steps = X_train.shape[0] // batch_size
# NOTE(review): test_steps is computed but never used in the visible cells;
# evaluate() is called without it -- confirm whether it can be removed.
test_steps = X_test.shape[0] // batch_size

optimizer = optim.Adam(model.parameters(), lr=lr)

# One call to the training helper per epoch, each running train_steps steps.
for epoch in range(epochs):
    train(model, X_train, Y_train, optimizer, train_steps, device=device)

# Final check on the held-out test split.
evaluate(model, X_test, Y_test, device=device)

loss 0.11 accuracy 0.98: 100%|██████████| 1875/1875 [00:11<00:00, 161.65it/s]
100%|██████████| 79/79 [00:00<00:00, 524.09it/s]

test set accuracy is 0.9864



