In [23]:
import torch
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor
from tqdm import trange

In [24]:
# Training split of MNIST stored under ./data (download=True fetches the files
# when they are not already present); ToTensor() turns each image into a tensor.
training_data = datasets.MNIST(root='data', train=True, download=True, transform=ToTensor())

In [25]:
# Held-out test split of MNIST (train=False), kept in the same ./data root and
# converted with the same ToTensor() transform as the training split.
test_data = datasets.MNIST(root='data', train=False, download=True, transform=ToTensor())

In [26]:
# Print the summary repr of both splits. The first string keeps its trailing
# '\n', which together with the '\n' separator leaves one blank line between
# the two summaries.
print(
    f'Training data: {training_data}\n',
    f'Test data: {test_data}',
    sep='\n',
)

Training data: Dataset MNIST
    Number of datapoints: 60000
    Root location: data
    Split: Train
    StandardTransform
Transform: ToTensor()

Test data: Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()


In [27]:
# Mini-batch size shared by the training and test loaders.
batch_size = 64

# Reshuffle the training samples at the start of every epoch so that SGD does
# not replay the exact same sequence of batches each time.
train_dataloader = DataLoader(training_data, batch_size=batch_size, shuffle=True)

# Sample order has no influence on the accuracy computed later, so the test
# loader keeps the default sequential order.
test_dataloader = DataLoader(test_data, batch_size=batch_size)

In [28]:
# Peek at a single batch to confirm the tensor shapes and the label dtype;
# the `break` stops after the first batch.
for x, y in test_dataloader:
    print(f'Shape of X [N, C, H, W]: {x.shape}')
    # Space between shape and dtype so the two values do not run together.
    print(f'Shape of y: {y.shape} {y.dtype}')
    break

Shape of X [N, C, H, W]: torch.Size([64, 1, 28, 28])
Shape of y: torch.Size([64])torch.int64


In [29]:
# Pick the compute backend in order of preference: CUDA, then Apple's MPS,
# and finally the CPU when neither accelerator reports itself as available.
if torch.cuda.is_available():
    device = 'cuda'
elif torch.backends.mps.is_available():
    device = 'mps'
else:
    device = 'cpu'
print(f'Using {device} device')

Using cuda device


In [30]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> 784 -> 512 -> 512 -> 10.

    The input is flattened from dimension 1 onward (the default of
    ``nn.Flatten``), so a batch of 1x28x28 images becomes a batch of
    784-element vectors. The output holds 10 raw, un-normalised scores
    (logits) per sample; no softmax is applied inside the module.
    """

    def __init__(self):
        super().__init__()
        n_inputs, n_hidden, n_classes = 28 * 28, 512, 10
        self.flatten = nn.Flatten()
        # Layers are created in forward order so their indices inside the
        # Sequential container (0..4) match the order in which they run.
        layers = [
            nn.Linear(n_inputs, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_hidden),
            nn.ReLU(),
            nn.Linear(n_hidden, n_classes),
        ]
        self.linear_relu_stack = nn.Sequential(*layers)

    def forward(self, x):
        """Return the logits produced for the batch ``x``."""
        return self.linear_relu_stack(self.flatten(x))

In [31]:
# Build the network first, then move its parameters onto the selected device.
model = NeuralNetwork()
model = model.to(device)
print(model)

NeuralNetwork(
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (linear_relu_stack): Sequential(
    (0): Linear(in_features=784, out_features=512, bias=True)
    (1): ReLU()
    (2): Linear(in_features=512, out_features=512, bias=True)
    (3): ReLU()
    (4): Linear(in_features=512, out_features=10, bias=True)
  )
)


In [32]:
# Cross-entropy between the model's raw logits and the integer class labels,
# averaged over the batch ('mean' is the default reduction, spelled out here).
loss_fn = nn.CrossEntropyLoss(reduction='mean')

In [33]:
# Stochastic gradient descent over all model parameters; only the learning
# rate is set, every other SGD option keeps its default.
learning_rate = 1e-3
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [34]:
# Number of full passes over the training data.
n_epochs = 30

# Put the model in training mode explicitly, so this cell does not depend on
# whichever mode an earlier (or re-run) cell left the model in.
model.train()
for _ in (pbar := trange(n_epochs)):
    for x, y in train_dataloader:
        # Move the batch to the same device as the model.
        x, y = x.to(device), y.to(device)

        # Forward pass and loss for this mini-batch.
        pred = model(x)
        loss = loss_fn(pred, y)

        # Backward pass, parameter update, then clear the gradients so they
        # do not accumulate into the next mini-batch.
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        # Show the most recent mini-batch loss next to the epoch progress bar.
        pbar.set_description(f'Loss = {loss.item():.3f}')

Loss = 0.191: 100%|██████████| 30/30 [04:04<00:00,  8.13s/it]


In [35]:
# Switch to evaluation mode before measuring accuracy, so layers that behave
# differently at train time (none in this model today) would be handled
# correctly if the architecture changes.
model.eval()

correct, total = 0, 0
# Gradients are not needed for evaluation; disabling them saves memory and time.
with torch.no_grad():
    for x, y in test_dataloader:
        x, y = x.to(device), y.to(device)
        logits = model(x)
        # Predicted class = index of the largest logit along the class dimension.
        pred = torch.argmax(logits, dim=1)
        # Tensor.sum() counts the matches in one vectorised op; the Python
        # builtin sum() would iterate over the tensor one element at a time.
        correct += (pred == y).sum().item()
        total += pred.shape[0]
print(f'Accuracy: {100 * correct / total:.2f}%')

Accuracy: 90.78%
