In [8]:
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [2]:
# Fully connected classifier: a flattened 28x28 input passes through two
# 64-unit ReLU hidden layers and ends in 10 output logits (no final activation).
widths = (28 * 28, 64, 64, 10)
layers = [nn.Linear(widths[0], widths[1])]
for fan_in, fan_out in zip(widths[1:], widths[2:]):
  # every subsequent Linear layer is preceded by a ReLU non-linearity
  layers += [nn.ReLU(), nn.Linear(fan_in, fan_out)]
model = nn.Sequential(*layers)

In [5]:
# Plain stochastic gradient descent over all of the model's parameters;
# each optimizer.step() call applies one update with step size 0.01.
optimizer = optim.SGD(params=model.parameters(), lr=0.01)

In [6]:
# Classification criterion: cross-entropy computed directly from raw logits
# and class-index targets, averaged over the samples in the batch.
loss = nn.CrossEntropyLoss(reduction="mean")

In [11]:
# data
# MNIST training split, stored under ./data (fetched on first use); every
# image is converted to a float tensor by ToTensor().
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Hold out 5,000 samples for validation and train on the rest. The split is
# drawn from a dedicated, seeded generator so the same samples land in the
# validation set on every fresh kernel run, keeping validation losses
# comparable between runs.
n_val = 5000
n_train = len(train_data) - n_val
split_rng = torch.Generator().manual_seed(0)
train, val = random_split(train_data, [n_train, n_val], generator=split_rng)

# Reshuffle the training samples every epoch so SGD does not see the exact
# same batches in the exact same order each time; validation order is
# irrelevant to the reported loss, so it stays sequential.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32)

In [12]:
# training loops
# Each epoch runs one optimisation pass over train_loader followed by one
# gradient-free evaluation pass over val_loader, and prints the mean
# per-sample loss of each pass.

def _mean_loss(batch_losses, batch_sizes):
  """Return the per-sample mean loss as a 0-dim tensor.

  batch_losses holds one mean loss per batch and batch_sizes the matching
  number of samples. Weighting by size keeps a smaller final batch from
  counting as much as a full one. Two empty lists give nan (0 / 0).
  """
  weights = torch.tensor(batch_sizes, dtype=torch.float32)
  values = torch.tensor(batch_losses, dtype=torch.float32)
  return (values * weights).sum() / weights.sum()


epochs = 5
for epoch in range(epochs):
  # ---- training pass ----
  model.train()  # put any mode-dependent layers into training behaviour
  losses, sizes = [], []
  for batch in train_loader:
    x, y = batch

    # flatten each sample to a single feature vector for the Linear layers
    b = x.size(0)
    x = x.view(b, -1)

    # forward
    logits = model(x)

    # objective fn
    J = loss(logits, y)

    # clean gradients left over from the previous step
    optimizer.zero_grad()

    # backward
    J.backward()

    # step opposite gradient (minimize loss)
    optimizer.step()

    losses.append(J.item())
    sizes.append(b)

  # see updates
  print(f"Epoch {epoch + 1}, training loss: {_mean_loss(losses, sizes):.2f}")

  # ---- validation pass ----
  model.eval()  # switch mode-dependent layers to inference behaviour
  losses, sizes = [], []
  # neither the forward pass nor the loss needs an autograd graph here
  with torch.no_grad():
    for batch in val_loader:
      x, y = batch

      b = x.size(0)
      x = x.view(b, -1)

      # forward
      logits = model(x)

      # objective fn
      J = loss(logits, y)

      losses.append(J.item())
      sizes.append(b)
  print(f"Epoch {epoch + 1}, validation loss: {_mean_loss(losses, sizes):.2f}")
  

Epoch 1, training loss: 1.25
Epoch 1, validation loss: 0.47
Epoch 2, training loss: 0.41
Epoch 2, validation loss: 0.33
Epoch 3, training loss: 0.33
Epoch 3, validation loss: 0.29
Epoch 4, training loss: 0.29
Epoch 4, validation loss: 0.26
Epoch 5, training loss: 0.26
Epoch 5, validation loss: 0.23
