<a href="https://colab.research.google.com/github/joe-jachim/pytorch-playground/blob/main/mnist.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import torch
from torch import nn, optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [None]:
# Define model
# Use the GPU when one is available and fall back to the CPU otherwise, so
# this cell also runs on CPU-only runtimes instead of raising in `.cuda()`.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Fully connected classifier: flattened 28 * 28 input -> two hidden layers of
# 64 units with ReLU -> dropout (p=0.1) -> 10 output logits.
model = nn.Sequential(
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(64, 10)
).to(device)

In [None]:
# NOTE: this cell is deliberately disabled. Everything below is wrapped in a
# triple-quoted string, so none of it is executed: the cell merely evaluates
# to that string (which the notebook echoes as the cell output) and `model`
# keeps referring to the object defined in the earlier cell.
# The string holds an alternative model with the same layer sizes as the
# Sequential one, plus a skip connection (`h2 + h1`) ahead of dropout.
# Removing the surrounding quotes would define `ResNet` and rebind `model`.
'''
# Define a more flexible model
class ResNet(nn.Module):
  def __init__(self):
    super().__init__()
    self.l1 = nn.Linear(28 * 28, 64)
    self.l2 = nn.Linear(64, 64)
    self.l3 = nn.Linear(64, 10)
    self.do = nn.Dropout(0.1)

  def forward(self, x):
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    do = self.do(h2 + h1)
    logits = self.l3(do)
    return logits

  #def backward(self, x)

model = ResNet().cuda()
'''

'\n# Define a more flexible model\nclass ResNet(nn.Module):\n  def __init__(self):\n    super().__init__()\n    self.l1 = nn.Linear(28 * 28, 64)\n    self.l2 = nn.Linear(64, 64)\n    self.l3 = nn.Linear(64, 10)\n    self.do = nn.Dropout(0.1)\n\n  def forward(self, x):\n    h1 = nn.functional.relu(self.l1(x))\n    h2 = nn.functional.relu(self.l2(h1))\n    do = self.do(h2 + h1)\n    logits = self.l3(do)\n    return logits\n\n  #def backward(self, x)\n\nmodel = ResNet().cuda()\n'

In [None]:
# Optimizer: plain stochastic gradient descent over every parameter of
# `model`, with a fixed learning rate of 0.01.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
)

In [None]:
# Loss: cross-entropy between the model's raw logits and the integer class
# labels, averaged over the batch (the default reduction, spelled out here).
loss = nn.CrossEntropyLoss(reduction='mean')

In [None]:
# Train / validation split
# Download (if needed) the MNIST training set; ToTensor converts each image
# to a float tensor.
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Hold out 5000 samples for validation and train on the rest. The train size
# is derived from the dataset length so the two sizes always add up.
val_size = 5000
train_size = len(train_data) - val_size
train, val = random_split(train_data, [train_size, val_size])

# Reshuffle the training samples every epoch; without `shuffle=True` SGD sees
# the exact same batches in the exact same order each epoch.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
# Validation order does not affect the metrics, so it is left unshuffled.
val_loader = DataLoader(val, batch_size=32)

In [None]:
# Training loop: fit `model` on `train_loader` and report the training loss,
# validation loss and validation accuracy after every epoch.
nb_epochs = 5

# Run on whichever device the model's parameters live on, rather than
# assuming CUDA is available.
device = next(model.parameters()).device


def _epoch_mean(batch_means, batch_sizes):
  """Combine per-batch means into the exact per-sample mean.

  A plain average of batch means over-weights a short final batch, so each
  batch mean is weighted by the number of samples it was computed from.

  Args:
    batch_means: list of floats, one mean value per batch.
    batch_sizes: list of ints, the number of samples in each batch.

  Returns:
    A 0-d float tensor; NaN when no batch was seen.
  """
  means = torch.tensor(batch_means, dtype=torch.float32)
  sizes = torch.tensor(batch_sizes, dtype=torch.float32)
  return (means * sizes).sum() / sizes.sum()


for epoch in range(nb_epochs):
  # ---- Training pass ----
  losses = list()
  train_sizes = list()
  model.train()  # training mode: dropout is active
  for batch in train_loader:
    x, y = batch

    # Flatten every sample to a vector, as expected by the first Linear layer
    x = x.view(x.size(0), -1).to(device)
    y = y.to(device)

    # Clear the gradients left over from the previous step
    optimizer.zero_grad()

    # Forward
    logits = model(x)

    # Compute the objective function
    J = loss(logits, y)

    # Compute the partial derivatives of J wrt the params
    J.backward()

    # Step in the opposite direction of the gradient
    optimizer.step()

    losses.append(J.item())
    train_sizes.append(x.size(0))

  # ---- Validation pass ----
  accuracies = list()
  val_losses = list()
  val_sizes = list()
  model.eval()  # evaluation mode: dropout is disabled
  # No gradients are needed for evaluation, so skip building the graph for
  # both the forward pass and the loss.
  with torch.no_grad():
    for batch in val_loader:
      x, y = batch

      x = x.view(x.size(0), -1).to(device)
      y = y.to(device)

      # Forward
      logits = model(x)

      # Compute the objective function
      J = loss(logits, y)

      val_losses.append(J.item())
      # Fraction of samples whose highest-scoring class equals the label
      accuracies.append(logits.argmax(dim=1).eq(y).float().mean().item())
      val_sizes.append(x.size(0))

  print(f'epoch {epoch + 1}, train loss {_epoch_mean(losses, train_sizes):.2f}', end=', ')
  print(f'val loss {_epoch_mean(val_losses, val_sizes):.2f}', end=', ')
  print(f'val acc {_epoch_mean(accuracies, val_sizes):.2f}')

epoch 1, train loss 1.30, val loss 0.51, val acc 0.86
epoch 2, train loss 0.45, val loss 0.36, val acc 0.90
epoch 3, train loss 0.36, val loss 0.31, val acc 0.91
epoch 4, train loss 0.31, val loss 0.28, val acc 0.92
epoch 5, train loss 0.28, val loss 0.26, val acc 0.92
