<a href="https://colab.research.google.com/github/ellamcho/NB240/blob/main/MNIST/20250204_MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Tutorial from https://www.youtube.com/watch?v=OMDn66kM9Qc

In [1]:
# Load packages
import torch
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

In [13]:
# Define the simple model
model = nn.Sequential(
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Dropout(0.1), # for overfitting
    nn.Linear(64, 10)
).cuda()

In [14]:
# Define a more flexible model

class ResNet(nn.Module):
  def __init__(self):
      super().__init__()
      self.l1 = nn.Linear(28 * 28, 64)
      self.l2 = nn.Linear(64, 64)
      self.l3 = nn.Linear(64,10)
      self.do = nn.Dropout(0.1)
  def forward(self, x):
    h1 = nn.functional.relu(self.l1(x)) # h = hidden layers
    h2 = nn.functional.relu(self.l2(h1))
    do = self.do(h2 + h1) # if h2 is not necessary, the network can go off of h1; do = dropout; this is the "residual connection"
    logits = self.l3(do)
    return logits

model = ResNet().cuda()


In [15]:
# Define the optimizer
params = model.parameters
optimizer = optim.SGD(params(),lr = 1e-2)

In [16]:
# Define loss
loss = nn.CrossEntropyLoss() # updates params

In [5]:
# Training sets

train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())
train, val = random_split(train_data, [55000, 5000]) # splits data randomly for training and then validation ("val")
train_loader = DataLoader(train, batch_size =32)
val_loader = DataLoader(val, batch_size = 32)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 11.6MB/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 358kB/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 3.22MB/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 404: Not Found

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.94MB/s]

Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw






In [17]:
# Training and validation loops
num_epochs = 5
for epoch in range(num_epochs):
  losses =  list()
  accuracies = list()
  model.train() # because dropout is in use
  for batch in train_loader:
    x, y = batch

    # x: b x 1 x 28 x 28
    b = x.size(0)
    x = x.view(b, -1).cuda()

  ##  5 STEPS FOR SUPERVISED TRAINING ##

    # 1: Forward (l : logits)
    l = model(x)

    # 2: Compute the objective function
    J = loss(l, y.cuda())

    # 3: Clean the gradients
    model.zero_grad()
    #params.grad._zero()

    # 4: Accumulate the partial derivatives of J
    J.backward()
    # params.grad._sum(dJ/dparams)

    # 5: Step in the opposite direction of the gradient
    optimizer.step()

    losses.append(J.item())
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

  print(f'Epoch {epoch + 1}, train loss: {torch.tensor(losses).mean():2f}')
  print(f'training accuracy: {torch.tensor(accuracies).mean():.2f}')

    # with torch_nograd: params = eta - params.grad()

  losses =  list()
  accuracies = list()
  model.eval() # because dropout in use
  for batch in val_loader:
    x, y = batch

    # x: b x 1 x 28 x 28
    b = x.size(0)
    x = x.view(b, -1).cuda()

    # 1: Forward (l : logits)
    with torch.no_grad():
      l = model(x)

    # 2: Compute the objective function
    J = loss(l, y.cuda())

    losses.append(J.item())
    accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean())

  print(f'Epoch {epoch + 1}, validation loss: {torch.tensor(losses).mean():2f}')
  print(f'training accuracy: {torch.tensor(accuracies).mean():.2f}')



Epoch 1, train loss: 0.838922
training accuracy: 0.77
Epoch 1, validation loss: 0.398343
training accuracy: 0.89
Epoch 2, train loss: 0.368020
training accuracy: 0.89
Epoch 2, validation loss: 0.317379
training accuracy: 0.91
Epoch 3, train loss: 0.309110
training accuracy: 0.91
Epoch 3, validation loss: 0.274125
training accuracy: 0.92
Epoch 4, train loss: 0.268038
training accuracy: 0.92
Epoch 4, validation loss: 0.246085
training accuracy: 0.93
Epoch 5, train loss: 0.236423
training accuracy: 0.93
Epoch 5, validation loss: 0.220958
training accuracy: 0.94
