<a href="https://colab.research.google.com/github/alfie1104/deeplearning-with-pytorch/blob/main/pytorch_lightning/pytorch_MNIST_nn.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch
from torch import nn

In [7]:
# # Define model
# model = nn.Sequential(
#     nn.Linear(28*28, 64),
#     nn.ReLU(),
#     nn.Linear(64, 64),
#     nn.ReLU(),
#     nn.Dropout(0.1), # If we're overfitting
#     nn.Linear(64, 10)
# )

# Define a more flexible model
class ResNet(nn.Module):
  """Small fully connected classifier with a single residual (skip) connection.

  The network is Linear -> ReLU -> Linear -> ReLU, then the outputs of the two
  hidden layers are summed, passed through dropout and projected to the class
  logits by a final Linear layer.

  Args:
    in_features: size of each (already flattened) input vector.
    hidden: width of both hidden layers; they must match so that their
      outputs can be added together.
    num_classes: number of logits produced per input.
    dropout: drop probability applied to the summed hidden activations.

  The defaults reproduce the original hard-coded network (28*28 -> 64 -> 64 -> 10,
  dropout 0.1).
  """

  def __init__(self, in_features=28*28, hidden=64, num_classes=10, dropout=0.1):
    super().__init__()
    self.l1 = nn.Linear(in_features, hidden)
    self.l2 = nn.Linear(hidden, hidden)
    self.l3 = nn.Linear(hidden, num_classes)
    self.do = nn.Dropout(dropout)

  def forward(self, x):
    """Return the unnormalised class scores (logits) for the input `x`.

    `x` is fed straight into the first Linear layer, so its last dimension
    must equal `in_features`.
    """
    h1 = nn.functional.relu(self.l1(x))
    h2 = nn.functional.relu(self.l2(h1))
    # Skip connection: h1 is added back to h2. If the second layer is not useful,
    # the network can push the bias of l2 strongly negative so that the ReLU
    # zeroes h2, leaving only h1 to flow into the output layer.
    do = self.do(h2 + h1)
    logits = self.l3(do)
    return logits

# Use the GPU when one is available; otherwise keep the model on the CPU so
# that this cell does not crash on machines without CUDA.
model = ResNet().to('cuda' if torch.cuda.is_available() else 'cpu')

In [8]:
from torch import optim

# Define optimizer: plain SGD over every parameter of the model.
# model.parameters() returns a one-shot generator, so it is materialised into a
# list here; otherwise `params` would be silently empty once the optimizer has
# consumed it, and any later use of `params` would iterate over nothing.
params = list(model.parameters())
optimizer = optim.SGD(params, lr=1e-2)

In [9]:
# Objective function: cross-entropy between the model's logits and the target
# class indices, averaged over the batch (the default reduction, spelled out).
loss = nn.CrossEntropyLoss(reduction='mean')

In [10]:
# Train, Val split
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader

SEED = 42        # fixes the train/validation split so a re-run reproduces it
VAL_SIZE = 5000  # number of examples held out for validation
BATCH_SIZE = 32

train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Derive the training size from the dataset instead of hard-coding it, and seed
# the split so that Restart & Run All always yields the same partition.
train, val = random_split(
  train_data,
  [len(train_data) - VAL_SIZE, VAL_SIZE],
  generator=torch.Generator().manual_seed(SEED),
)

# Reshuffle the training examples every epoch so SGD does not see the batches in
# the same order each time; the validation order does not matter, so leave it fixed.
train_loader = DataLoader(train, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val, batch_size=BATCH_SIZE)

In [11]:
# Training and validation loops
epochs = 5

# Run on whatever device the model already lives on, so the loop works both on
# a GPU and on a CPU-only machine instead of unconditionally calling .cuda().
device = next(model.parameters()).device

for epoch in range(epochs):
  # Every epoch is one pass over the training set followed by one pass over the
  # validation set; both passes share the same code and differ only in `is_training`.
  for phase, loader in (('training', train_loader), ('validation', val_loader)):
    is_training = phase == 'training'

    losses = list()
    accuracies = list()
    batch_sizes = list() # the last batch can be smaller, so per-batch means must be weighted

    # Dropout is only active in training mode: train(True) == train(), train(False) == eval()
    model.train(is_training)

    for batch in loader:
      x, y = batch

      # x : batch size x channel x 28 x 28
      b = x.size(0) # batch size
      x = x.view(b, -1).to(device) # flatten every image into one vector and move it to the model's device
      y = y.to(device)

      # Gradients are tracked only while training; validation runs without autograd.
      with torch.set_grad_enabled(is_training):
        # 1) forward
        l = model(x) # l stands for logits (logits is the last output of the model)

        # 2) compute the objective function
        J = loss(l, y)

      if is_training:
        # 3) clear the gradients left over from the previous step
        #    (same effect here as optimizer.zero_grad(), since the optimizer holds all model parameters)
        model.zero_grad()

        # 4) compute the partial derivatives of J w.r.t. the parameters;
        #    backward() accumulates into .grad, which is why step 3 clears it first
        J.backward()

        # 5) step in the opposite direction of the gradient (update parameter)
        optimizer.step() # params = params - eta * params.grad  (eta : learning rate)

      losses.append(J.item())
      accuracies.append(l.detach().argmax(dim=1).eq(y).float().mean().item())
      batch_sizes.append(b)

    # Weight each per-batch mean by its batch size so the reported numbers are true
    # per-example averages even when the final batch is smaller than the others.
    weights = torch.tensor(batch_sizes, dtype=torch.float)
    mean_loss = ((torch.tensor(losses) * weights).sum() / weights.sum()).item()
    mean_accuracy = ((torch.tensor(accuracies) * weights).sum() / weights.sum()).item()

    print(f'Epoch : {epoch + 1}', end=", ")
    print(f'{phase} loss : {mean_loss:.2f}', end=", ")
    print(f'{phase} accuracy : {mean_accuracy:.2f}')

Epoch : 1, training loss : 0.86, training accuracy : 0.78
Epoch : 1, validation loss : 0.39, validation accuracy : 0.89
Epoch : 2, training loss : 0.38, training accuracy : 0.89
Epoch : 2, validation loss : 0.31, validation accuracy : 0.91
Epoch : 3, training loss : 0.31, training accuracy : 0.91
Epoch : 3, validation loss : 0.28, validation accuracy : 0.92
Epoch : 4, training loss : 0.28, training accuracy : 0.92
Epoch : 4, validation loss : 0.24, validation accuracy : 0.93
Epoch : 5, training loss : 0.24, training accuracy : 0.93
Epoch : 5, validation loss : 0.22, validation accuracy : 0.93
