<a href="https://colab.research.google.com/github/TungAnhDep/Fundamental-Deep-Learning/blob/main/MLP.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Goal: build an MLP (multi-layer perceptron) classifier for MNIST
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

In [2]:
# Load MNIST into the current directory (downloaded on first run).
# train=True selects the training split; train=False is used here for validation.
# Both splits share one transform pipeline that converts each image to a tensor.
to_tensor = transforms.Compose([transforms.ToTensor()])
train_data = datasets.MNIST(root='.', train=True, download=True, transform=to_tensor)
valid_data = datasets.MNIST(root='.', train=False, download=True, transform=to_tensor)

# Only the training batches are shuffled; validation keeps the dataset order.
train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
valid_loader = DataLoader(valid_data, batch_size=32)

100%|██████████| 9.91M/9.91M [00:00<00:00, 20.5MB/s]
100%|██████████| 28.9k/28.9k [00:00<00:00, 612kB/s]
100%|██████████| 1.65M/1.65M [00:00<00:00, 5.64MB/s]
100%|██████████| 4.54k/4.54k [00:00<00:00, 5.27MB/s]


In [3]:
# Display the dataset summary (number of samples, split, transform) as a sanity check.
train_data

Dataset MNIST
    Number of datapoints: 60000
    Root location: .
    Split: Train
    StandardTransform
Transform: Compose(
               ToTensor()
           )

In [5]:
# Indexing the dataset returns an (image, label) pair: train_data[0] is the
# first sample, train_data[1] the second, and so on.
image, label = train_data[0]
image, label

(tensor([[[0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000],
          [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000,
           0.0000, 0.0000, 0.0000, 0.0000, 

In [None]:
def relu(X):
    """Hand-written ReLU: element-wise maximum of X and zero.

    Args:
        X: input tensor.

    Returns:
        A tensor shaped like X in which every negative entry is replaced by 0.
    """
    # Compare against a zero tensor with X's shape, dtype and device.
    return torch.maximum(X, torch.zeros_like(X))

In [None]:
# Fully connected classifier. nn.Flatten keeps dim 0 (the batch) and flattens
# the remaining dims, so each 1x28x28 image becomes a vector of 784 values.
# Two hidden layers with ReLU follow, then one output score per class.
num_classes = 10
input_size = 1 * 28 * 28
hidden_size = 512
model = nn.Sequential(
    nn.Flatten(),
    nn.Linear(input_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, hidden_size),
    nn.ReLU(),
    nn.Linear(hidden_size, num_classes),
)
model

Sequential(
  (0): Flatten(start_dim=1, end_dim=-1)
  (1): Linear(in_features=784, out_features=512, bias=True)
  (2): ReLU()
  (3): Linear(in_features=512, out_features=512, bias=True)
  (4): ReLU()
  (5): Linear(in_features=512, out_features=10, bias=True)
)

In [None]:
# The model is not moved to a device in this cell because `device` is only
# defined in a later cell.
# Cross-entropy loss takes the raw class scores produced by the model together
# with the integer class labels.
loss_function = nn.CrossEntropyLoss()
# Adam updates every model parameter with a learning rate of 1e-4.
optimizer = optim.Adam(model.parameters(), lr=1e-4)

In [None]:
# Dataset sizes counted in samples (not batches); they are passed to
# get_batch_accuracy as the denominator for the accuracy.
train_N = len(train_loader.dataset)
valid_N = len(valid_loader.dataset)
valid_N

10000

In [None]:
def get_batch_accuracy(output, y, N):
    """Return this batch's contribution to the accuracy over N samples.

    Args:
        output: model scores; the predicted class is the argmax over dim 1.
        y: true class labels, one per row of ``output``.
        N: denominator, normally the total number of samples in the dataset,
            so that summing the results over all batches gives the overall
            accuracy.

    Returns:
        Number of correct predictions in the batch divided by N (a float).
    """
    predicted = output.argmax(dim=1, keepdim=True)
    # Reshape the labels to match the (batch, 1) predictions before comparing.
    targets = y.view_as(predicted)
    num_correct = (predicted == targets).sum().item()
    return num_correct / N

In [None]:
# Prefer the GPU when one is available; otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# train() and validate() move every batch to `device`, so the model's
# parameters must live there too; otherwise the forward pass fails with a
# device mismatch on GPU runtimes. Module.to() moves the parameters in place,
# so the optimizer created earlier keeps referring to the same parameters.
model.to(device)
device

'cpu'

In [None]:
def train():
    """Run one pass over the training set, updating the model after each batch.

    Prints two epoch totals: the sum of the per-batch loss values, and the
    accuracy accumulated from get_batch_accuracy (each batch contributes
    correct / train_N, so the sum is the fraction of training samples
    classified correctly).
    """
    running_loss = 0
    running_accuracy = 0
    model.train()
    for inputs, targets in train_loader:
        # Batches must be on the same device as the model.
        inputs = inputs.to(device)
        targets = targets.to(device)

        # Clear gradients left over from the previous step before backprop.
        optimizer.zero_grad()
        logits = model(inputs)
        step_loss = loss_function(logits, targets)
        step_loss.backward()
        optimizer.step()

        running_loss += step_loss.item()
        running_accuracy += get_batch_accuracy(logits, targets, train_N)
    print('Train - Loss: {:.4f} Accuracy: {:.4f}'.format(running_loss, running_accuracy))


In [None]:
def validate():
    """Evaluate the model on the validation set without updating it.

    Prints two totals: the sum of the per-batch loss values, and the accuracy
    accumulated from get_batch_accuracy (each batch contributes
    correct / valid_N, so the sum is the fraction of validation samples
    classified correctly).
    """
    loss = 0
    accuracy = 0
    model.eval()
    # No gradients are needed for evaluation.
    with torch.no_grad():
        for x, y in valid_loader:
            x, y = x.to(device), y.to(device)
            output = model(x)
            # .item() converts the 0-dim loss tensor to a Python float so the
            # running total is a plain number, matching what train() does.
            loss += loss_function(output, y).item()
            accuracy += get_batch_accuracy(output, y, valid_N)
    print('Valid - Loss: {:.4f} Accuracy: {:.4f}'.format(loss, accuracy))

In [None]:
# Each epoch is one full training pass followed by one validation pass.
epochs = 10
for epoch in range(epochs):
    print('Epoch: {}'.format(epoch))
    train()
    validate()

Epoch: 0
Train - Loss: 778.6934 Accuracy: 0.8923
Valid - Loss: 65.9037 Accuracy: 0.9387
Epoch: 1
Train - Loss: 337.2882 Accuracy: 0.9476
Valid - Loss: 48.2200 Accuracy: 0.9531
Epoch: 2
Train - Loss: 239.9413 Accuracy: 0.9626
Valid - Loss: 36.7327 Accuracy: 0.9640
Epoch: 3
Train - Loss: 179.5223 Accuracy: 0.9717
Valid - Loss: 28.5895 Accuracy: 0.9722
Epoch: 4
Train - Loss: 139.5027 Accuracy: 0.9776
Valid - Loss: 24.3226 Accuracy: 0.9760
Epoch: 5
Train - Loss: 110.6149 Accuracy: 0.9828
Valid - Loss: 22.7821 Accuracy: 0.9767
Epoch: 6
Train - Loss: 87.5146 Accuracy: 0.9861
Valid - Loss: 21.3036 Accuracy: 0.9786
Epoch: 7
Train - Loss: 71.0827 Accuracy: 0.9891
Valid - Loss: 21.5354 Accuracy: 0.9786
Epoch: 8
Train - Loss: 58.3509 Accuracy: 0.9911
Valid - Loss: 20.6979 Accuracy: 0.9802
Epoch: 9
Train - Loss: 47.1305 Accuracy: 0.9930
Valid - Loss: 19.7632 Accuracy: 0.9811


In [None]:
# Classify a single image: sample 0 of the training set.
sample_image = train_data[0][0]
# Feed the input on whichever device the model's parameters live on.
model_device = next(model.parameters()).device
# unsqueeze(0) adds an explicit batch dimension, so the input is a batch of
# one image rather than relying on the size-1 channel axis to act as the batch.
# no_grad() skips autograd bookkeeping, which is not needed for inference.
with torch.no_grad():
    prediction = model(sample_image.unsqueeze(0).to(model_device))
prediction

tensor([[-12.3071,  -6.8059,  -6.3119,  11.1743, -27.5796,  15.8621, -21.2174,
          -7.1932,  -9.7794,  -3.4958]], grad_fn=<AddmmBackward0>)

In [None]:
# The prediction holds ten scores, one per class; the index of the largest
# score is the predicted label.
prediction.argmax(dim=1, keepdim=True)

tensor([[5]])

In [None]:
# The true label of the same image, for comparison with the prediction.
train_data[0][1]

5