<a href="https://colab.research.google.com/github/deecodess/MNIST/blob/main/MNIST.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Import Torch


In [None]:
import torch 
from torch import nn
from torch import optim
from torchvision import datasets, transforms
from torch.utils.data import random_split, DataLoader




In [None]:
# Smoke test: create a small random tensor and move it to the current CUDA device.
torch.randn(5).to("cuda")

Train / Validation Split


In [None]:
# Load the MNIST training set (downloading into ./data if needed); ToTensor()
# converts each image to a tensor.
train_data = datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor())

# Hold out 5,000 samples for validation. The training size is derived from the
# dataset length so the two parts always add up to the full dataset.
val_size = 5000
train, val = random_split(train_data, [len(train_data) - val_size, val_size])

# Reshuffle the training samples every epoch so SGD does not see the same
# batch order each time. Validation needs no shuffling.
train_loader = DataLoader(train, batch_size=32, shuffle=True)
val_loader = DataLoader(val, batch_size=32)


Define Model

In [None]:
# Baseline multilayer perceptron: 784 -> 64 -> 64 -> 10, ReLU after each hidden
# layer and dropout (p=0.1) before the output layer, which returns raw logits.
_mlp_layers = [
    nn.Linear(28 * 28, 64),
    nn.ReLU(),
    nn.Linear(64, 64),
    nn.ReLU(),
    nn.Dropout(0.1),
    nn.Linear(64, 10),
]
model = nn.Sequential(*_mlp_layers)

Define a more flexible model (a custom `nn.Module` with a residual connection; it replaces the `Sequential` model above)

In [None]:
class ResNet(nn.Module):
    """Small fully connected classifier with one residual (skip) connection.

    The activation of the first hidden layer is added to the activation of the
    second hidden layer; dropout is applied to that sum and a final linear
    layer maps it to raw logits.

    Args:
        in_features: Length of each flattened input sample.
        hidden: Width shared by both hidden layers (they must match so that
            their activations can be summed).
        num_classes: Number of output logits.
        dropout: Dropout probability applied to the residual sum.
    """

    def __init__(
        self,
        in_features: int = 28 * 28,
        hidden: int = 64,
        num_classes: int = 10,
        dropout: float = 0.1,
    ):
        super().__init__()
        self.l1 = nn.Linear(in_features, hidden)
        self.l2 = nn.Linear(hidden, hidden)
        self.l3 = nn.Linear(hidden, num_classes)
        self.do = nn.Dropout(dropout)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return logits for ``x``, whose last dimension must be ``in_features``."""
        h1 = nn.functional.relu(self.l1(x))
        h2 = nn.functional.relu(self.l2(h1))
        # Residual connection: add the first hidden activation back in.
        do = self.do(h2 + h1)
        logits = self.l3(do)
        return logits
# Instantiate the residual model and move its parameters to the GPU.
# This rebinds `model`, replacing the Sequential baseline defined earlier.
model = ResNet().to("cuda")


Define Optimiser

In [None]:
# Plain stochastic gradient descent over all parameters of `model`.
# `params` is the iterator returned by model.parameters(), handed to the optimiser.
params = model.parameters()
optimiser = optim.SGD(params, lr=0.01)

Define Loss

In [None]:
# Cross-entropy between raw logits and integer class labels, averaged over the batch.
loss = nn.CrossEntropyLoss(reduction="mean")

Training Loop

In [None]:
# Train for a fixed number of epochs; after each training pass, evaluate on the
# validation set. Epoch metrics are means weighted by batch size, because the
# final batch of a loader may be smaller than the others and an unweighted mean
# of per-batch means would over-weight it.
nb_epochs = 5
for epoch in range(nb_epochs):
    # ---------------- training pass ----------------
    losses = list()
    accuracies = list()
    batch_sizes = list()
    model.train()  # training mode, because dropout is used
    for batch in train_loader:
        x, y = batch

        # x (image): batch size * 1 * 28 * 28; flatten each image before the model
        b = x.size(0)
        x = x.view(b, -1).cuda()

        # 5 essential steps for training a model
        # 1 forward
        l = model(x)  # l: logits (this name is read again after the loop)

        # 2 compute the objective function
        J = loss(l, y.cuda())

        # 3 clean the gradients left over from the previous step
        model.zero_grad()

        # 4 accumulate the partial derivatives of J wrt the parameters
        J.backward()

        # 5 step in the opposite direction of the gradient
        #   (conceptually: params = params - eta * params.grad)
        optimiser.step()

        losses.append(J.item())
        accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean().item())
        batch_sizes.append(b)

    weights = torch.tensor(batch_sizes, dtype=torch.float)
    train_loss = (torch.tensor(losses) * weights).sum() / weights.sum()
    train_accuracy = (torch.tensor(accuracies) * weights).sum() / weights.sum()
    print(f'Epoch {epoch + 1}', end=', ')
    print(f'training loss: {train_loss:.2f}', end=', ')
    print(f'training accuracy: {train_accuracy:.2f}')

    # ---------------- validation pass ----------------
    losses = list()
    accuracies = list()
    batch_sizes = list()
    model.eval()  # evaluation mode: dropout is disabled
    with torch.no_grad():  # no gradients are needed for evaluation
        for batch in val_loader:
            x, y = batch

            # x (image): batch size * 1 * 28 * 28; flatten each image before the model
            b = x.size(0)
            x = x.view(b, -1).cuda()

            # 1 forward
            l = model(x)  # l: logits

            # 2 compute the objective function
            J = loss(l, y.cuda())

            losses.append(J.item())
            accuracies.append(y.eq(l.detach().argmax(dim=1).cpu()).float().mean().item())
            batch_sizes.append(b)

    weights = torch.tensor(batch_sizes, dtype=torch.float)
    val_loss = (torch.tensor(losses) * weights).sum() / weights.sum()
    val_accuracy = (torch.tensor(accuracies) * weights).sum() / weights.sum()
    print(f'Epoch {epoch + 1}', end=', ')
    print(f'validation loss: {val_loss:.2f}', end=', ')
    print(f'validation accuracy: {val_accuracy:.2f}')



In [None]:
from matplotlib import pyplot as plt 


In [None]:
# Two methods to show a tensor as an image.
# numpy and PIL are imported first so they are defined before their first use below.
import numpy as np
import PIL.Image as pil

# Each dataset item is an (image, label) pair.
train_image, train_num = train_data[0]
# NOTE: train_num is the label stored in the dataset, not a model prediction.
print(f'The predicted number is: {train_num}')

# Convert the image to a 28x28 float array
testImage = np.array(train_image, dtype='float').reshape(28, 28)

# Method 1: matplotlib
plt.imshow(testImage)

# Method 2: PIL — scale to 0..255 (assumes pixel values in [0, 1]) and build
# an 8-bit greyscale image
img = pil.fromarray(np.uint8(testImage * 255), 'L')
img






In [None]:
# Show the first row of `l`, the logits from the most recent forward pass.
l[0].detach()