In [0]:
import torch
from torchvision import datasets, transforms   

In [0]:
# datasets module has MNIST() class. 
# 
train_data = datasets.MNIST('', train=True, download=True, transform=transforms.Compose([transforms.ToTensor()]))  
test_data = datasets.MNIST('', train=False, download=True, transform=transforms.Compose([transforms.ToTensor()]))




In [0]:
train_set = torch.utils.data.DataLoader(train_data, batch_size=16, shuffle=True)
test_set = torch.utils.data.DataLoader(test_data, batch_size=16, shuffle=True)

In [0]:
import torch.nn as nn   
import torch.nn.functional as F  # torch modules containing functions


In [0]:
class Net(nn.Module):   # nn.Module class is inherited by the Net class

  def __init__(self):
    super().__init__()               # super() inherits methods from nn.Module class and init() for their initialization
    self.fc1 = nn.Linear(784, 64)    # defining 1st hidden layer with 784  input neurons (28x28 pixels) and 64 output neurons. fc stands for fully connected.
    self.fc2 = nn.Linear(64, 32)     # defining 2nd hidden layer. Note 64, 32
    self.fc3 = nn.Linear(32, 10)     # defining 3rd layer. Note 32, 10. Here, 10 digits are there. 

  def forward(self, x):   # forward propagation
      x = self.fc1(x)
      x = F.relu(x)
      x = self.fc2(x)
      x = F.relu(x)
      x = self.fc3(x)
      return F.log_softmax(x, dim=1)   #  dim=1 for summing up across columns i.e horizontally. F.log_softmax is log(softmax)
  


In [0]:
net = Net() #create a net object for Net() class
params = net.parameters()  #weights and bias for the network

from torch import optim
optimizer = optim.Adam(params, lr=1e-3)   # defining the optimizer. Here Adam() class

EPOCHS = 10
for e in range(EPOCHS):
  for data in train_set:
    Xs, ys = data           # here data is a list of 16 X_Tensor and 16 y_Tensor. So, unpacking the list.
                            # Xs has shape torch.Size([16, 1, 28, 28]) and ys has shape torch.Size([16]). We need to flatten our Xs.
    X = Xs.view(-1, 28*28)  # note the reshaped dimension of X to be taken by the forward()
    net.zero_grad()         # zeroing the gradient for each batch of input data to avoid gradient accumulation.
    outputs = net(X)          # predicted log(softmax)
    loss = F.nll_loss(outputs, ys)   # nll = negative log likelihood. note it is a function from F module

    loss.backward()         # backpropagation: calculate gradients of  loss w.r.t parameters.
    optimizer.step()       # optimizing and updating parameters
  
  print(f"loss after {e+1} epochs: " +str(loss))
  


loss after 1 epochs: tensor(0.0293, grad_fn=<NllLossBackward>)
loss after 2 epochs: tensor(0.6349, grad_fn=<NllLossBackward>)
loss after 3 epochs: tensor(0.1297, grad_fn=<NllLossBackward>)
loss after 4 epochs: tensor(0.0150, grad_fn=<NllLossBackward>)
loss after 5 epochs: tensor(0.0322, grad_fn=<NllLossBackward>)
loss after 6 epochs: tensor(0.1674, grad_fn=<NllLossBackward>)
loss after 7 epochs: tensor(0.0121, grad_fn=<NllLossBackward>)
loss after 8 epochs: tensor(0.0127, grad_fn=<NllLossBackward>)
loss after 9 epochs: tensor(0.0006, grad_fn=<NllLossBackward>)
loss after 10 epochs: tensor(0.1645, grad_fn=<NllLossBackward>)


In [0]:
# finding the accuracy on train_set
total, correct = 0, 0
with torch.no_grad():
  for data in train_set:
    Xs, ys = data
    X = Xs.view(-1, 28*28)
    outputs = net(X)  # it has shape (16,10). It is a log_prob Tensor
    predicted = torch.argmax(outputs, dim=1)  # log is an increasing function. so, argmax for prob or log_prob remains same. Note dim=1, maximum across columns, i.e. horizontally.
    for i in range(len(ys)):
      total+=1
      if ys[i]==predicted[i]:
        correct+=1
print(f"{correct} correct out of {total} training MNIST images with accuracy of {correct*100/total} %")
    

59545 correct out of 60000 with accuracy of 99.24166666666666 %


In [0]:
# finding the accuracy on test_set
total, correct = 0, 0
with torch.no_grad():
  for data in test_set:
    Xs, ys = data
    X = Xs.view(-1, 28*28)
    outputs = net(X)  # it has shape (16,10). It is a log_prob Tensor
    predicted = torch.argmax(outputs, dim=1)  # log is an increasing function. so, argmax for prob or log_prob remains same. Note dim=1, maximum across columns, i.e. horizontally.
    for i in range(len(ys)):
      total+=1
      if ys[i]==predicted[i]:
        correct+=1
print(f"{correct} correct out of {total} test MNIST images with accuracy of {correct*100/total} %")
    

9754 correct out of 10000 test MNIST images with accuracy of 97.54 %
