# Practical NN using PyTorch: Digit recognition

In [4]:
# import all necessary libraries
#import numpy as np
import torch 
import torch.nn as nn
#import pandas as pd
#from sklearn.preprocessing import StandardScaler
#from torch.utils.data import Dataset
from torch.utils.data import DataLoader # batching/shuffling wrapper used to build train_loader and test_loader
from torchvision import datasets 
import torchvision.transforms as transforms # to transform the dataset into tensor

In [6]:
# Fetch MNIST into the current folder; ToTensor converts each image to a tensor on access
train_dataset = datasets.MNIST(
    root='./',
    train=True,                        # training split
    transform=transforms.ToTensor(),
    download=True,                     # fetch the archives if they are not on disk yet
)

# Test split, read from the same root folder (no download requested here)
test_dataset = datasets.MNIST(
    root='./',
    train=False,
    transform=transforms.ToTensor(),
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST\raw\train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST\raw\train-images-idx3-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST\raw\train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST\raw\train-labels-idx1-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST\raw\t10k-images-idx3-ubyte.gz




HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST\raw\t10k-images-idx3-ubyte.gz to ./MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST\raw\t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting ./MNIST\raw\t10k-labels-idx1-ubyte.gz to ./MNIST\raw
Processing...
Done!


In [14]:
# Roughly an 86% / 14% train/test split: the printed shapes are
# (num_samples, 28, 28), i.e. one 28x28-pixel image per sample.
for split in (train_dataset, test_dataset):
    print(split.data.shape)

torch.Size([60000, 28, 28])
torch.Size([10000, 28, 28])


torch.Size([28])

In [15]:
batch_size = 100  # number of samples per mini-batch
# Wrap both datasets in DataLoaders for mini-batch iteration.
# Training batches are reshuffled on every pass over the data.
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=batch_size,
                          shuffle=True)
# Accuracy does not depend on the order in which samples are visited,
# so the test set is iterated in its stored order (deterministic evaluation pass).
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

In [20]:
# Network and training hyper-parameters
# Layer sizes: 784 inputs (28*28 pixels) -> 400 hidden units -> 10 outputs (one per label).
# The hidden width is approximately (input_neurons + output_neurons) / 2.
input_size = 28 * 28    # number of input neurons (image pixels)
hidden_size = 400       # number of neurons in each hidden layer
out_size = 10           # number of classes (digits 0-9)

# Optimisation settings
learning_rate = 1e-3    # step size used by the optimizer
batch_size = 100        # number of samples processed in one iteration
epochs = 10             # number of full passes over the training set


In [31]:
## Defining NN Architecture 
class Net(nn.Module):
    """Fully connected classifier: two ReLU-activated hidden layers, then a linear output layer.

    Args:
        input_size: number of features per input sample.
        hidden_size: width of both hidden layers.
        out_size: number of output scores per sample.
    """

    def __init__(self, input_size, hidden_size, out_size):
        super().__init__()
        # Layers are created in forward order: input -> hidden -> hidden -> output
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, hidden_size)
        self.fc3 = nn.Linear(hidden_size, out_size)
        self.relu = nn.ReLU()
        self.init_weights()

    def init_weights(self):
        """Re-initialise the weights of fc1 and fc2 with He (Kaiming) normal initialisation.

        fc3 and all biases keep whatever ``nn.Linear`` set at construction time.
        """
        for hidden_layer in (self.fc1, self.fc2):
            nn.init.kaiming_normal_(hidden_layer.weight)

    def forward(self, x):
        """Return the raw output of fc3 (no softmax applied) for input ``x``."""
        hidden = self.relu(self.fc1(x))
        hidden = self.relu(self.fc2(hidden))
        return self.fc3(hidden)
        

In [32]:
# Seed PyTorch's global RNG before any random weights are drawn, so that a fresh
# "Restart Kernel -> Run All" produces the same initial network every time.
torch.manual_seed(0)
# Create the network, the loss function and the optimizer
net = Net(input_size, hidden_size, out_size)
# nn.CrossEntropyLoss applies log-softmax internally, so the network returns raw scores
# and no explicit Softmax layer is needed.
criterion = nn.CrossEntropyLoss()
# The optimizer is handed the network's parameters; these are the tensors it will update.
optimizer = torch.optim.Adam(net.parameters(), lr=learning_rate)

In [47]:
# Train the network
for epoch in range(epochs):
    net.train()           # explicit training mode (a no-op for this architecture, but safe if Dropout/BatchNorm are added)
    correct_train = 0     # correctly classified samples seen so far in this epoch
    running_loss = 0      # sum of per-batch losses in this epoch (accumulated, not printed below)
    total_train = 0       # samples seen so far in this epoch
    for i, (images, labels) in enumerate(train_loader):
        images = images.view(-1, 28*28)       # flatten every image into one row of 784 values
        output = net(images)                  # forward pass
        loss = criterion(output, labels)

        optimizer.zero_grad()                 # clear gradients left over from the previous step
        loss.backward()                       # backpropagation: stores a gradient in .grad of every parameter
        optimizer.step()                      # optimizer updates the parameters it was given, using their .grad

        # Bookkeeping only: take the argmax on detached scores (replaces the legacy `.data` access)
        predicted = output.detach().argmax(dim=1)
        # .item() keeps the counter a plain Python int, matching the evaluation cell
        correct_train += (predicted == labels).sum().item()
        running_loss += loss.item()
        total_train += labels.size(0)
        if (i+1) % 100 == 0:
            # Reports the current batch loss and the running accuracy over the epoch so far
            print('Epoch [{}/{}], Training Loss: {:.3f}, Training Accuracy: {:.3f}%'.format(
                epoch+1, epochs, loss.item(), 100 * correct_train / total_train))
print("DONE TRAINING!")

Epoch [1/10], Training Loss: 0.002, Training Accuracy: 99.920%
Epoch [1/10], Training Loss: 0.000, Training Accuracy: 99.930%
Epoch [1/10], Training Loss: 0.000, Training Accuracy: 99.897%
Epoch [1/10], Training Loss: 0.000, Training Accuracy: 99.835%
Epoch [1/10], Training Loss: 0.000, Training Accuracy: 99.826%
Epoch [1/10], Training Loss: 0.003, Training Accuracy: 99.800%
Epoch [2/10], Training Loss: 0.000, Training Accuracy: 99.810%
Epoch [2/10], Training Loss: 0.001, Training Accuracy: 99.660%
Epoch [2/10], Training Loss: 0.001, Training Accuracy: 99.633%
Epoch [2/10], Training Loss: 0.003, Training Accuracy: 99.588%
Epoch [2/10], Training Loss: 0.001, Training Accuracy: 99.604%
Epoch [2/10], Training Loss: 0.000, Training Accuracy: 99.580%
Epoch [3/10], Training Loss: 0.014, Training Accuracy: 99.840%
Epoch [3/10], Training Loss: 0.000, Training Accuracy: 99.855%
Epoch [3/10], Training Loss: 0.026, Training Accuracy: 99.840%
Epoch [3/10], Training Loss: 0.002, Training Accuracy: 

In [49]:
# Evaluate the network on the test set
net.eval()                      # inference mode (a no-op for this architecture, but safe if Dropout/BatchNorm are added)
with torch.no_grad():           # gradients are not needed for evaluation
    correct = 0                 # number of correctly classified test samples
    for images, labels in test_loader:
        images = images.view(-1, 28*28)       # flatten every image into one row of 784 values
        outputs = net(images)
        predicted = outputs.argmax(dim=1)     # index of the highest score per sample
        correct += (predicted == labels).sum().item()

    # The sample count in the message is taken from the dataset instead of being hard-coded
    print('Accuracy of the network on the {} test images: {} %'.format(
        len(test_dataset), 100 * correct / len(test_dataset)))

Accuracy of the network on the 10000 test images: 97.84 %


https://stackoverflow.com/questions/53975717/pytorch-connection-between-loss-backward-and-optimizer-step#:~:text=After%20computing%20the%20gradients%20for,grad%20to%20update%20their%20values.


Without delving too deep into the internals of pytorch, I can offer a simplistic answer:

Recall that when initializing the optimizer you explicitly tell it which parameters (tensors) of the model it should be updating. The gradients are "stored" by the tensors themselves (they have `grad` and `requires_grad` attributes) once you call backward() on the loss. After computing the gradients for all tensors in the model, calling optimizer.step() makes the optimizer iterate over all parameters (tensors) it is supposed to update and use their internally stored grad to update their values.

example:

# Our "model": a single tensor that tracks gradients
x = torch.tensor([1., 2.], requires_grad=True)
y = x * 100

# Compute loss
loss = torch.sum(y)

# The gradient of the loss w.r.t. x is only populated by backward()
print(x.grad)     # None
loss.backward()
print(x.grad)     # tensor([100., 100.])

# Modify the parameter: SGD subtracts lr * gradient from it
optim = torch.optim.SGD(params=[x], lr=0.001)
print(x)        # tensor([1., 2.], requires_grad=True)
optim.step()
print(x)        # tensor([0.9000, 1.9000], requires_grad=True)
