<a href="https://colab.research.google.com/github/fardil-b/DL-PyTorch/blob/main/Intro_Autograd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Autograd

In [1]:
# Reproducibility setup: fix the RNG seeds and pin cuDNN's behaviour to try to get the
# same results on every run - this is not guaranteed across PyTorch releases.
import numpy as np
import torch

# Seed NumPy's global generator and PyTorch's default generator with the same value.
np.random.seed(0)
torch.manual_seed(0)

# Turn off cuDNN benchmarking and request deterministic cuDNN behaviour.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

In [2]:
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

# Normalisation statistics, one value per image channel (a single channel here).
mean, std = (0.5,), (0.5,)

# Preprocessing pipeline: convert each image to a tensor, then normalise it.
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
])

# Training split: downloaded if missing, served in shuffled batches of 64.
trainset = datasets.FashionMNIST(
    '~/.pytorch/FMNIST/', train=True, download=True, transform=transform
)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Test split: same preprocessing, served in order (no shuffling) in batches of 64.
testset = datasets.FashionMNIST(
    '~/.pytorch/FMNIST/', train=False, download=True, transform=transform
)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz



HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Processing...
Done!


  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [3]:
class FMNIST(nn.Module):
  """Fully connected classifier: 784 input features -> 128 -> 64 -> 10 classes.

  ReLU follows each of the two hidden layers; the output layer is followed by a
  log-softmax over the class dimension, so the network returns log-probabilities.
  """

  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(784, 128)
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 10)

  def forward(self, x):
    """Return per-class log-probabilities for a batch of inputs.

    Args:
      x: tensor whose first dimension is the batch; the remaining dimensions
        must hold 784 values per sample in total (e.g. shape (N, 1, 28, 28)
        or (N, 784)).

    Returns:
      Tensor of shape (N, 10) holding log-probabilities for each class.
    """
    # Collapse everything after the batch dimension into one feature axis.
    # `flatten` copies when it has to, so non-contiguous inputs (e.g. a
    # transposed batch) work; `view` would raise a RuntimeError on them.
    x = x.flatten(start_dim=1)

    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)

    # Normalise over the class axis (dim=1), not over the batch.
    return F.log_softmax(x, dim=1)
    
#model = FMNIST()   

In [6]:
# Same 784 -> 128 -> 64 -> 10 network expressed as a Sequential container,
# so each layer can be reached by integer index.
model = nn.Sequential(
    nn.Linear(784, 128),   # index 0: input features -> first hidden layer
    nn.ReLU(),             # index 1
    nn.Linear(128, 64),    # index 2: first hidden -> second hidden layer
    nn.ReLU(),             # index 3
    nn.Linear(64, 10),     # index 4: second hidden layer -> 10 class scores
    nn.LogSoftmax(dim=1),  # index 5: log-probabilities over the class axis
)

Indexing into the `nn.Sequential` model retrieves its individual layers.

In [7]:
# first entry of the Sequential container (index 0): the input Linear layer
model[0]

Linear(in_features=784, out_features=128, bias=True)

In [9]:
model[1] # second layer (index 1): the ReLU activation that follows the first Linear layer

ReLU()

In [14]:
# Pull a single mini-batch (images with their labels) from the training loader,
# which is configured with batch_size=64.
images, labels = next(iter(trainloader))
# Flatten each image into one row so the batch can be fed to the first Linear layer.
images = images.view(images.size(0), -1)

In [11]:
# negative log-likelihood loss; applied below to the model output and the batch labels
criterion = nn.NLLLoss()

In [13]:
# weights for the first layer (model[0]), shown before any backward pass has been run
print('Initial weights: ',model[0].weight) 


Initial weights:  Parameter containing:
tensor([[ 0.0293, -0.0058, -0.0166,  ...,  0.0080, -0.0320,  0.0049],
        [-0.0325,  0.0265, -0.0318,  ..., -0.0249, -0.0121, -0.0291],
        [ 0.0287, -0.0092, -0.0321,  ...,  0.0192, -0.0141, -0.0038],
        ...,
        [ 0.0341, -0.0226,  0.0211,  ...,  0.0124, -0.0071, -0.0087],
        [-0.0062,  0.0299, -0.0219,  ...,  0.0322, -0.0339,  0.0043],
        [ 0.0222,  0.0252,  0.0015,  ..., -0.0071, -0.0133, -0.0305]],
       requires_grad=True)


These are the random values that were generated to initialise the weights of the first layer.

In [15]:
# gradients of the first layer's weights; `.grad` prints as None here (see the output below)
print('Initial weights gradients: ',model[0].weight.grad) 


Initial weights gradients:  None


As we haven't done a backward pass yet, there are no gradients.

In [16]:

# Clear gradients left over from a previous run of this cell. backward() adds to
# `.grad` instead of overwriting it, so re-running the cell without this would
# accumulate (e.g. double) the gradients shown below.
model.zero_grad()

output = model(images)            # forward pass
loss = criterion(output, labels)  # calculate loss
loss.backward()                   # backward pass: autograd fills in each parameter's .grad

# No optimiser step has been taken, so the weights printed here are still the initial ones.
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)
        

Initial weights :  Parameter containing:
tensor([[ 0.0293, -0.0058, -0.0166,  ...,  0.0080, -0.0320,  0.0049],
        [-0.0325,  0.0265, -0.0318,  ..., -0.0249, -0.0121, -0.0291],
        [ 0.0287, -0.0092, -0.0321,  ...,  0.0192, -0.0141, -0.0038],
        ...,
        [ 0.0341, -0.0226,  0.0211,  ...,  0.0124, -0.0071, -0.0087],
        [-0.0062,  0.0299, -0.0219,  ...,  0.0322, -0.0339,  0.0043],
        [ 0.0222,  0.0252,  0.0015,  ..., -0.0071, -0.0133, -0.0305]],
       requires_grad=True)
Initial weights gradient :  tensor([[ 1.4081e-03,  1.4067e-03,  1.4081e-03,  ...,  1.2042e-03,
          1.4081e-03,  1.4081e-03],
        [ 5.2849e-03,  5.2850e-03,  5.2871e-03,  ...,  5.6678e-03,
          5.1968e-03,  5.2849e-03],
        [ 3.3629e-04,  3.3721e-04,  3.3881e-04,  ..., -3.3522e-04,
          2.7837e-04,  3.3629e-04],
        ...,
        [-2.6133e-03, -2.6099e-03, -2.6118e-03,  ..., -1.3745e-03,
         -2.5925e-03, -2.6133e-03],
        [-6.4507e-05, -6.4507e-05, -6.4507e-0

The weights have stayed exactly the same, but the gradients have been calculated automatically. This is done by Autograd. The first layer's gradient is a tensor of 128 × 784 values, the same shape as its weight matrix.

In [17]:
# model[1] is the ReLU activation; it has no `weight` attribute, so the lookup fails.
# The failure is the point of this cell, so catch it and display the message
# instead of letting the exception halt a Restart & Run All.
try:
    model[1].weight
except AttributeError as err:
    print(f'{type(err).__name__}: {err}')

ModuleAttributeError: ignored

Activation functions like ReLU don't have any weights associated with them. Their role is to add non-linearity to the neural network.

In [20]:
# 3rd layer (index 2 of the Sequential container): the second Linear layer
model[2]

Linear(in_features=128, out_features=64, bias=True)

In [21]:
model[2].weight # weights of the second Linear layer (index 2)

Parameter containing:
tensor([[ 0.0088,  0.0251, -0.0064,  ...,  0.0527,  0.0386, -0.0859],
        [-0.0638, -0.0239,  0.0220,  ...,  0.0402,  0.0338,  0.0470],
        [ 0.0469,  0.0858, -0.0170,  ...,  0.0561,  0.0392,  0.0382],
        ...,
        [-0.0489, -0.0277,  0.0498,  ...,  0.0250,  0.0003, -0.0415],
        [ 0.0246, -0.0806,  0.0318,  ..., -0.0083, -0.0674, -0.0012],
        [-0.0314, -0.0470, -0.0759,  ..., -0.0249,  0.0021,  0.0272]],
       requires_grad=True)

In [23]:
model[2].weight.grad # gradients of the second Linear layer's weights, filled in by the backward pass above

tensor([[-6.6124e-04, -2.7212e-04, -7.9206e-04,  ..., -1.0297e-02,
          3.9120e-04, -2.4670e-03],
        [-1.1408e-03, -1.2231e-03,  2.5079e-03,  ...,  1.8252e-02,
         -4.7817e-04,  9.0496e-04],
        [-4.0849e-03, -8.4884e-03,  2.8251e-03,  ...,  7.1904e-03,
         -1.8588e-03,  1.4387e-04],
        ...,
        [ 1.3116e-03,  5.2450e-04,  1.7303e-04,  ...,  5.6837e-04,
          1.0533e-03,  1.5475e-04],
        [-1.2523e-03, -1.5143e-03, -1.1330e-05,  ..., -2.6504e-03,
         -1.6432e-04, -4.1305e-04],
        [-4.6157e-04,  1.3336e-03, -4.4820e-05,  ..., -2.0998e-04,
         -1.2475e-04, -9.1986e-05]])