<h4>torch.autograd is PyTorch's automatic differentiation engine that powers neural network training.</h4>
Note: The code in this notebook runs only on the CPU; it will not work on GPU devices.

In [1]:
"""
Pretrained model 'ResNet18' will be loaded.
A random data tensor presenting an image with 3 channels will be created
Its label will be initialized to random values
Note: Label in pretrained models has shape (1,1000)
"""

import torch
from torchvision.models import resnet18, ResNet18_Weights

model = resnet18(weights=ResNet18_Weights.DEFAULT)
data = torch.rand(1, 3, 64, 64)
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\kyse1/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth
100.0%


In [2]:
# Display the module tree of the loaded ResNet (layers and their settings).
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [3]:
# Display the random input tensor (created with shape 1 x 3 x 64 x 64).
data

tensor([[[[0.2117, 0.5543, 0.9860,  ..., 0.7252, 0.4199, 0.1249],
          [0.1111, 0.9561, 0.2540,  ..., 0.3054, 0.3626, 0.7873],
          [0.7984, 0.8573, 0.9884,  ..., 0.1853, 0.2349, 0.4863],
          ...,
          [0.5683, 0.6763, 0.8860,  ..., 0.2888, 0.8374, 0.0172],
          [0.3904, 0.8462, 0.4567,  ..., 0.4410, 0.7195, 0.6702],
          [0.3197, 0.9499, 0.9307,  ..., 0.1056, 0.7432, 0.1767]],

         [[0.3033, 0.7592, 0.4992,  ..., 0.9840, 0.1519, 0.0893],
          [0.8509, 0.8755, 0.8676,  ..., 0.9440, 0.2599, 0.4061],
          [0.2671, 0.9063, 0.4324,  ..., 0.4271, 0.0232, 0.8376],
          ...,
          [0.6181, 0.1994, 0.5076,  ..., 0.8251, 0.9532, 0.4044],
          [0.0194, 0.5830, 0.4186,  ..., 0.3481, 0.6644, 0.5034],
          [0.1284, 0.0809, 0.5181,  ..., 0.7996, 0.6881, 0.4045]],

         [[0.3761, 0.6164, 0.7629,  ..., 0.7560, 0.1402, 0.5244],
          [0.5427, 0.5400, 0.9914,  ..., 0.4850, 0.2918, 0.2418],
          [0.8186, 0.1123, 0.5283,  ..., 0

In [4]:
# Display the random label tensor (created with shape 1 x 1000).
labels

tensor([[0.9103, 0.5827, 0.3281, 0.9176, 0.0942, 0.7976, 0.6474, 0.3580, 0.4611,
         0.4489, 0.2196, 0.7552, 0.1393, 0.3464, 0.0439, 0.9813, 0.7965, 0.0831,
         0.6596, 0.8187, 0.4120, 0.9241, 0.7272, 0.7154, 0.5381, 0.1474, 0.1309,
         0.5559, 0.0474, 0.0157, 0.1930, 0.8957, 0.8393, 0.8029, 0.8446, 0.1859,
         0.4702, 0.7659, 0.3644, 0.9375, 0.3364, 0.4843, 0.1862, 0.2054, 0.9987,
         0.5101, 0.2509, 0.8099, 0.1266, 0.1096, 0.6033, 0.4787, 0.5501, 0.2900,
         0.8968, 0.7278, 0.5287, 0.3575, 0.4245, 0.5874, 0.5568, 0.5213, 0.6846,
         0.0799, 0.8432, 0.6071, 0.6500, 0.9375, 0.4400, 0.4011, 0.2986, 0.8677,
         0.7065, 0.4443, 0.0424, 0.5432, 0.0328, 0.8372, 0.3691, 0.1141, 0.3408,
         0.7881, 0.6739, 0.5589, 0.5244, 0.8985, 0.5341, 0.2660, 0.2945, 0.2176,
         0.6634, 0.2643, 0.4205, 0.2053, 0.8530, 0.1416, 0.7663, 0.5845, 0.8666,
         0.0524, 0.4315, 0.9919, 0.8921, 0.9445, 0.9686, 0.4712, 0.5403, 0.6039,
         0.7016, 0.8838, 0.3

In [5]:
# Forward pass: run the input tensor through the model to obtain its prediction.
# The prediction is what gets compared against `labels` to compute the error.
prediction = model(data)  # forward pass

In [6]:
# Display the model's output for the random input.
prediction

tensor([[-0.3340, -0.3251, -0.3337, -1.4205, -0.4836,  0.0316, -0.3429,  0.8390,
          0.6336, -0.5129, -0.8618, -0.8016, -0.4476, -0.8705, -1.1114, -0.3287,
         -0.8865, -0.2240, -0.0344, -0.6132, -1.4345, -0.3658, -1.1085,  0.6282,
         -0.6967, -1.0995, -0.7047, -1.1743, -0.8698, -0.2256, -0.4846, -0.6727,
         -0.3551, -0.6691, -0.3513, -0.4912,  0.6421, -0.8465, -0.5512,  0.2823,
         -0.6742, -0.7810, -1.1979, -0.1570, -0.7531, -0.5526, -0.7830, -0.1931,
         -1.0609, -0.8086, -0.2225,  0.3904, -0.4159, -0.7328, -0.0992, -1.0215,
         -0.3966, -1.4470, -0.4947, -0.4091,  0.7632, -0.0052,  0.0652,  0.4079,
         -0.5526, -0.0791, -0.2292, -0.2164, -0.7383, -1.0964, -1.5240,  0.1300,
         -1.2628, -0.2270, -1.1127, -1.2977, -0.3154, -0.6963,  0.1692,  0.1453,
         -0.4542, -1.2775,  0.2216, -0.4617, -0.1862,  0.0516,  0.1729,  0.4919,
          0.1123, -0.3221, -1.1843, -0.9452, -1.8720, -0.1097,  0.3717, -1.9346,
         -0.5536, -0.1963, -

In [7]:
# Reduce the element-wise difference between the prediction and the labels to a
# single scalar error value.
# Backpropagating this scalar makes autograd compute the gradient for every model
# parameter and store it in that parameter's `.grad` attribute.

loss = torch.sum(prediction - labels)
loss

tensor(-516.5287, grad_fn=<SumBackward0>)

In [8]:
# Backward pass: backpropagate the error so that autograd computes the gradients
# and stores them in each parameter's `.grad` attribute.
loss.backward()

In [9]:
# Create an SGD optimizer (learning rate 0.01, momentum 0.9) and register all of
# the model's parameters with it.
# NOTE(review): the name `optim` is rebound to the `torch.optim` module by a later
# `from torch import nn, optim` cell, so re-running this part of the notebook after
# that cell changes what `optim` refers to.
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)  # 1e-2 = 0.01

In [10]:
# Update every registered parameter using the gradient stored in its `.grad`.
optim.step()  # gradient descent

'ResNet'

#### Differentiation in Autograd

In [14]:
# Two leaf tensors that autograd should track; requires_grad_() switches on
# gradient recording for each of them in place.
a = torch.tensor([2.0, 3.0]).requires_grad_(True)
b = torch.tensor([4.0, 5.0]).requires_grad_(True)

In [15]:
# Q = 3a^3 - b^2, evaluated element-wise on a and b.
Q = 3 * a.pow(3) - b.pow(2)

In [16]:
# Display Q; the grad_fn shown in its repr indicates autograd is tracking it.
Q

tensor([ 8., 56.], grad_fn=<SubBackward0>)

#### The next explanations are from "Deep Learning with PyTorch: A 60 Minute Blitz > A Gentle Introduction to torch.autograd" tutorial.
#### https://pytorch.org/tutorials/beginner/blitz/autograd_tutorial.html


<h5>Let’s assume a and b to be parameters of an NN, and Q to be the error. In NN training, we want gradients of the error w.r.t. parameters.</h5>
For $Q = 3a^3 - b^2$, this means $\frac{\partial Q}{\partial a} = 9a^2$ and $\frac{\partial Q}{\partial b} = -2b$.
<h5>When we call .backward() on Q, autograd calculates these gradients and stores them in the respective tensors’ .grad attribute.</h5>

<h5>We need to explicitly pass a gradient argument in Q.backward() because it is a vector. gradient is a tensor of the same shape as Q, and it represents the gradient of Q w.r.t. itself, i.e.</h5>
dQ / dQ = 1

Equivalently, we can also aggregate Q into a scalar and call backward implicitly, like Q.sum().backward().

In [18]:
# Q is a vector, so backward() needs an explicit gradient of the same shape;
# a tensor of ones represents dQ/dQ = 1 for every element.
external_grad = torch.ones_like(Q)
Q.backward(gradient=external_grad)

In [19]:
# The gradient autograd stored for a should equal the analytic value 9a^2.
print(a.grad == 9 * a.pow(2))

tensor([True, True])


In [20]:
# The gradient autograd stored for b should equal the analytic value -2b.
print(b.grad == -2 * b)

tensor([True, True])


In [22]:
# NOTE: this import binds the name `optim` to the torch.optim module; earlier in
# the notebook the same name was assigned an SGD optimizer instance.
from torch import nn, optim

# Start again from a freshly loaded pretrained ResNet-18.
model = resnet18(weights=ResNet18_Weights.DEFAULT)

# Freeze the network: turn off gradient tracking for every existing parameter.
for param in model.parameters():
    param.requires_grad_(False)

In [23]:
# Swap the model's `fc` layer for a newly created linear layer with 10 outputs.
# The new layer's parameters are fresh, so they are not affected by the freezing
# loop that ran before this assignment.
model.fc = nn.Linear(in_features=512, out_features=10)

In [25]:
# Build the optimizer through the fully qualified `torch.optim` path so this cell
# does not depend on what the bare name `optim` is currently bound to (elsewhere
# in this notebook that name is also assigned an optimizer instance).
# All parameters are registered, but parameters whose requires_grad is False
# never receive a gradient, so the optimizer leaves them unchanged.
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)