In [1]:
import torch, torchvision
# Load a ResNet-18 with pretrained weights (downloaded on first use).
# NOTE(review): newer torchvision releases replace `pretrained=` with a
# `weights=` argument -- confirm against the installed version before changing.
model = torchvision.models.resnet18(pretrained=True)

# One random 3-channel 64x64 input and a matching random 1000-value target.
data = torch.rand((1, 3, 64, 64))
labels = torch.rand((1, 1000))

Downloading: "https://download.pytorch.org/models/resnet18-5c106cde.pth" to C:\Users\NERO/.cache\torch\hub\checkpoints\resnet18-5c106cde.pth
100%|██████████| 44.7M/44.7M [00:07<00:00, 6.47MB/s]


In [2]:
# Forward pass: feed the input batch through the model to obtain its output.
prediction = model(data)

In [3]:
# Reduce the element-wise error to a single scalar so it can be back-propagated.
loss = torch.sum(prediction - labels)

# Backward pass: populate `.grad` on every parameter that contributed to `loss`.
loss.backward()

In [4]:
# SGD over all of the model's parameters, with momentum.
optim = torch.optim.SGD(
    model.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [5]:
# Apply one optimizer update using the gradients produced by `loss.backward()`.
optim.step()

## Differentiation in Autograd

In [6]:
import torch

# Two leaf tensors whose operations autograd should track.
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

In [7]:
# Q = 3a^3 - b^2, computed element-wise from the two tracked tensors.
Q = 3 * a.pow(3) - b.pow(2)

In [8]:
# Q is a vector, so backward() needs an explicit gradient with one entry per
# element of Q; all ones weights every element equally.
external_grad = torch.tensor([1.0, 1.0])
Q.backward(external_grad)

In [9]:
# check if collected gradients are correct
print(9*a**2 == a.grad)
print(-2*b == b.grad)

tensor([True, True])
tensor([True, True])


In [10]:
# Display the gradient accumulated on `a` by the backward pass over Q.
a.grad

tensor([36., 81.])

The output tensor of an operation will require gradients even if only a single input tensor has `requires_grad=True`.

In [11]:
# Two untracked tensors and one that autograd tracks.
x = torch.rand((5, 5))
y = torch.rand((5, 5))
z = torch.rand(5, 5, requires_grad=True)

# Neither operand is tracked, so the result is not tracked either.
a = torch.add(x, y)
print(f"Does `a` require gradients? : {a.requires_grad}")

# A single tracked operand is enough for the result to be tracked.
b = torch.add(x, z)
print(f"Does `b` require gradients?: {b.requires_grad}")

Does `a` require gradients? : False
Does `b` require gradients?: True


In a NN, parameters that don’t compute gradients are usually called **frozen parameters**. It is useful to “freeze” part of your model if you know in advance that you won’t need the gradients of those parameters (this offers some performance benefits by reducing autograd computations).

Another common use case where exclusion from the DAG is important is [finetuning a pretrained network](https://pytorch.org/tutorials/beginner/finetuning_torchvision_models_tutorial.html).

In [12]:
# NOTE(review): this import rebinds the name `optim`, which an earlier cell
# assigned to an SGD optimizer instance; re-running that earlier
# `optim.step()` cell after this one would hit the module, not the optimizer.
from torch import nn, optim

# Start again from a fresh pretrained ResNet-18.
model = torchvision.models.resnet18(pretrained=True)

# Freeze all the parameters in the network so autograd skips them.
for param in model.parameters():
    param.requires_grad_(False)

In [13]:
# Inspect the model's final fully connected layer.
model.fc

Linear(in_features=512, out_features=1000, bias=True)

In [14]:
# Swap the 1000-output classifier head for a new 10-output linear layer.
# The layer is created after the freezing loop ran, so that loop never
# touched its parameters.
model.fc = nn.Linear(in_features=512, out_features=10)