In [2]:
import torch, torchvision
import numpy as np

In [3]:
# ResNet-18 initialised with pretrained weights (fetched to the local cache on first use).
model = torchvision.models.resnet18(pretrained=True)

# Seed the global RNG right before drawing the dummy tensors so that `data`, `labels`
# and every value derived from them (prediction, loss) are the same on every
# Restart Kernel -> Run All.
torch.manual_seed(0)

# Dummy input standing in for a single 3-channel 64x64 image.
data = torch.rand(1, 3, 64, 64)
# Dummy target: one random value for each of the 1000 outputs the model produces.
labels = torch.rand(1, 1000)

Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to C:\Users\Cihat/.cache\torch\hub\checkpoints\resnet18-f37072fd.pth


  0%|          | 0.00/44.7M [00:00<?, ?B/s]

In [14]:
# Forward pass: run the dummy batch through the network to get its output scores.
prediction = model(data)

In [17]:
# The pretrained weights come from ImageNet, so the output holds one score for each of its 1000 classes.
prediction.shape

torch.Size([1, 1000])

In [16]:
loss

tensor(-489.1749, grad_fn=<SumBackward0>)

In [9]:
loss = (prediction - labels).sum()

In [10]:
# Backward pass: autograd computes the gradient of the loss with respect to every
# model parameter that requires grad and stores it in that parameter's .grad.
loss.backward()
# Display the loss again; its value is unchanged by the backward pass.
loss

tensor(-489.1749, grad_fn=<SumBackward0>)

In [11]:
# SGD optimizer registered over all of the model's parameters
# (learning rate 0.01, momentum 0.9).
optim = torch.optim.SGD(
    params=model.parameters(),
    momentum=0.9,
    lr=0.01,
)

In [12]:
# Gradient-descent step: update each parameter using the gradient stored in its .grad.
optim.step()

In [37]:
# Autograd demo: two leaf tensors whose operations are recorded for differentiation.
a = torch.tensor([2.0, 3.0], requires_grad=True)
b = torch.tensor([6.0, 4.0], requires_grad=True)

In [41]:
# Q = 3a^3 - b^2, evaluated element-wise and assembled from its two terms.
cubic_term = 3 * a ** 3
squared_term = b ** 2
Q = cubic_term - squared_term

In [48]:
# Q is a two-element vector, not a scalar, so backward() is given an explicit
# upstream gradient of the same shape; all ones weights each element of Q equally.
external_grad = torch.tensor([1.0, 1.0])
Q.backward(external_grad)

In [49]:
# Gradient of Q with respect to a: dQ/da = 9 * a**2, evaluated at a = [2, 3].
a.grad

tensor([36., 81.])

In [56]:
# Frozen parameters and fine-tuning
# ---------------------------------
# In a neural network, parameters that do not compute gradients are usually called
# frozen parameters. Parameters that are not frozen keep their saved gradients in
# memory, so freezing whatever does not need updating avoids that cost.
#
# Fine-tuning a pre-trained network relies on this: most of the model is frozen and,
# typically, only the classification layer takes part in the graph. Example below.

model = torchvision.models.resnet18(pretrained=True)  # start from the pre-trained model

# Freeze every parameter currently in the network.
model.requires_grad_(False)

# The last layer of a ResNet is called "fc" and currently outputs 1000 classes:
#   (fc): Linear(in_features=512, out_features=1000, bias=True)
# Replace it with a new layer that outputs 10 classes.
model.fc = torch.nn.Linear(512, 10)

# At this point all parameters are frozen except those of the new last layer.

# Optimize only the classifier. All model parameters are handed to the optimizer on
# purpose: only the last layer is added to the computational graph, so it is the only
# one that receives gradients to apply.
optimizer = torch.optim.SGD(
    model.parameters(),
    lr=1e-2,
    momentum=0.9,
)