# Simple autograd

In [1]:
import torch, torchvision

# Pretrained ResNet-18 plus one random input/target pair for a single training step.
# The model is built first so the order of random draws stays as it was.
model = torchvision.models.resnet18(pretrained=True)
data = torch.rand((1, 3, 64, 64))  # random input of shape (1, 3, 64, 64)
labels = torch.rand((1, 1000))  # random target of shape (1, 1000)

In [2]:
# Feed the random input through the network; `prediction` holds its raw outputs.
prediction = model(data)  # forward pass

  return torch.max_pool2d(input, kernel_size, stride, padding, dilation, ceil_mode)


In [3]:
# Display the raw model output computed by the forward pass.
prediction

tensor([[-1.0068e-01, -5.2057e-01, -4.2017e-01, -1.2647e+00, -5.0478e-01,
          1.8201e-01, -1.3801e-01,  4.2159e-01,  3.6176e-01, -7.7721e-01,
         -7.3244e-01, -3.5386e-01, -1.1883e-01, -7.1301e-01, -7.9938e-01,
         -6.0982e-01, -8.4926e-01, -3.2990e-01, -4.1159e-01, -5.8680e-01,
         -1.3093e+00, -3.9951e-01, -1.1348e+00,  5.1822e-01, -9.2160e-01,
         -9.6355e-01, -5.3444e-01, -1.3327e+00, -7.7128e-01, -1.4207e-01,
         -4.5393e-01, -7.7444e-01, -1.5041e-01, -5.1622e-01, -3.0619e-01,
         -3.4039e-01,  5.2779e-01, -5.7775e-01, -3.0846e-01,  3.2483e-01,
         -6.1573e-01, -6.5536e-01, -8.5377e-01, -1.5661e-01, -5.3701e-01,
         -3.5975e-01, -7.0454e-01, -4.5725e-01, -1.2100e+00, -8.8316e-01,
         -4.4823e-01,  4.7922e-01, -2.4586e-01, -4.6406e-01, -2.8528e-01,
         -1.0659e+00, -5.5954e-01, -1.3556e+00, -3.4588e-01, -3.4513e-01,
          8.8586e-01,  5.3778e-02,  1.6636e-01,  2.8274e-02, -7.0387e-01,
         -1.5831e-01, -1.3709e-01, -1.

In [4]:
# Reduce the element-wise error to a single scalar so backward() needs no
# explicit upstream gradient.
loss = torch.sum(prediction - labels)
loss.backward()  # backward pass

In [5]:
optim = torch.optim.SGD(model.parameters(), lr=1e-2, momentum=0.9)

In [6]:
optim.step()

# Details 


In [7]:
import torch

# Two leaf tensors whose gradients autograd should collect.
a = torch.tensor([2.0, 3.0]).requires_grad_()
b = torch.tensor([6.0, 4.0]).requires_grad_()

In [8]:
# Q = 3a^3 - b^2, evaluated element-wise.
Q = 3 * a.pow(3) - b.pow(2)

In [9]:
# Display Q; as the result of tracked operations it carries a grad_fn.
Q

tensor([-12.,  65.], grad_fn=<SubBackward0>)

In [10]:
# Q is a vector, so backward() needs an upstream gradient of matching shape;
# all ones weights each element of Q equally.
external_grad = torch.ones(2)
Q.backward(external_grad)

In [11]:
# Check the gradients autograd collected against the analytic derivatives
# dQ/da = 9a^2 and dQ/db = -2b. torch.isclose compares element-wise within a
# tolerance, so the check does not hinge on bit-exact floating-point equality.
print(torch.isclose(9 * a ** 2, a.grad))
print(torch.isclose(-2 * b, b.grad))

tensor([True, True])
tensor([True, True])


# Exclusion from the DAG

In [12]:
# x and y are plain tensors; only z is tracked by autograd.
x, y = torch.rand(5, 5), torch.rand(5, 5)
z = torch.rand((5, 5), requires_grad=True)

# A result requires gradients as soon as one of its inputs does.
a = torch.add(x, y)
print(f"Does `a` require gradients? : {a.requires_grad}")
b = torch.add(x, z)
print(f"Does `b` require gradients?: {b.requires_grad}")

Does `a` require gradients? : False
Does `b` require gradients?: True


In [13]:
from torch import nn, optim

model = torchvision.models.resnet18(pretrained=True)

# Turn off gradient tracking for every parameter currently in the network.
for weight in model.parameters():
    weight.requires_grad_(False)

In [14]:
# Swap the final layer for a fresh 10-output linear layer. The input width is
# read from the layer being replaced instead of hard-coding 512, so the cell
# keeps working if a backbone with a different feature size is used.
model.fc = nn.Linear(model.fc.in_features, 10)

In [15]:
# Display the module tree of the network.
model

ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [16]:
# Every parameter is handed to SGD; all of them except the replacement `fc`
# layer were frozen (requires_grad=False) in the cells above.
optimizer = optim.SGD(
    params=model.parameters(),
    lr=0.01,
    momentum=0.9,
)

In [17]:
# Display the optimizer's parameter-group settings.
optimizer

SGD (
Parameter Group 0
    dampening: 0
    lr: 0.01
    momentum: 0.9
    nesterov: False
    weight_decay: 0
)

# Autograd

In [17]:
import torch

# One linear layer, z = x·w + b, scored with binary cross-entropy on logits.
x = torch.ones((5,))  # input tensor
y = torch.zeros((3,))  # expected output
# Only the parameters w and b are tracked by autograd.
w = torch.randn((5, 3), requires_grad=True)
b = torch.randn((3,), requires_grad=True)
z = x @ w + b
loss = torch.nn.functional.binary_cross_entropy_with_logits(input=z, target=y)

In [18]:
# Display the input and expected-output tensors as a tuple.
x, y

(tensor([1., 1., 1., 1., 1.]), tensor([0., 0., 0.]))

In [4]:
# Show which backward node autograd attached to each computed tensor.
for message, node in (
    ("Gradient function for z =", z.grad_fn),
    ("Gradient function for loss =", loss.grad_fn),
):
    print(message, node)

Gradient function for z = <AddBackward0 object at 0x7f30ec1289d0>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward object at 0x7f30ec128790>


In [22]:
# Back-propagate from the loss; this populates .grad on the tracked leaves w and b.
loss.backward()

In [23]:
# Gradient of the loss with respect to w, filled in by loss.backward().
w.grad

tensor([[0.0402, 0.1622, 0.3149],
        [0.0402, 0.1622, 0.3149],
        [0.0402, 0.1622, 0.3149],
        [0.0402, 0.1622, 0.3149],
        [0.0402, 0.1622, 0.3149]])

In [24]:
# Gradient of the loss with respect to b, filled in by loss.backward().
b.grad

tensor([0.0402, 0.1622, 0.3149])

In [25]:
# Outside no_grad the result is tracked, because w and b require gradients.
z = x @ w + b
print(z.requires_grad)

# Inside no_grad the same computation runs without being recorded for autograd.
with torch.no_grad():
    z = x @ w + b
print(z.requires_grad)

True
False
