# Automatic Differentiation with torch.autograd

In [1]:
import torch

In [9]:
x = torch.ones(5)  # input tensor
y = torch.zeros(3)  # expected output
w = torch.randn(5, 3, requires_grad=True)
b = torch.randn(3, requires_grad=True)
print(f"X Shape: {x.shape}\n y Shape: {y.shape}\n w Shape: {w.shape}\n b Shape: {b.shape}")
print(f"x: {x}\ny: {y}\nw: {w}\nb: {b}")

X Shape: torch.Size([5])
 y Shape: torch.Size([3])
 w Shape: torch.Size([5, 3])
 b Shape: torch.Size([3])
x: tensor([1., 1., 1., 1., 1.])
y: tensor([0., 0., 0.])
w: tensor([[-0.7311, -1.7569,  0.0081],
        [ 0.6640, -0.8849,  0.7299],
        [ 0.1673,  0.0291,  0.6865],
        [ 0.1266,  0.5460,  0.1401],
        [ 0.6023,  0.1726, -0.8535]], requires_grad=True)
b: tensor([-1.4464,  0.3005, -0.1165], requires_grad=True)


In [10]:
z = (x @ w) + b
print(f"z Shape: {z.shape}")
print(f"z: {z}")

z Shape: torch.Size([3])
z: tensor([-0.6173, -1.5937,  0.5947], grad_fn=<AddBackward0>)


In [11]:
loss = torch.nn.functional.binary_cross_entropy_with_logits(z, y)
print(f"Loss: {loss}")

Loss: 0.5501523017883301


In [12]:
print(f"Gradient function for z = {z.grad_fn}")
print(f"Gradient function for loss = {loss.grad_fn}")

Gradient function for z = <AddBackward0 object at 0x000001B9024CDF90>
Gradient function for loss = <BinaryCrossEntropyWithLogitsBackward0 object at 0x000001B93490C880>


## Computing Gradients

In [13]:
loss.backward()
print(w.grad)
print(b.grad)

tensor([[0.1168, 0.0563, 0.2148],
        [0.1168, 0.0563, 0.2148],
        [0.1168, 0.0563, 0.2148],
        [0.1168, 0.0563, 0.2148],
        [0.1168, 0.0563, 0.2148]])
tensor([0.1168, 0.0563, 0.2148])


## Disabling Gradient Tracking

In [14]:
z = torch.matmul(x, w)+b
print(z.requires_grad)

with torch.no_grad():
    z = torch.matmul(x, w)+b
print(z.requires_grad)

True
False


## Tensor Gradients and Jacobian Products

In [15]:
inp = torch.eye(4, 5, requires_grad=True)
out = (inp+1).pow(2).t()
out.backward(torch.ones_like(out), retain_graph=True)
print(f"First call\n{inp.grad}")
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nSecond call\n{inp.grad}")
inp.grad.zero_()
out.backward(torch.ones_like(out), retain_graph=True)
print(f"\nCall after zeroing gradients\n{inp.grad}")

First call
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])

Second call
tensor([[8., 4., 4., 4., 4.],
        [4., 8., 4., 4., 4.],
        [4., 4., 8., 4., 4.],
        [4., 4., 4., 8., 4.]])

Call after zeroing gradients
tensor([[4., 2., 2., 2., 2.],
        [2., 4., 2., 2., 2.],
        [2., 2., 4., 2., 2.],
        [2., 2., 2., 4., 2.]])
