In [1]:
import torch

PyTorch provides the `autograd` package, which computes gradients automatically.

### addition

In [47]:
# y = x + 2, so dy/dx = 1 regardless of the random value drawn for x.
x = torch.randn(1, requires_grad=True)
y = x + 2
print(x)
print(y)
# y holds a single element, so backward() needs no explicit gradient argument.
y.backward()
x.grad

tensor([2.0867], requires_grad=True)
tensor([4.0867], grad_fn=<AddBackward0>)


tensor([1.])

### multiplication

In [43]:
# y2 = x * x, so dy2/dx = 2 * x.
x = torch.randn(1, requires_grad=True)
y2 = x * x
print(x)
print(y2)
y2.backward()
x.grad

tensor([1.0289], requires_grad=True)
tensor([1.0587], grad_fn=<MulBackward0>)


tensor([2.0579])

### exponential

In [40]:
# y3 = exp(x), and d/dx exp(x) = exp(x): the gradient equals the output itself.
x = torch.randn(1, requires_grad=True)
y3 = torch.exp(x)
print(y3)
y3.backward()
x.grad

tensor([3.4383], grad_fn=<ExpBackward0>)


tensor([3.4383])

In [48]:
# backward() without arguments only works when the output is a scalar.
# y3 has two elements here, so the call raises a RuntimeError; catch it and
# print the message so the failure is shown without aborting "Run All".
x = torch.randn(2, requires_grad=True)
print(torch.exp(x))
y3 = torch.exp(x)
try:
    y3.backward()
except RuntimeError as err:
    print(f"RuntimeError: {err}")
# backward() never completed, so no gradient was stored: x.grad is still None.
x.grad

tensor([0.5196, 2.7819], grad_fn=<ExpBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

In [55]:
# For a non-scalar output, backward() is given a tensor with the same shape as
# the output; each output element's gradient is scaled by the matching entry.
# Here x.grad = [1 * exp(x1), 4 * exp(x2)].
x = torch.randn(2, requires_grad=True)
y3 = torch.exp(x)
print(torch.exp(x))
print(y3)
y3.backward(gradient=torch.tensor([1, 4]))
x.grad

tensor([5.5600, 3.8629], grad_fn=<ExpBackward0>)
tensor([5.5600, 3.8629], grad_fn=<ExpBackward0>)


tensor([ 5.5600, 15.4517])

### detaching computation

In [None]:
# To stop autograd from computing gradients wrt x, any of the following three
# options can be used. Each one is followed by a check of requires_grad.
x = torch.randn(2, requires_grad=True)

# Option 1: detach() returns a new tensor that is not part of the graph;
# x itself is left unchanged.
x_detached = x.detach()
print(x_detached.requires_grad)

# Option 2: results of operations executed inside torch.no_grad() are not tracked.
with torch.no_grad():
    y_no_grad = x + 2
print(y_no_grad.requires_grad)

# Option 3: requires_grad_(False) switches tracking off on x itself, in place.
# It is done last so the two options above still start from a tracked tensor.
x.requires_grad_(False)
print(x.requires_grad)

In [60]:
# Baseline without detach: u is just another name for y, so it stays in the graph.
x = torch.arange(4, dtype=torch.float32, requires_grad=True)
y = x * x
u = y
z = u * x
# z = [x1^3, x2^3, x3^3, x4^3]
torch.sum(z).backward()

# gradient of sum(z) wrt x:
# x.grad = [3*x1^2, 3*x2^2, 3*x3^2, 3*x4^2]
print(x.grad)

tensor([ 0.,  3., 12., 27.])


In [61]:
# Same computation, but u is cut out of the graph with detach().
x = torch.arange(4, dtype=torch.float32, requires_grad=True)
y = x * x
u = y.detach()
z = u * x
# z = [u1*x1, u2*x2, u3*x3, u4*x4]
torch.sum(z).backward()

# u is treated as a constant, so no gradient flows back through it:
# x.grad = [u1, u2, u3, u4]
print(x.grad)

tensor([0., 1., 4., 9.])


### clearing gradient 

In [58]:
# PyTorch adds each new gradient into the .grad attribute instead of replacing
# it, so .grad has to be reset before the next backward pass.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

    # zero the stored gradient in place so the next iteration starts from 0
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [62]:
# The "=" specifier in an f-string prints the expression text followed by its value.
a = 2
print(f"{a=}")

a=2
