In [5]:
import torch

# Leaf tensor: autograd records every operation applied to it.
x = torch.rand((3,), requires_grad=True)
print(x)

# Each tracked op adds a node to the graph (visible as grad_fn in the repr).
y = torch.add(x, 2)
print(y)

z = y.mul(y).mul(2)  # 2 * y**2, element-wise
print(z)

# Reduce to a scalar so backward() can be called without an explicit gradient.
z = torch.mean(z)
print(z)

# Populate x.grad with dz/dx.
z.backward()
print(x.grad)

tensor([0.6504, 0.7522, 0.8801], requires_grad=True)
tensor([2.6504, 2.7522, 2.8801], grad_fn=<AddBackward0>)
tensor([14.0489, 15.1497, 16.5901], grad_fn=<MulBackward0>)
tensor(15.2629, grad_fn=<MeanBackward0>)
tensor([3.5338, 3.6697, 3.8402])


- if the last operation does not produce a scalar, calling backward() without arguments will fail
- in that case we can pass a vector of the same size to the backward function to produce the gradients
    - z.backward(v)
    - v must have the same shape as z


## Prevent pytorch from tracking a variable
- 3 methods
    - x.requires_grad_(False)
    - x.detach()
    - with torch.no_grad():

In [6]:
# Start with a tensor that autograd tracks ...
x = torch.rand((3,), requires_grad=True)
print(x)
# ... then turn tracking off in place (trailing underscore => mutates x itself).
x.requires_grad_(False)
print(x)

tensor([0.4276, 0.4825, 0.5585], requires_grad=True)
tensor([0.4276, 0.4825, 0.5585])


In [8]:
# Start with a tensor that autograd tracks ...
x = torch.rand((3,), requires_grad=True)
print(x)
# ... then rebind x to the tensor returned by detach(), which prints without
# the requires_grad flag.
x = x.detach()
print(x)

tensor([0.1851, 0.3830, 0.6246], requires_grad=True)
tensor([0.1851, 0.3830, 0.6246])


In [19]:
x = torch.rand((3,), requires_grad=True)
print(x)

# Tracked op: the graph now knows y = 3 * x.
y = x.mul(3)

# Inside no_grad the in-place squaring changes y's values but is not recorded,
# so it contributes nothing to the gradient computed below.
with torch.no_grad():
    y.mul_(y)

z = torch.sum(y)
z.backward()
# Only the tracked "* 3" is differentiated, hence a gradient of 3 per element.
print(x.grad)

tensor([0.1275, 0.8010, 0.6613], requires_grad=True)
tensor([3., 3., 3.])


## Gradients accumulate across backward() calls

In [22]:
import torch

# Leaf tensor standing in for a model's trainable parameters.
weights = torch.ones((3,), requires_grad=True)

for epoch in range(3):
    # d(sum(3 * w))/dw is 3 for every element.
    model_out = torch.sum(weights.mul(3))
    model_out.backward()

    # backward() adds into .grad instead of overwriting it, so the printed
    # values grow by 3 on every pass.
    print(weights.grad)

tensor([3., 3., 3.])
tensor([6., 6., 6.])
tensor([9., 9., 9.])


- hence at the end of each loop iteration we have to clear the grads

In [23]:
import torch

# Leaf tensor standing in for a model's trainable parameters.
weights = torch.ones((3,), requires_grad=True)

for epoch in range(3):
    # d(sum(3 * w))/dw is 3 for every element.
    model_out = torch.sum(weights.mul(3))
    model_out.backward()

    print(weights.grad)
    # Reset the accumulated gradient in place so the next backward() starts
    # from zero and prints 3s again.
    weights.grad.zero_()

tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])


In [None]:
import torch

# Parameters to optimise; they must be tracked by autograd.
weights = torch.ones(4, requires_grad=True)

# Optimizers expect an iterable of parameters, so the single tensor is wrapped
# in a list (passing the bare tensor raises a TypeError).
optimizer = torch.optim.SGD([weights], lr=0.001)

# Populate weights.grad first: step() has nothing to apply without gradients.
model_out = (weights * 3).sum()
model_out.backward()

optimizer.step()  # optimize the weights: w <- w - lr * grad
optimizer.zero_grad()  # clear the grads