In [1]:
import torch

In [20]:
# Leaf tensor with three random entries; requires_grad=True asks autograd to record operations on it.
x = torch.rand((3,), requires_grad=True)

In [21]:
# Show the values; the repr also reports requires_grad=True.
print(x)

tensor([0.5941, 0.1580, 0.4685], requires_grad=True)


In [22]:
# Adding a constant to a tracked tensor gives a tracked result that carries a grad_fn.
y = x.add(2)
print(y)

tensor([2.5941, 2.1580, 2.4685], grad_fn=<AddBackward0>)


In [23]:
# Square y element-wise, then double it; both multiplications are recorded by autograd.
z = y.mul(y).mul(2)
print(z)

tensor([13.4589,  9.3140, 12.1866], grad_fn=<MulBackward0>)


In [24]:
# Reduce z to a single scalar value (the mean of its three entries).
z = torch.mean(z)
print(z)

tensor(11.6532, grad_fn=<MeanBackward0>)


In [25]:
# z is a scalar, so backward() needs no explicit gradient argument.
z.backward()

In [26]:
# dz/dx is now stored on the leaf tensor x: with z = mean(2 * (x + 2)**2), each entry is 4 * (x + 2) / 3.
print(x.grad)

tensor([3.4588, 2.8773, 3.2913])


In [28]:
# w is a vector, not a scalar, so backward() must be given a gradient argument of the same shape.
w = y.mul(y).mul(2)
print(w)

tensor([13.4589,  9.3140, 12.1866], grad_fn=<MulBackward0>)


Calling `w.backward()` without an argument raises `RuntimeError: grad can be implicitly created only for scalar outputs`, because `w` is a vector rather than a scalar.


In [29]:
# For a non-scalar output, backward() takes a tensor with the same shape as w and
# computes the vector-Jacobian product with it. The result is added to whatever
# x.grad already holds from earlier backward() calls.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
w.backward(gradient=v)

In [30]:
# Not the gradient of w alone: backward() accumulates, so this is the earlier dz/dx plus v * 4 * (x + 2).
print(x.grad)

tensor([ 4.4965, 11.5094,  3.3012])


To stop PyTorch from tracking the operation history and creating a `grad_fn` for a result, we have three methods.
We need this, for example, when we update our weights and the update itself should not be part of the gradient computation:
1. x.requires_grad_(False)
2. x.detach()
3. with torch.no_grad():

1. x.requires_grad_(False)

In [31]:
# The trailing underscore marks an in-place operation: x itself stops requiring gradients.
x.requires_grad_(False)
# requires_grad=True no longer appears in the repr.
print(x)

tensor([0.5941, 0.1580, 0.4685])


2. x.detach()

In [32]:
# detach() creates a new tensor with the same values that does not require gradients.
# NOTE(review): x already had requires_grad switched off in the previous cell, so x and a print identically here.
a = x.detach()
print(a)

tensor([0.5941, 0.1580, 0.4685])


3. with torch.no_grad()

In [33]:
# Section 1 switched tracking off on x in place. Turn it back on so that the missing
# grad_fn below is really the effect of no_grad() and not of x.requires_grad being False.
x.requires_grad_(True)

# Operations inside the block are not recorded, so y has no grad_fn even though x is tracked.
with torch.no_grad():
    y = x + 2
    print(y)

tensor([2.5941, 2.1580, 2.4685])


In [35]:
# Four weights initialised to 1.0 and tracked by autograd.
weights = torch.ones((4,), requires_grad=True)

In [36]:
for epoch in range(4):
    # Forward pass: a scalar output whose gradient with respect to each weight is 3.
    model_output = torch.sum(weights * 3)
    # Backward pass: the new gradient is added to weights.grad rather than replacing it.
    model_output.backward()

    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])
tensor([12., 12., 12., 12.])


We can see above that the gradient changes in every epoch, because each `backward()` call adds the newly computed gradient to the value already stored in `weights.grad`.

In [38]:
# Start from a clean slate: clear whatever an earlier backward() left in weights.grad.
# weights.grad is None until the first backward() call, hence the guard.
if weights.grad is not None:
    weights.grad.zero_()

for epoch in range(4):
    model_output = (weights * 3).sum()
    model_output.backward()

    print(weights.grad)
    # Reset in place so the next epoch's gradient is not added on top of this one.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
