In [1]:
import torch

In [3]:
# Leaf tensor with three uniform samples from [0, 1).
# requires_grad=True asks autograd to record every operation applied to x.
x = torch.rand((3,), requires_grad=True)
print(x)

tensor([0.6590, 0.1904, 0.4861], requires_grad=True)


In [4]:
# Element-wise shift of x by 2; the result stays attached to x's autograd graph.
y = torch.add(x, 2)

In [5]:
# Show y; it was computed from x (which requires grad), so the repr includes a grad_fn.
print(y)

tensor([2.6590, 2.1904, 2.4861], grad_fn=<AddBackward0>)


In [8]:
# z = 2 * y**2 element-wise, built as (y * y) * 2 so the last recorded op is a multiplication.
z = torch.mul(torch.mul(y, y), 2)
print(z)

tensor([14.1402,  9.5954, 12.3617], grad_fn=<MulBackward0>)


In [9]:
# Collapse z to a single scalar (its mean) so that backward() can be called without arguments.
z = torch.mean(z)
print(z)

tensor(12.0325, grad_fn=<MeanBackward0>)


In [10]:
# No backward pass has been run yet, so x.grad has not been populated.
print(x.grad)

None


In [11]:
# Back-propagate from the scalar z; autograd stores dz/dx in x.grad.
torch.autograd.backward(z)
print(x.grad)

tensor([3.5453, 2.9205, 3.3148])


Note: In the background, autograd calculates a vector-Jacobian product to get the gradient.

Note: The code above worked without passing a gradient vector to backward() because taking the mean reduces z to a scalar, and for a scalar output the gradient can be created implicitly. In the next section we run the same code without the mean, and it will not work.

In [12]:
# Rebuild the same graph as before, but stop before the mean: z stays a 3-element tensor.
x = torch.rand((3,), requires_grad=True)
print(x)
y = torch.add(x, 2)
z = torch.mul(torch.mul(y, y), 2)

tensor([0.7231, 0.3237, 0.1743], requires_grad=True)


In [13]:
# x was just re-created and backward() has not been called on the new graph, so no gradient is stored.
print(x.grad)

None


In [14]:
# z is not a scalar here, so backward() without a gradient argument raises RuntimeError.
# The error is the point of this cell: catch it and print it so that
# "Restart & Run All" can continue past this demonstration.
try:
    z.backward()  # dz/dx is undefined without a seed vector for a non-scalar z
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

RuntimeError: grad can be implicitly created only for scalar outputs

The call fails because z is not a scalar; the error message is:
RuntimeError: grad can be implicitly created only for scalar outputs

Now we will pass a vector of the same size as z to backward(); with it, the code works again.

In [16]:
# Same non-scalar graph as before: z = 2 * (x + 2)**2, element-wise.
x = torch.rand((3,), requires_grad=True)
print(x)
y = torch.add(x, 2)
z = torch.mul(torch.mul(y, y), 2)

# Seed vector for the backward pass; it must have the same shape as z.
v = torch.tensor([0.1, 1.0, 0.02], dtype=torch.float32)

# With a non-scalar z, backward(v) accumulates the vector-Jacobian product v^T * (dz/dx) into x.grad.
z.backward(gradient=v)
print(x.grad)

tensor([0.9747, 0.9661, 0.2842], requires_grad=True)
tensor([ 1.1899, 11.8645,  0.1827])


So it is clear that, in the background, autograd calculates a vector-Jacobian product to get the gradient.

# Prevent pytorch from tracking the history

There are three ways

1. x.requires_grad_(False)
2. x.detach()
3. with torch.no_grad():

In [19]:
# Fresh leaf tensor of three standard-normal samples, tracked by autograd.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([-1.2661,  1.3568, -0.1329], requires_grad=True)


In [20]:
# Way 1: switch tracking off in place (trailing underscore = in-place); the repr no longer shows requires_grad.
x.requires_grad_(requires_grad=False)
print(x)

tensor([-1.2661,  1.3568, -0.1329])


In [21]:
# New tracked leaf tensor, used by the detach() and no_grad() demonstrations below.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([ 1.1553,  0.5643, -0.2779], requires_grad=True)


In [22]:
# Way 2: detach() returns a tensor with the same values as x that is not tracked by autograd,
# so the printed repr carries no requires_grad / grad_fn.
y=x.detach()
print(y)

tensor([ 1.1553,  0.5643, -0.2779])


In [23]:
# Way 3: operations executed inside torch.no_grad() are not recorded,
# so y has no grad_fn even though x requires grad.
with torch.no_grad():
    y = torch.add(x, 2)
    print(y)

tensor([3.1553, 2.5643, 1.7221])


In [29]:
# One backward pass: d(sum(3 * w))/dw is 3 for every weight.
weights = torch.ones(4, requires_grad=True)
for epoch in range(1):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])


In [30]:
# Two backward passes without resetting: each pass adds another 3 to weights.grad.
weights = torch.ones(4, requires_grad=True)
for epoch in range(2):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])


In [31]:
# Three backward passes without resetting: weights.grad grows to 3, 6, then 9.
weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


Note: In the cells above, the gradient is accumulated (remembered) across backward() calls, which results in a wrong calculation.
    
Note: Before the next iteration, we must reset (zero) the gradient.

In [32]:
# Same loop as above, but the stored gradient is cleared after every pass,
# so each epoch reports the gradient of that pass alone.
weights = torch.ones(4, requires_grad=True)
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    # Reset the accumulated gradient in place before the next backward pass.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
