In [1]:
import numpy as np
import torch as tr
import matplotlib.pyplot as plt
# import torchvision

In [2]:
# One-element NumPy arrays used as the input, bias and weight of the toy model below.
xx = np.array([0.5])  # input sample
b = np.array([2])     # bias; built from an integer literal, so NumPy gives it an integer dtype
w = np.array([0.5])   # weight


In [3]:
# Wrap each NumPy array in a torch tensor (dtype is inherited from the array).
xx_t, w_t, b_t = map(tr.from_numpy, (xx, w, b))


In [4]:
## Inspect the gradient flags and dtypes right after conversion.
## Note -> only floating-point (and complex) tensors can require / hold gradients.
(
    w_t.requires_grad,
    w_t.dtype,
    b.dtype,
    b_t.dtype,
    b_t.requires_grad,
)

(False, torch.float64, dtype('int64'), torch.int64, False)

In [5]:
## The trailing underscore marks an in-place method: w_t itself is switched to track gradients
w_t.requires_grad_(requires_grad=True)

tensor([0.5000], dtype=torch.float64, requires_grad=True)

In [6]:
# Floating-point dtype used for the cast of b_t below (tr.double is an alias of tr.float64).
dtype = tr.double

In [7]:
# Cast the integer bias tensor to the floating-point dtype so that it can require gradients.
b_t = b_t.to(dtype)

In [8]:
## Same in-place (trailing underscore) switch as for w_t, now possible because b_t is floating point
b_t.requires_grad_(requires_grad=True)

tensor([2.], dtype=torch.float64, requires_grad=True)

In [9]:
## Both w_t and b_t now have requires_grad set to True.
## Check whether a tensor computed from them gets requires_grad=True as well.

d_t = tr.add(w_t, b_t)
d_t.requires_grad ## It does, without ever being set explicitly


True

In [10]:
# The result tracks gradients because w_t does; the plain input xx_t stays untracked.
p_t = w_t.pow(2) + 0.4 * xx_t
(p_t.requires_grad, xx_t.requires_grad)

(True, False)

In [11]:
def fun(x, ystar, w=None, b=None):
    """Squared-error loss of the model ``exp(w * x + b)`` against ``ystar``.

    Args:
        x: Input tensor.
        ystar: Target tensor, subtracted element-wise from the prediction.
        w: Weight tensor. Defaults to the notebook-level ``w_t``.
        b: Bias tensor. Defaults to the notebook-level ``b_t``.

    Returns:
        ``sum((exp(w * x + b) - ystar) ** 2)`` as a tensor.
    """
    # Fall back to the notebook globals so existing two-argument calls behave as before.
    if w is None:
        w = w_t
    if b is None:
        b = b_t
    y = tr.exp(w * x + b)
    # Show the prediction and whether autograd is tracking it.
    print(y, y.requires_grad,"Uhhh")
    return tr.sum((y - ystar) ** 2)

# Seed the generator so the random target, and every gradient derived from it,
# is the same on each Restart & Run All.
tr.manual_seed(0)
ystar_t = tr.randn_like(xx_t)
l_t = fun(xx_t, ystar_t)
print(l_t.requires_grad,"okkk")


tensor([9.4877], dtype=torch.float64, grad_fn=<ExpBackward0>) True Uhhh
True okkk


In [12]:
## No backward pass has run yet, so neither tensor has a gradient stored
print("Before bakward() ", *(param.grad for param in (w_t, b_t)))


Before bakward()  None None


In [13]:
## Backpropagate from the loss, then show the gradients now stored on w_t and b_t
l_t.backward()
grads = (w_t.grad, b_t.grad)
print(*grads)

tensor([104.8685], dtype=torch.float64) tensor([209.7369], dtype=torch.float64)


In [14]:
## Calling the backward method a second time on the same loss
# l_t.backward() ## Note: uncommenting this line raises an error
## Possible explanation --> once backward() has run, the dynamic computation
## graph is deleted (its memory is freed), so a second pass through it fails.
## One way to keep the graph is to pass the argument retain_graph=True to backward()


In [15]:
## Show the current gradient values before running backward again
print(*[param.grad for param in (w_t, b_t)])

tensor([104.8685], dtype=torch.float64) tensor([209.7369], dtype=torch.float64)


In [16]:
## Recompute the loss, then backpropagate through it twice.
## The first pass keeps the graph (retain_graph=True) so the second pass can run;
## the second pass uses the default (None).
l_t = fun(xx_t, ystar_t)
for keep_graph in (True, None):
    l_t.backward(retain_graph=keep_graph)
print(w_t.grad)
## Every run of this cell increases w_t.grad, because gradients are accumulated.
## They therefore have to be reset to zero manually.


tensor([9.4877], dtype=torch.float64, grad_fn=<ExpBackward0>) True Uhhh
tensor([314.6054], dtype=torch.float64)


In [17]:
## Reset the accumulated gradients to zero, in place.
## zero_() is called on .grad directly; going through .data is unnecessary and
## bypasses autograd's tracking.
w_t.grad.zero_()
b_t.grad.zero_()
## Print the gradients again to confirm that both are now zero
print(w_t.grad, b_t.grad)


tensor([0.], dtype=torch.float64) tensor([0.], dtype=torch.float64)


In [18]:
## Gradients are never cleared automatically: they accumulate across successive
## backward() calls until they are reset by hand. Accumulation is desirable when
## computing the loss over several mini-batches or the gradient of a sum of losses.
##
## Start from zeroed gradients (zero_() applied to .grad directly, no .data needed).
w_t.grad.zero_()
b_t.grad.zero_()
l_t = fun(xx_t, ystar_t)
## Two backward passes over the same graph: the first keeps the graph alive, and
## both passes add their gradients into w_t.grad and b_t.grad.
l_t.backward(retain_graph=True)
l_t.backward()
print(w_t.grad)
print(b_t.grad)

tensor([9.4877], dtype=torch.float64, grad_fn=<ExpBackward0>) True Uhhh
tensor([209.7369], dtype=torch.float64)
tensor([419.4739], dtype=torch.float64)
