In [1]:
import torch

In [12]:
x = torch.rand(3, requires_grad = True)
x
# creates a tensor of size 3

tensor([0.6428, 0.2332, 0.0255], requires_grad=True)

In [4]:
y = torch.rand(3 , requires_grad = True)
# if in future we need to calculate the gradient of a function w.r.t y then requires_grad = true
y

tensor([0.2023, 0.3429, 0.4523], requires_grad=True)

In [18]:
# a mulBackward function is created to calculate the gradient of z w.r.t y
z = y*y*2
print(z)
# a AddBackward function is created to calculate the gradient of k w.r.t x
k = x+2
k
# computational graph 

tensor([0.0819, 0.2352, 0.4092], grad_fn=<MulBackward0>)


tensor([2.6428, 2.2332, 2.0255], grad_fn=<AddBackward0>)

In [19]:
z = z.mean()
z.backward()# dz/dy
y.grad

tensor([1.0789, 1.8289, 2.4124])

In [36]:
# right now z is just a scaler , but if z was a vector then we would need to pass a gradient vector
m = torch.rand(3 , requires_grad = True)
print(m)
a = m*m*2
print(a)
# creating the gradient vector 
v = torch.tensor([0.011 , 1.01 , 2.0] , dtype = torch.float32)
a.backward(v)
print(m.grad)

tensor([0.8532, 0.8512, 0.8720], requires_grad=True)
tensor([1.4558, 1.4490, 1.5208], grad_fn=<MulBackward0>)
tensor([0.0375, 3.4387, 6.9760])


In [39]:
# how to prevent pyTorch from calculating the gradients 
# basically stop it from tracking the history in the computational graph
# when we are updating the weights , that should not be the part of the gradient computation 
# x.requires_grad_(False)
# whenever there is a trailing underscore after a function name that means it will alter the specified variable inplace
#x.detach()
# with torch.no_grad()

n = torch.rand(3 , requires_grad = True)
print(n)
n.requires_grad_(False)
print(n)


tensor([0.0679, 0.3337, 0.9648], requires_grad=True)
tensor([0.0679, 0.3337, 0.9648])


In [41]:
# METHOD 2
h = torch.rand(3 , requires_grad = True)
print(h)
h_aux = h.detach()# we need to insert the value into a new a variable
print(h_aux)

tensor([0.5181, 0.9815, 0.1795], requires_grad=True)
tensor([0.5181, 0.9815, 0.1795])


In [43]:
# METHOD 3
new_tensor = torch.rand(3 , requires_grad = True)
print(new_tensor)
with torch.no_grad():
    # all the operations inside this block
    # will automatically have requires_grad = False
    new_tensor_aux = new_tensor*3
    print(new_tensor_aux)

tensor([0.7882, 0.1714, 0.1881], requires_grad=True)
tensor([2.3645, 0.5143, 0.5643])


In [52]:
# we need to remember that the .grad attribute will keep on adding the gradients in multiple iterations
# so after every iteration we need to clear the gradients to zero 

# for eg :- 
weights = torch.ones(3 ,  requires_grad = True)

for epoch in range(4):
    model_output = (weights*3).sum()
    # some operation on the tensor 
    
    model_output.backward()
    
    print(weights.grad)
    
    

tensor([3., 3., 3.])
tensor([6., 6., 6.])
tensor([9., 9., 9.])
tensor([12., 12., 12.])


In [54]:
# so to prevent the above problem :-
weights1 = torch.ones(3 ,  requires_grad = True)

for epoch in range(4):
    model_output1 = (weights1*3).sum()
    # some operation on the tensor 
    
    model_output1.backward()
    
    print(weights1.grad)
    
    # this will reset the gradients to zero 
    weights1.grad.zero_()
   

tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])


In [57]:
# side note
# we can also use built in optimizer
new_weights = torch.ones(3 , requires_grad = True)
# lr is learning rate
#optimizer = torch.optim.SGD(new_weights ,  lr = 0.01)
#optimizer.step()
#optimizer.zero_grad()