In [32]:
import torch


In [33]:
# Three samples from the standard normal distribution; gradient tracking is
# off unless it is requested explicitly.
x = torch.randn((3,))
print(x)

tensor([ 0.4046, -0.3089,  0.0404])


In [34]:
# Opt in to autograd at creation time: operations on x are then recorded so
# that gradients with respect to x can be computed later.
x = torch.randn((3,), requires_grad=True)
print(x)

tensor([-0.8316,  1.8105,  0.6676], requires_grad=True)


In [35]:
# Adding a constant to a tracked tensor records AddBackward0 as y's grad_fn.
y = torch.add(x, 2)
y

tensor([1.1684, 3.8105, 2.6676], grad_fn=<AddBackward0>)

In [36]:
# The outermost operation is a multiplication, so c's grad_fn is MulBackward0.
c = torch.mul(y * y, 2)
print(c)

tensor([ 2.7302, 29.0403, 14.2324], grad_fn=<MulBackward0>)


In [37]:
# Reducing to a scalar with mean() records MeanBackward0 as k's grad_fn.
k = torch.mean(c)
print(k)

tensor(15.3343, grad_fn=<MeanBackward0>)


In [38]:
# To compute gradients, call backward() on the scalar result.

# dk/dx: backpropagate from k; the result is stored in x.grad
k.backward() # computes the gradient of k with respect to x


In [40]:
# dk/dx as stored on x by backward(). With k = mean(2 * (x + 2)**2) over
# three elements, each entry equals 4 * (x + 2) / 3.
print(x.grad)

tensor([1.5578, 5.0807, 3.5568])


In [41]:
# Forward pass on a tracked 5-element input: z = mean(2 * (x + 2)^2).
x = torch.randn((5,), requires_grad=True)
print(x)
y = torch.add(x, 2)
z = torch.mul(y * y, 2)
z = torch.mean(z)  # reduce to a scalar so backward() needs no argument
print(z)

tensor([-0.4653,  0.8673, -3.2148,  0.0388, -0.0211], requires_grad=True)
tensor(8.0500, grad_fn=<MeanBackward0>)


In [42]:
# Backpropagate from the scalar z; dz/dx is written to x.grad.
# With z = mean(2 * (x + 2)**2) over five elements, each entry is 4 * (x + 2) / 5.
z.backward() # dz/dx
print(x.grad)

tensor([ 1.2277,  2.2938, -0.9718,  1.6310,  1.5831])


In [43]:
# Same forward pass as before, but x is NOT tracked by autograd, so none of
# the derived tensors carry a grad_fn.
x = torch.randn((5,))
print(x)
y = torch.add(x, 2)
z = torch.mul(y * y, 2)
z = torch.mean(z)
print(z)

tensor([ 0.2470, -0.1975, -2.1230,  2.1002,  0.0234])
tensor(11.6877)


In [46]:
# x was created without requires_grad=True, so z has no grad_fn and
# backward() raises a RuntimeError. The error is the point of this cell, so
# catch and display it instead of letting it abort a Restart & Run All.
try:
    z.backward()  # dz/dx
    print(x.grad)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")
# Takeaway: requires_grad=True must be set on x before z is built.

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [48]:
x = torch.randn(5, requires_grad=True)
print(x)
y = x + 2
z = y * y * 2  # not a scalar: a vector with five elements

# backward() without arguments only works on scalar outputs, so this call
# raises a RuntimeError. The error is the demonstration: catch and display it
# rather than letting it halt the rest of the notebook.
try:
    z.backward()  # dz/dx
    print(x.grad)
except RuntimeError as err:
    print(f"{type(err).__name__}: {err}")

tensor([ 0.0429, -0.5521, -0.8023, -1.5637, -0.0280], requires_grad=True)


RuntimeError: grad can be implicitly created only for scalar outputs

In [51]:
x = torch.randn((3,), requires_grad=True)
print(x)
y = torch.add(x, 2)
z = torch.mul(y * y, 2)  # not a scalar, a vector

# For a non-scalar output, backward() must be given a vector with the same
# shape as z; autograd then computes the vector-Jacobian product into x.grad.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)

tensor([-0.1534, -0.0679,  1.5682], requires_grad=True)
tensor([0.7386, 7.7285, 0.0143])


In [54]:
x = torch.randn((3,), requires_grad=True)
print(x)

# Ways to keep autograd from tracking certain operations or steps
# (for example while updating weights):
#   x.requires_grad_(False)  -> switch tracking off in place
#   x.detach()               -> new tensor that does not require gradients
#   with torch.no_grad():    -> operations inside the block are not tracked
#       ...

tensor([-1.1159,  0.7055,  1.7181], requires_grad=True)


In [55]:
x = torch.randn((3,), requires_grad=True)
print(x)
# Switch gradient tracking off on the same tensor (in-place flag change).
x.requires_grad = False
print(x)

tensor([ 0.7212, -0.7527, -0.3640], requires_grad=True)
tensor([ 0.7212, -0.7527, -0.3640])


In [56]:
x = torch.randn((3,), requires_grad=True)
print(x)
# detach() returns a tensor with the same values that is cut off from the
# autograd graph, so it does not require gradients; x itself is unchanged.
y = x.detach()
print(y)

tensor([-1.4923, -1.7609,  0.2414], requires_grad=True)
tensor([-1.4923, -1.7609,  0.2414])


In [58]:
x = torch.randn((3,), requires_grad=True)
print(x)
# Inside no_grad() operations are not recorded, even on a tracked input.
with torch.no_grad():
    y = torch.add(x, 2)
print(y)  # no grad_fn in the output

tensor([ 0.3289, -0.2471,  1.9028], requires_grad=True)
tensor([2.3289, 1.7529, 3.9028])


In [59]:
x = torch.randn((3,), requires_grad=True)
print(x)
# Outside no_grad() the same addition is recorded by autograd.
y = torch.add(x, 2)
print(y)  # output shows a grad_fn

tensor([ 0.3820,  1.4834, -1.2198], requires_grad=True)
tensor([2.3820, 3.4834, 0.7802], grad_fn=<AddBackward0>)


In [61]:
# Important example: a single backward pass through a toy "model".
weights = torch.ones(4, requires_grad=True)

for epoch in range(1):
    out = torch.sum(weights * 3)
    out.backward()
    # d(out)/d(weights) is 3 for every element.
    print(weights.grad)

tensor([3., 3., 3., 3.])


In [62]:
# Important example: two backward passes without clearing the gradients.
weights = torch.ones(4, requires_grad=True)

for epoch in range(2):
    out = torch.sum(weights * 3)
    out.backward()
    # backward() adds to .grad, so the second pass prints 6s instead of 3s.
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])


In [65]:
# Important example: three backward passes without clearing the gradients.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    out = torch.sum(weights * 3)
    out.backward()
    # Each pass adds another 3 to every entry of .grad.
    print(weights.grad)

# The printed gradients are clearly wrong: they keep accumulating.
# To prevent this, the gradients must be emptied between iterations.
#to prevent this we must empty the gradients

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [66]:
# Important example: clear the gradients after every iteration.
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    out = torch.sum(weights * 3)
    out.backward()
    # Prints 3s on every pass because .grad is reset below.
    print(weights.grad)
    # Zero the accumulated gradient in place before the next backward pass.
    weights.grad.zero_()

# To prevent gradient accumulation, empty the gradients after each iteration.

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [None]:
weights = torch.ones(4, requires_grad=True)

# Optimizers take an iterable of parameters (or parameter-group dicts);
# passing the bare tensor raises a TypeError, so wrap it in a list.
optimizer = torch.optim.SGD([weights], lr=0.01)

# Populate weights.grad first: step() has nothing to apply until a backward
# pass has run.
out = (weights * 3).sum()
out.backward()
optimizer.step()  # weights <- weights - lr * grad

# Before the next iteration, clear the gradients through the optimizer so
# they do not accumulate.
optimizer.zero_grad()