In [1]:
import torch

In [14]:
# torch.rand draws uniform samples from the half-open interval [0, 1).
x = torch.rand((3,))
print(x)

# torch.randn draws samples from the standard normal distribution
# (mean 0, variance 1).
x = torch.randn((3,))
print(x)

tensor([0.2510, 0.4039, 0.0782])
tensor([ 1.6417, -1.1004, -1.6507])


In [3]:
# Three more uniform samples from [0, 1); each call produces fresh values.
x = torch.rand(size=(3,))
print(x)

tensor([0.6734, 0.7100, 0.0370])


In [17]:
# Turn on autograd tracking for x so that gradients with respect to it can
# be computed later with backward().
x = torch.randn(3).requires_grad_()
print(x)

tensor([ 0.5886,  0.3995, -0.5213], requires_grad=True)


In [18]:
# Adding a constant to a tracked tensor is recorded in the autograd graph:
# the result carries grad_fn=AddBackward0, the node used later to
# backpropagate through this addition.
y = torch.add(x, 2)
y

tensor([2.5886, 2.3995, 1.4787], grad_fn=<AddBackward0>)

In [19]:
# Square y element-wise, then double it. The last recorded operation is a
# multiplication, so the result carries grad_fn=MulBackward0.
c = torch.mul(y * y, 2)
print(c)

tensor([13.4022, 11.5148,  4.3733], grad_fn=<MulBackward0>)


In [20]:
# Reduce c to a scalar. Every operation applied to a tensor derived from a
# requires_grad tensor is tracked, so k carries grad_fn=MeanBackward0.
k = torch.mean(c)
print(k)

tensor(9.7634, grad_fn=<MeanBackward0>)


In [21]:
# Backpropagate from the scalar k.
# This computes dk/dx and stores the result in x.grad.

k.backward()


In [23]:
# k = mean(2 * (x + 2)^2) over three elements, so dk/dx = 4 * (x + 2) / 3.
print(x.grad)

tensor([3.4515, 3.1993, 1.9716])


In [24]:
# Same pipeline as above on five elements: shift by 2, square, double, then
# average down to a single scalar that backward() can be called on.
x = torch.randn(5).requires_grad_()
print(x)
y = torch.add(x, 2)
z = torch.mean(y * y * 2)
print(z)

tensor([-1.2970, -0.9738,  0.1661, -0.4632, -2.2943], requires_grad=True)
tensor(3.4751, grad_fn=<MeanBackward0>)


In [25]:
# z = mean(2 * (x + 2)^2) over five elements, so dz/dx = 4 * (x + 2) / 5.
z.backward()
print(x.grad)

tensor([ 0.5624,  0.8210,  1.7329,  1.2294, -0.2354])


In [26]:
# requires_grad defaults to False, so nothing below is recorded by autograd
# and z prints without a grad_fn.
x = torch.randn((5,))
print(x)
y = 2 + x
z = (y * y * 2).mean()
print(z)

tensor([-0.2994, -0.0782, -0.4953,  0.2648,  0.9002])
tensor(8.9561)


In [27]:
# z was built from a tensor that does not track gradients, so it has no
# grad_fn and backward() raises RuntimeError. The error is the point of this
# cell: catch and display it so that "Restart & Run All" can continue.
try:
    z.backward()  # dz/dx
except RuntimeError as err:
    print(f"RuntimeError: {err}")
else:
    print(x.grad)
# We must set requires_grad=True if we want to calculate gradients using .backward()

RuntimeError: element 0 of tensors does not require grad and does not have a grad_fn

In [33]:
x = torch.randn(5, requires_grad=True)
print(x)
y = x + 2
print(y)  # y is a vector, not a single scalar value

# backward() with no arguments only works on scalar outputs. Calling it on a
# vector raises RuntimeError unless a `gradient` vector is supplied. The error
# is the demonstration, so catch and display it instead of halting the
# notebook.
try:
    y.backward()  # dy/dx
except RuntimeError as err:
    print(f"RuntimeError: {err}")
else:
    print(x.grad)

tensor([-1.2614,  0.9733,  1.7545,  1.2994,  0.4667], requires_grad=True)
tensor([0.7386, 2.9733, 3.7545, 3.2994, 2.4667], grad_fn=<AddBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

In [34]:
x = torch.randn(5).requires_grad_()
print(x)
y = torch.add(x, 2)
print(y)  # still a vector here, so backward() could not be called on it yet
y = torch.mean(y)  # reduced to a single scalar value

# y = mean(x + 2) over five elements, so dy/dx = 1/5 for every element.
y.backward()
print(x.grad)

tensor([-0.2090,  1.6224,  0.1300, -0.0889,  0.4065], requires_grad=True)
tensor([1.7910, 3.6224, 2.1300, 1.9111, 2.4065], grad_fn=<AddBackward0>)
tensor([0.2000, 0.2000, 0.2000, 0.2000, 0.2000])


In [31]:
x = torch.randn(5, requires_grad=True)
print(x)
y = x + 2
print(y)
z = y * y * 2  # not a scalar, a vector
print(z)

# backward() with no arguments only works on scalar outputs, so this raises
# RuntimeError. The error is the demonstration: catch and display it so the
# notebook keeps running.
try:
    z.backward()  # dz/dx
except RuntimeError as err:
    print(f"RuntimeError: {err}")
else:
    print(x.grad)

tensor([-0.2022, -0.2681, -0.6680,  0.8448, -0.1477], requires_grad=True)
tensor([1.7978, 1.7319, 1.3320, 2.8448, 1.8523], grad_fn=<AddBackward0>)
tensor([ 6.4641,  5.9987,  3.5486, 16.1862,  6.8620], grad_fn=<MulBackward0>)


RuntimeError: grad can be implicitly created only for scalar outputs

In [32]:
x = torch.randn(5).requires_grad_()
print(x)
y = torch.add(x, 2)
print(y)
z = torch.mul(y * y, 2)  # element-wise result: a vector, not a scalar
print(z)
z = torch.mean(z)  # collapse to a scalar so backward() needs no argument

# z = mean(2 * (x + 2)^2) over five elements, so dz/dx = 4 * (x + 2) / 5.
z.backward()
print(x.grad)

tensor([ 0.7664, -0.6811,  0.4952, -1.4559, -0.1810], requires_grad=True)
tensor([2.7664, 1.3189, 2.4952, 0.5441, 1.8190], grad_fn=<AddBackward0>)
tensor([15.3062,  3.4792, 12.4522,  0.5921,  6.6177], grad_fn=<MulBackward0>)
tensor([2.2131, 1.0551, 1.9962, 0.4353, 1.4552])


In [35]:
x = torch.randn(3).requires_grad_()
print(x)
y = torch.add(x, 2)
z = 2 * (y * y)  # not a scalar, a vector

# For a non-scalar output, backward() needs a vector v with the same shape as
# z. Autograd then computes the vector-Jacobian product J^T v, where
# J = dz/dx. Each z[i] here depends only on x[i], so J is diagonal with
# entries 4 * (x + 2), and x.grad ends up as v * 4 * (x + 2).
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)

tensor([ 0.1860, -0.1098, -1.7606], requires_grad=True)
tensor([8.7439e-01, 7.5608e+00, 9.5773e-04])


In [54]:
x = torch.randn(3).requires_grad_()
print(x)

# Keywords: no gradient, no grad.
# Ways to stop autograd from tracking certain operations or steps
# (e.g. while updating weights):
#
#   x.requires_grad_(False)    -> switches tracking off on x itself, in place
#   x.detach()                 -> creates a new tensor that does not require gradient
#   with torch.no_grad():      -> operations inside the block are not tracked
#       ...operations...

tensor([-1.1159,  0.7055,  1.7181], requires_grad=True)


In [55]:
x = torch.randn(3, requires_grad=True)
print(x)
# The trailing underscore marks an in-place operation: x itself is modified
# and no longer tracks gradients afterwards.
x.requires_grad_(requires_grad=False)
print(x)

tensor([ 0.7212, -0.7527, -0.3640], requires_grad=True)
tensor([ 0.7212, -0.7527, -0.3640])


In [56]:
x = torch.randn(3).requires_grad_()
print(x)
# detach() returns a tensor with the same values that is cut off from the
# autograd graph, so it does not require gradients. x itself is unchanged.
y = x.detach()
print(y)

tensor([-1.4923, -1.7609,  0.2414], requires_grad=True)
tensor([-1.4923, -1.7609,  0.2414])


In [39]:
x = torch.randn(3).requires_grad_()
print(x)

# Computed outside the no_grad block: tracked, so z has a grad_fn.
z = torch.add(x, 2)

# Computed inside the no_grad block: not tracked, so y has no grad_fn.
with torch.no_grad():
    y = torch.add(x, 2)

# y carries no history, so anything derived only from y is untracked too.
g = torch.add(y, 2)

print(y, z, g, sep="\n")

tensor([-0.6017, -0.1214, -0.2886], requires_grad=True)
tensor([1.3983, 1.8786, 1.7114])
tensor([1.3983, 1.8786, 1.7114], grad_fn=<AddBackward0>)
tensor([3.3983, 3.8786, 3.7114])


In [40]:
x = torch.randn(3).requires_grad_()
print(x)
# Outside of no_grad the same addition is tracked, so y has a grad_fn.
y = torch.add(x, 2)
print(y)

tensor([ 0.4837, -0.3361,  0.0720], requires_grad=True)
tensor([2.4837, 1.6639, 2.0720], grad_fn=<AddBackward0>)


In [43]:
# Important example. Keywords: gradient accumulation, clear gradients, empty gradients.
# Baseline: one backward pass. d/dw sum(3 * w) = 3 for every weight.

weights = torch.ones(4).requires_grad_()

for epoch in range(1):
    out = torch.sum(weights * 3)

    out.backward()

    print(weights.grad)

tensor([3., 3., 3., 3.])


In [42]:
# Important example: two backward passes without clearing the gradients.

weights = torch.ones(4).requires_grad_()

for epoch in range(2):
    out = torch.sum(weights * 3)

    out.backward()

    # backward() adds into weights.grad, so the second pass prints 6 rather than 3.
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])


In [44]:
# Important example: three backward passes without clearing the gradients.

weights = torch.ones(4).requires_grad_()

for epoch in range(3):
    out = torch.sum(weights * 3)

    out.backward()

    # Each pass adds another 3 on top of what is already stored: 3, 6, 9.
    print(weights.grad)

# These gradients are clearly wrong: the true gradient is 3 on every pass,
# but the stored values keep accumulating.
# To prevent this we must empty the gradients.

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])


In [66]:
# Important example: clear the gradients at the end of every iteration.

weights = torch.ones(4).requires_grad_()

for epoch in range(3):
    out = torch.sum(weights * 3)

    out.backward()

    # The gradient was zeroed at the end of the previous pass, so every
    # iteration prints the correct value of 3.
    print(weights.grad)

    # In-place reset, so the next backward() starts again from zero.
    weights.grad.zero_()


# To prevent gradient accumulation, we must empty the gradients after each iteration.

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


In [None]:
weights = torch.ones(4, requires_grad=True)

# Optimizers take an iterable of parameters (or parameter-group dicts).
# Passing the bare tensor raises TypeError, so wrap it in a list.
optimizer = torch.optim.SGD([weights], lr=0.01)

# step() only updates parameters that have a gradient, so run a forward and
# backward pass first. The gradient of sum(3 * w) is 3 for every weight.
out = (weights * 3).sum()
out.backward()

# SGD update: w <- w - lr * grad = 1 - 0.01 * 3 = 0.97
optimizer.step()

# Before doing the next iteration, we must call the zero grad function on the
# optimizer, otherwise the gradients would keep accumulating.
optimizer.zero_grad()