In [10]:
import torch

# Leaf tensor with requires_grad=True: operations on it carry a grad_fn.
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32, requires_grad=True)

# Forward pass: shift by 2, then square and double element-wise.
y = torch.add(x, 2)
z = torch.mul(y * y, 2)
print(x, y, z, sep="\n")

# Reduce to a single scalar so backward() can be called without arguments.
z = torch.mean(z)
print(z)

# Backward pass: populates x.grad with dz/dx.
z.backward()
print(x.grad)


# dz/dx = 2 * (x + 2) * 2 / N
# dz/dx = 4 * (x + 2) / N
# where N is the number of elements in x
# for x = [[1, 2], [3, 4]]
# dz/dx = 4 * ([[1, 2], [3, 4]] + 2) / 4
# dz/dx = [[3, 4], [5, 6]] = x.grad

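# If z is left as the non-scalar y * y * 2 (i.e. without z.mean()), backward()
# needs a tensor v with the same shape as z and computes dz/dx * v element-wise:
# v = torch.tensor([[0.1, 1.0], [0.001, 0.01]], dtype=torch.float32)
# z.backward(v)  # dz/dx * v
# print(x.grad)
# dz/dx = 2 * (x + 2) * 2
# dz/dx = 4 * (x + 2)
# for x = [[1, 2], [3, 4]]
# dz/dx = 4 * ([[1, 2], [3, 4]] + 2)
# dz/dx = [[12, 16], [20, 24]]
# dz/dx * v = [[1.2, 16], [0.02, 0.24]] = x.grad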

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)
tensor([[3., 4.],
        [5., 6.]], grad_fn=<AddBackward0>)
tensor([[18., 32.],
        [50., 72.]], grad_fn=<MulBackward0>)
tensor(43., grad_fn=<MeanBackward0>)
tensor([[3., 4.],
        [5., 6.]])


In [14]:
# Prevent PyTorch from automatically calculating gradients.
# There are 3 ways:
#
# 1. x.requires_grad_(False)  # Flips the flag in place on x itself
# 2. x.detach()               # Creates a new tensor that does not require gradients
# 3. with torch.no_grad():    # Results computed inside the block do not require gradients

x = torch.tensor([[1, 2], [3, 4]], requires_grad=True, dtype=torch.float)
print(x)

# 1. x.requires_grad_(False)
x.requires_grad_(False)
print(x)
assert not x.requires_grad

# x stopped tracking gradients in step 1, so running steps 2 and 3 on x would
# have nothing left to switch off. Demonstrate them on a tracking copy instead.
x_tracked = x.clone().requires_grad_(True)

# 2. x.detach()   # The detached tensor, and anything derived from it, is untracked
y = x_tracked.detach()
q = y + 2
print(y)
assert not y.requires_grad and not q.requires_grad

# 3. with torch.no_grad():   # The source still requires grad, the result does not
with torch.no_grad():
    y = x_tracked + 2
    print(y)
assert x_tracked.requires_grad and not y.requires_grad


tensor([[1., 2.],
        [3., 4.]], requires_grad=True)
tensor([[1., 2.],
        [3., 4.]])
tensor([[1., 2.],
        [3., 4.]])
tensor([[3., 4.],
        [5., 6.]])


In [19]:
# Pitfall: by default every backward() call adds the new gradients onto .grad
# instead of replacing it, so the values pile up across calls.

# Demonstration of the accumulation
weights = torch.ones(3).requires_grad_()
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)

print('\nSolution\n')

# Fix: clear the accumulated gradient by hand at the end of each pass.
weights = torch.ones(3).requires_grad_()
for epoch in range(3):
    model_output = torch.sum(weights * 3)
    model_output.backward()
    print(weights.grad)
    weights.grad.zero_()  # in-place reset so the next backward() starts from zero

tensor([3., 3., 3.])
tensor([6., 6., 6.])
tensor([9., 9., 9.])

Solution

tensor([3., 3., 3.])
tensor([3., 3., 3.])
tensor([3., 3., 3.])


In [24]:
# same issue with optimizer in pytorch
# import torch

# weights = torch.ones(4, requires_grad=True)
# optimizer = torch.optim.SGD([weights], lr=0.01)  # params must be an iterable of tensors, not a bare tensor
# optimizer.step() # Does the update
# optimizer.zero_grad() # Manually set the gradients to zero

# print(weights)