https://www.youtube.com/watch?v=DbeIqrwb_dE&list=PLqnslRFeH2UrcDBWF5mfPGpqQDSta6VK4&index=3

https://github.com/patrickloeber/pytorchTutorial/blob/master/03_autograd.py

In [1]:
import torch

# torch.autograd implements automatic differentiation for tensor operations.

# With requires_grad=True, every operation involving x is recorded.
x = torch.randn(3, requires_grad=True)
y = x + 2

# y is the product of an operation on a tracked tensor, so it carries a
# grad_fn attribute: a reference to the Function that created it.
print(x, y, y.grad_fn, sep="\n")

tensor([-0.0158,  0.1426, -0.1710], requires_grad=True)
tensor([1.9842, 2.1426, 1.8290], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x7ba1612cd7b0>


In [2]:
# Chain further operations onto y: an element-wise expression first,
# then a reduction to a single scalar value.
elementwise = y * y * 3
z = elementwise.mean()
print(elementwise, z, sep="\n")

tensor([11.8117, 13.7720, 10.0360], grad_fn=<MulBackward0>)
tensor(11.8732, grad_fn=<MeanBackward0>)


In [3]:
# Compute the gradients with backpropagation.
# Once the forward computation is finished, calling .backward() computes all
# the gradients automatically.
# The gradient for a tensor is accumulated into its .grad attribute.
# It is the partial derivative of the function w.r.t. the tensor.
#
# z is a scalar here (it is the result of .mean()), so .backward() is called
# without an explicit gradient argument.
# With z = mean(3 * y * y) over 3 elements and y = x + 2, dz/dx = 2 * (x + 2).

z.backward()
print(x.grad) # dz/dx

tensor([3.9685, 4.2852, 3.6581])


In [4]:
# Ways to stop a tensor from tracking history.
# Example: inside a training loop the weight update itself must not become
# part of the gradient computation. Options:
# - x.requires_grad_(False)
# - x.detach()
# - wrap the code in 'with torch.no_grad():'

# requires_grad_() changes the existing flag of a tensor in place.

a = torch.randn(2, 2)
print(a.requires_grad)

# a is not tracked at this point, so a result derived from it has no grad_fn.
b = (a * 3) / (a - 1)
print(b.grad_fn)

# Once the flag is switched on, new results record the operation behind them.
a.requires_grad_(True)
b = (a * a).sum()
print(b.grad_fn)

False
None
<SumBackward0 object at 0x7ba1608e0e20>


In [5]:
# .detach() returns a new tensor holding the same values as the original,
# but excluded from gradient computation.

a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
print(a.requires_grad, b.requires_grad, sep="\n")

True
False


In [10]:
# -----------------------------------------------
# backward() ACCUMULATES the gradient of a tensor into its .grad attribute.
# !!! Be careful with this during optimization !!!
# Call .zero_() on the gradient before starting a new optimization step.

weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Dummy "forward pass", only there to produce something differentiable.
    model_output = (weights * 3).sum()
    print(model_output)

    model_output.backward()
    print(weights.grad)

    # Optimize the model, i.e. adjust the weights. The update runs under
    # no_grad() so that it is not recorded for gradient computation.
    with torch.no_grad():
        weights -= 0.1 * weights.grad

    # Important: resetting the gradient here affects the final weights and
    # output, because the next backward() would otherwise add on top of it.
    weights.grad.zero_()

print(weights, model_output, sep="\n")

tensor(12., grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(8.4000, grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor(4.8000, grad_fn=<SumBackward0>)
tensor([3., 3., 3., 3.])
tensor([0.1000, 0.1000, 0.1000, 0.1000], requires_grad=True)
tensor(4.8000, grad_fn=<SumBackward0>)


In [None]:
# Optimizer has zero_grad() method
# optimizer = torch.optim.SGD([weights], lr=0.1)
# During training :
# optimizer.step()
# optimizer.zero_grad()