# **Gradient Calculation with Autograd - Patrick Loeber**

Importing torch

In [1]:
import torch

Gradient Calculation

In [10]:
# Leaf tensor with three random values; requires_grad=True asks autograd to
# track the operations applied to it.
x = torch.randn(3, requires_grad=True)
print(x)

# Results derived from a tracked tensor carry a grad_fn.
y = x + 2
print(y)

z = 2 * (y * y)
# z is a 3-element vector here. Reducing it with `z = z.mean()` would make it a
# scalar, and then `z.backward()` would need no argument.
print(z)

# Because z is not a scalar, backward() is given a vector with the same shape
# as z. Autograd computes a vector-Jacobian product (the chain rule):
#   Jacobian matrix of partial derivatives * gradient vector = final gradients
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)

# dz/dx weighted by v, stored in x.grad.
print(x.grad)

tensor([0.0157, 0.1505, 0.0531], requires_grad=True)
tensor([2.0157, 2.1505, 2.0531], grad_fn=<AddBackward0>)
tensor([8.1261, 9.2490, 8.4306], grad_fn=<MulBackward0>)
tensor([8.0628e-01, 8.6018e+00, 8.2125e-03])


Preventing Gradient History

In [13]:
# Three ways to stop autograd from tracking a tensor:
#   1. x.requires_grad_(False)  - in place: switches tracking off on x itself
#   2. x.detach()               - returns a new tensor that does not require gradients
#   3. with torch.no_grad():    - results computed inside the block are not tracked
x = torch.randn(3, requires_grad=True)
print('X = ', x)

# Options 2 and 3 are applied while x still requires gradients; once option 1
# has switched tracking off in place they would have nothing left to prevent.
y = x.detach()

with torch.no_grad():
    z = x + 2

# Option 1 runs last because it mutates x.
x.requires_grad_(False)
print('X = ', x)
print('Y = ', y)
print('Z = ', z)

X =  tensor([-0.0121,  1.8233, -1.5379], requires_grad=True)
X =  tensor([-0.0121,  1.8233, -1.5379])
Y =  tensor([-0.0121,  1.8233, -1.5379])
Z =  tensor([1.9879, 3.8233, 0.4621])


Training Example (Zeroing Gradient)

In [19]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(3):
    # Forward pass: multiply every weight by 3 and reduce the result to a scalar.
    model_output = torch.sum(weights * 3)

    # Backward pass: gradient of model_output w.r.t. weights. model_output is a
    # scalar, so no argument is needed; the gradient has the same shape as weights.
    model_output.backward()

    # Shows the gradient of weights. Without the reset below, this would show
    # gradients accumulated over all epochs so far.
    print(weights.grad)

    # Manually set the gradients of weights back to zero (in place).
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


**Why Zeroing Gradients is Necessary**
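- Gradient Accumulation: PyTorch adds the gradients from each backward pass to whatever is already stored in `.grad`. If you don't zero out the gradients, they keep accumulating, which gives incorrect gradients and thus incorrect weight updates. In the example above, the printed gradients would be 3, 6, and 9 over the three epochs instead of 3 each time.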
- Correct Gradient Calculation: Zeroing the gradients after each backward pass ensures that every epoch starts from a fresh gradient, so the gradients computed in that epoch are accurate.