In [1]:
import torch

print("=" * 60)
print("QUESTION 6: Basic Gradient Calculations")
print("=" * 60)

# Create a tensor with requires_grad=True
x = torch.tensor([2.0], requires_grad=True)
# Define a simple function y = x² + 3x
y = x**2 + 3 * x
# Backpropagate
y.backward()
# Print the gradient
print("x.grad =", x.grad)

print("\na) Expected gradient:")
print("   y = x² + 3x")
print("   dy/dx = 2x + 3")
print("   At x=2: dy/dx = 2(2) + 3 = 7")
print("   Expected: tensor([7.])")

print("\nb) If requires_grad=False:")
x_no_grad = torch.tensor([2.0], requires_grad=False)
y_no_grad = x_no_grad**2 + 3 * x_no_grad
print(f"   x.grad will be None: {x_no_grad.grad}")
print("   PyTorch doesn't track operations for gradient computation")

print("\nc) Without specifying requires_grad flag:")
x_default = torch.tensor([2.0])
print(f"   Default requires_grad = {x_default.requires_grad}")
print("   Gradients will NOT be tracked (default is False)")

print("\n" + "=" * 60)
print("QUESTION 7: Introduce Weights")
print("=" * 60)

# Create tensors
x = torch.tensor([2.0], requires_grad=True)
w = torch.tensor([1.0, 3.0], requires_grad=True)  # Important: set requires_grad=True
# Define function y = w[0]*x² + w[1]*x
y = w[0] * x**2 + w[1] * x
# Backpropagate
y.backward()

print(f"x.grad = {x.grad}")
print(f"w.grad = {w.grad}")

print("\na) Original code (w without requires_grad=True):")
print("   w.grad would be None because requires_grad=False by default")
print("   The graph wasn't tracking gradients w.r.t. w")

print("\nb) Modified code above shows w.grad computation")
print("   w[0].grad = dy/dw[0] = x² = 4")
print("   w[1].grad = dy/dw[1] = x = 2")
print("   Result: tensor([4., 2.])")

print("\nc) Same as Q6c: Default requires_grad=False, so gradients")
print("   are NOT tracked unless explicitly set to True")

print("\n" + "=" * 60)
print("QUESTION 8: Breaking the Graph")
print("=" * 60)

print("\nOriginal code:")
print("   x = torch.tensor([1.0], requires_grad=True)")
print("   y = x * 3")
print("   z = y.detach()")
print("   w = z * 2")
print("   w.backward()  # ERROR!")

print("\nWhy it fails:")
print("   - .detach() breaks the computational graph")
print("   - z has no gradient information linked to x")
print("   - w depends on z, but z doesn't track gradients")
print("   - No path exists to backpropagate to x")

print("\nFix 1: Don't detach if you need gradients")
x = torch.tensor([1.0], requires_grad=True)
y = x * 3
w = y * 2  # Remove detach
w.backward()
print(f"   x.grad = {x.grad}  (should be 6.0)")

print("\nFix 2: Use detach_() but compute loss without it:")
x = torch.tensor([1.0], requires_grad=True)
y = x * 3
z_value = y.detach().clone()  # Store the value
# If you need w to have gradients: don't use z
w = z_value * 2  # This won't have gradients
# But if you recompute: w = y * 2 instead

print("\n" + "=" * 60)
print("QUESTION 9: Gradient Accumulation")
print("=" * 60)

x = torch.tensor([1.0], requires_grad=True)
y1 = x * 2
y1.backward()
print("After first backward: x.grad =", x.grad)

y2 = x * 3
y2.backward()
print("After second backward: x.grad =", x.grad)

print("\nWhat's happening:")
print("   - First backward: x.grad = 2")
print("   - Second backward: x.grad += 3 (accumulated!)")
print("   - Result: x.grad = 5 instead of 3")

print("\nWhy:")
print("   PyTorch ACCUMULATES gradients by default")
print("   This is useful for mini-batch training")

print("\nHow to avoid:")
print("   Option 1: Manually zero gradients")
x = torch.tensor([1.0], requires_grad=True)
y1 = x * 2
y1.backward()
print(f"   After y1.backward(): x.grad = {x.grad}")
x.grad.zero_()  # Clear gradients
y2 = x * 3
y2.backward()
print(f"   After zero_() and y2.backward(): x.grad = {y2.grad}")

print("\n   Option 2: In training loops, use optimizer.zero_grad()")
print("   This is the standard practice in PyTorch")

QUESTION 6: Basic Gradient Calculations
x.grad = tensor([7.])

a) Expected gradient:
   y = x² + 3x
   dy/dx = 2x + 3
   At x=2: dy/dx = 2(2) + 3 = 7
   Expected: tensor([7.])

b) If requires_grad=False:
   x.grad will be None: None
   PyTorch doesn't track operations for gradient computation

c) Without specifying requires_grad flag:
   Default requires_grad = False
   Gradients will NOT be tracked (default is False)

QUESTION 7: Introduce Weights
x.grad = tensor([7.])
w.grad = tensor([4., 2.])

a) Original code (w without requires_grad=True):
   w.grad would be None because requires_grad=False by default
   The graph wasn't tracking gradients w.r.t. w

b) Modified code above shows w.grad computation
   w[0].grad = dy/dw[0] = x² = 4
   w[1].grad = dy/dw[1] = x = 2
   Result: tensor([4., 2.])

c) Same as Q6c: Default requires_grad=False, so gradients
   are NOT tracked unless explicitly set to True

QUESTION 8: Breaking the Graph

Original code:
   x = torch.tensor([1.0], requires_grad

  print(f"   After zero_() and y2.backward(): x.grad = {y2.grad}")
