# Imports

In [1]:
import numpy as np
import torch
from torchviz import make_dot

# Gradient tracking

In [None]:
# Autograd = automatic differentiation + dynamic computation graph


# Autograd in PyTorch is responsible for:

# Automatically building a computational graph (as you do operations on tensors)

# Automatically computing gradients via the chain rule

# Storing gradients in each leaf tensor's .grad attribute

## ex1

In [2]:
# Two leaf tensors: built directly from data, with gradient tracking switched on.
x = torch.tensor([2.0, 3.0]).requires_grad_()
y = torch.tensor([4.0, 5.0]).requires_grad_()

# Forward pass: z = x^2 + y element-wise, then reduce z to the scalar s.
z = x.pow(2) + y
s = torch.sum(z)

In [3]:
# Scalar sum of z; its grad_fn records the sum operation that produced it.
s

tensor(22., grad_fn=<SumBackward0>)

In [4]:
# Element-wise result of x**2 + y; the last recorded operation is the addition.
z

tensor([ 8., 14.], grad_fn=<AddBackward0>)

In [5]:
# Backpropagate from the scalar s; the gradients land in x.grad and y.grad.
s.backward()
# ds/dx = 2 * x and ds/dy = 1, printed one tensor per line.
print(x.grad, y.grad, sep="\n")

tensor([4., 6.])
tensor([1., 1.])


In [7]:
# Draw the autograd graph that leads to s and write it to graph.png.
make_dot(s).render(filename="graph", format="png")

'graph.png'

## ex2

In [51]:
# Seed the global RNG so the random initial parameters (and therefore the whole
# training run below) are identical after every Restart & Run All.
torch.manual_seed(0)

# Trainable weight and bias of the model pred = w * x + b, randomly initialised.
w = torch.randn(1, requires_grad=True)
b = torch.randn(1, requires_grad=True)

In [52]:
# Synthetic, noise-free data on the line y = 2x + 1: 100 evenly spaced x in [0, 1].
x = torch.linspace(0, 1, steps=100)
y = x * 2 + 1

In [53]:
# Plain gradient descent on the mean squared error of pred = w * x + b.
lr = 0.1
for epoch in range(100):
    # Forward pass: predictions and MSE loss against the targets y.
    pred = w * x + b
    loss = torch.mean((pred - y).pow(2))

    # Backward pass: fills w.grad and b.grad.
    loss.backward()

    # Step the parameters in place with autograd recording switched off.
    with torch.no_grad():
        w.sub_(lr * w.grad)
        b.sub_(lr * b.grad)

    # .backward() accumulates into .grad, so reset it before the next epoch.
    w.grad.zero_()
    b.grad.zero_()

In [62]:
# pred still holds the predictions from the final loop iteration; grad_fn is the
# backward node of the last operation that produced it (the "+ b" addition).
pred.grad_fn

<AddBackward0 at 0x7f25f08ec9a0>

In [63]:
# x was rebuilt with torch.linspace without requires_grad, so no gradient is
# expected here (the cell shows no output).
x.grad

## torch.autograd.grad


In [8]:
# What Is torch.autograd.grad?
# torch.autograd.grad is a PyTorch function that computes and returns the gradients of tensors with respect to other tensors. Unlike the typical .backward() method, which accumulates gradients into the .grad attribute of leaf tensors, torch.autograd.grad simply returns the gradients as output without modifying any .grad fields.

# Why Use torch.autograd.grad?
# Functionality: When you want to compute gradients for arbitrary tensors without accumulating them.

# Flexibility: Useful for custom optimization routines, meta-learning, or inspecting gradients of intermediate computations.

# Control: Allows you to compute higher-order derivatives.

In [9]:
# torch.autograd.grad(
#     outputs,
#     inputs,
#     grad_outputs=None,
#     retain_graph=False,
#     create_graph=False,
#     only_inputs=True,
#     allow_unused=False
# )
# outputs: Tensor(s) to differentiate, i.e. the outputs of the function whose gradient is computed.

# inputs: Tensor(s) with respect to which gradients are computed.

# grad_outputs: Optional. The "vector" in the vector-Jacobian product; needed for non-scalar outputs and may be omitted for scalar outputs.

# retain_graph: Whether to keep the computation graph after this call so that further backward passes can reuse it.

# create_graph: If True, builds the graph of the gradient computation itself, so higher-order derivatives can be computed.

# allow_unused: If True, inputs that were not used to compute outputs get a None gradient instead of raising an error.

In [10]:
# f(x) = x^3 + 2x evaluated at x = 2, so f'(x) = 3x^2 + 2 = 14.
x = torch.tensor(2.0, requires_grad=True)
y = x.pow(3) + 2 * x

# autograd.grad returns a tuple with one gradient per input; take its only entry.
grads = torch.autograd.grad(y, x)[0]
print(grads)  # Output: tensor(14.)

tensor(14.)


  # y was computed from x rather than created directly, and torch.autograd.grad
  # returns gradients instead of storing them in .grad, so presumably nothing is
  # populated here -- TODO confirm the intended demonstration (x.grad vs y.grad).
  y.grad
