In [1]:
import torch

# AUTOGRAD: AUTOMATIC DIFFERENTIATION
- Autograd provides automatic differentiation for all operations on Tensors

#### Tensor
- `.requires_grad` = True
	+ Track all operations on it
	+ After finishing computations, can call `.backward()` to have all the gradients computed automatically
	+ The gradient for this tensor will be accumulated into `.grad` attribute

- `.backward()`
	+ Compute the derivatives on a Tensor
	+ If the Tensor is a scalar (holds a single element), no arguments need to be passed to `backward()`
	+ If the Tensor has more than one element, a `gradient` argument (a tensor of matching shape) must be specified

- `.detach()`:  to get a new Tensor with the same content but that does not require gradients

- `with torch.no_grad():`
	+ Prevents autograd from tracking history (and using memory) for the code inside the block
	+ Used when evaluating a model

- `.grad_fn`
	+ Each tensor has a `.grad_fn` attribute that references a Function that has created the Tensor
	+ Tensors created directly by the user are the exception: their `grad_fn` is None

#### Change requires_grad

In [2]:
# A tensor built directly by the user starts with requires_grad=False,
# and so does anything computed from it.
a = torch.randn(2, 2)
a = (3 * a) / (a - 1)
print(a.requires_grad)

# Flip the flag in place so that later operations on `a` are tracked.
a.requires_grad_(True)
print(a.requires_grad)
b = torch.sum(a * a)
print(a)
print(b)

False
True
tensor([[ 1.7315, -1.0468],
        [-8.2165,  1.4425]], requires_grad=True)
tensor(73.6851, grad_fn=<SumBackward0>)


#### Detach

In [3]:
# Tensor tracked by autograd.
x = torch.randn(3, requires_grad=True)
# detach() gives a tensor with the same content that does not require gradients.
y = x.detach()

print(y.requires_grad)
# Element-wise comparison confirms the values are unchanged.
print(torch.eq(x, y).all())

False
tensor(True)


#### with `torch.no_grad()`

In [4]:
x = torch.randn(3, requires_grad=True)

# Results computed inside the no_grad block do not require gradients,
# even though x itself does.
with torch.no_grad():
    print(torch.pow(x, 2).requires_grad)

False


# Create a computational graph

#### forward

In [5]:
# Leaf tensor of the graph; gradients will be computed with respect to it.
x = torch.tensor([[1.0, 2.0], [3.0, 4.0]], requires_grad=True)
# Forward pass: out = mean(3 * (x + 2)^2)
y = x.add(2)
z = y.pow(2).mul(3)
out = torch.mean(z)

In [6]:
# Inspect every tensor of the forward pass, from the input x to the result out.
print(x)
# For y, also show the Function object referenced by its grad_fn.
print(y, y.grad_fn)
print(z, out, sep="\n")

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)
tensor([[3., 4.],
        [5., 6.]], grad_fn=<AddBackward0>) <AddBackward0 object at 0x7fad3839af50>
tensor([[ 27.,  48.],
        [ 75., 108.]], grad_fn=<MulBackward0>)
tensor(64.5000, grad_fn=<MeanBackward0>)


#### backward

In [7]:
# `out` is a scalar, so backward() needs no gradient argument; the resulting
# d(out)/dx is accumulated into x.grad.
out.backward()

#### Calculate `x.grad`
- **Forward**: $out = \frac{1}{4} \sum\limits_i 3(x_i + 2)^2$
- **Backward**: $\frac{\partial\ out}{\partial\ x_i} = \frac{3}{2}(x_i + 2)$
    + $\frac{\partial\ out}{\partial\ x_1}\bigr\rvert_{x_1=1} = \frac{3}{2}(x_1 + 2) = 4.5$
    + $\frac{\partial\ out}{\partial\ x_2}\bigr\rvert_{x_2=2} = \frac{3}{2}(x_2 + 2) = 6.0$
    + $\frac{\partial\ out}{\partial\ x_3}\bigr\rvert_{x_3=3} = \frac{3}{2}(x_3 + 2) = 7.5$
    + $\frac{\partial\ out}{\partial\ x_4}\bigr\rvert_{x_4=4} = \frac{3}{2}(x_4 + 2) = 9.0$

In [8]:
# d(out)/dx: the gradient accumulated into x.grad by the backward pass,
# with the same 2x2 shape as x.
J = x.grad
print(J)

tensor([[4.5000, 6.0000],
        [7.5000, 9.0000]])
