In [1]:
import torch

#### track computation with requires_grad = True

In [2]:
# 2x2 tensor of ones; requires_grad=True asks autograd to record operations performed on it
x = torch.ones((2, 2), requires_grad=True)

In [3]:
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [4]:
# adding a scalar to a tracked tensor yields a result that carries a grad_fn
y = torch.add(x, 2)
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

#### y was created as a result of an operation, so it has a grad_fn

In [5]:
y.grad_fn

<AddBackward0 at 0x1872e5bd9b0>

In [9]:
# z = 3 * y^2 element-wise; out reduces z to a single scalar via the mean
z = y.pow(2).mul(3)
out = torch.mean(z)
z, out

(tensor([[27., 27.],
         [27., 27.]], grad_fn=<MulBackward0>),
 tensor(27., grad_fn=<MeanBackward1>))

#### change the requires_grad flag in-place with .requires_grad_(True/False)

In [10]:
# a tensor built without requires_grad is not tracked, and neither is anything derived from it
a = torch.randn(2, 2)
a = a.mul(3).div(a.sub(1))
print(a.requires_grad)

# requires_grad_() flips the flag on the existing tensor in place
a.requires_grad_(True)
print(a.requires_grad)

# results computed from `a` after the flip carry a grad_fn
b = a.pow(2).sum()
print(b.grad_fn)

False
True
<SumBackward0 object at 0x000001872E61F400>


### Gradients
#### .backward() is called on the result of a computation; if that result is a scalar, no argument is needed and all gradients are computed automatically
#### if the tensor has more than one element, you need to specify a gradient argument that is a tensor of matching shape

In [11]:
# out = mean((x+2)^2 * 3)
# `out` is a scalar, so backward() needs no gradient argument; it fills x.grad with d(out)/dx.
# The mean runs over 4 elements, so d(out)/dx_i = 3 * 2 * (x_i + 2) / 4 = 1.5 * (x_i + 2),
# which is 4.5 for x_i = 1.
out.backward()

In [12]:
# Only the leaf tensor x gets a populated .grad; y and z are intermediate results,
# so their .grad is None (recent PyTorch versions may also warn when .grad is read on a non-leaf tensor).
print(x.grad, y.grad, z.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]]) None None


In [13]:
# Start from a random leaf vector and keep doubling y until its Euclidean norm reaches 1000.
x = torch.randn(3, requires_grad=True)
y = x * 2
# detach() returns a tensor excluded from the graph, so the norm used only for the loop
# test is not recorded by autograd. It replaces the legacy `.data` attribute, which
# bypasses autograd's correctness checks.
while y.detach().norm() < 1000:
    y = y * 2
print(y, y.shape)

tensor([-994.6409,  383.4634, -775.9459], grad_fn=<MulBackward0>) torch.Size([3])


In [14]:
# y is not a scalar, so backward() needs a gradient tensor with the same shape as y
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(v)  # vector-Jacobian product v^T · J (J = dy/dx), accumulated into x.grad
print(x.grad)

tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])


#### stop autograd from tracking history on Tensors with .requires_grad = True by wrapping the code block in `with torch.no_grad():`. This is typically used when evaluating a model

In [15]:
# outside no_grad(), a result derived from a tracked tensor is tracked as well
print(x.requires_grad)
print(x.pow(2).requires_grad)

# inside no_grad(), the same computation produces a result with requires_grad=False
with torch.no_grad():
    print(x.pow(2).requires_grad)

True
True
False
