In [1]:
import torch
import numpy as np

## `requires_grad`

In [2]:
# A tensor created with requires_grad=True is a leaf whose operations
# autograd will record.
x = torch.ones((2, 2), requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [3]:
# A result computed from x carries a grad_fn describing how it was produced.
y = torch.add(x, 2)
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [4]:
# grad_fn is the autograd node that produced y (the addition x + 2).
y.grad_fn

<AddBackward0 at 0x7f5751fe4550>

In [5]:
# z = 3 * y^2 element-wise; out reduces z to a single scalar via the mean.
z = y.pow(2).mul(3)
out = torch.mean(z)
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [6]:
# Tensors do not require grad unless asked to, so the arithmetic below is not
# recorded and `a` has no grad_fn.
a = torch.rand(2, 2)
a = torch.div(a * 3, a - 1)
print(a.requires_grad)
print(a.grad_fn)
# requires_grad_() flips the flag in place; operations from here on are tracked.
a.requires_grad_()
print(a.requires_grad)
b = torch.sum(a * a)
print(b.grad_fn)

False
None
True
<SumBackward0 object at 0x7f56dfbce1d0>


## Gradients

In [7]:
# Backpropagate from the scalar `out`; this populates x.grad (printed in the next cell).
out.backward()

In [8]:
# out = (1/4) * sum_i z_i, where z_i = 3 * (x_i + 2)^2
# => d(out)/dx_i = (3/2) * (x_i + 2), which equals 4.5 at x_i = 1
print(x.grad) # gradient of the scalar `out` with respect to x (same shape as x)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [9]:
# Start from a small leaf vector and keep doubling the result until its
# Euclidean norm is at least 1000. Every doubling is recorded by autograd,
# so y ends up as x * 512 with a grad_fn.
x = torch.tensor([1, 2, 3], dtype=torch.float32, requires_grad=True)
print(x)
y = x * 2
# The loop test only needs the values, not the graph. detach() is used instead
# of the legacy `.data` attribute, which bypasses autograd's in-place checks.
while y.detach().norm() < 1000:
    y = y * 2
print(y)

tensor([1., 2., 3.], requires_grad=True)
tensor([ 512., 1024., 1536.], grad_fn=<MulBackward0>)


In [10]:
# y is not a scalar, so backward() is given a tensor of y's shape; x.grad then
# holds the vector-Jacobian product of v with dy/dx.
v = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradient=v)
print(x.grad)

tensor([5.1200e+01, 5.1200e+02, 5.1200e-02])


In [11]:
# requires_grad carries over from x to tensors computed from it.
print(x.requires_grad, x.pow(2).requires_grad, sep="\n")

True
True


In [12]:
# Inside torch.no_grad() results are not tracked, even though x requires grad.
with torch.no_grad():
    print(x.pow(2).requires_grad)

False


In [13]:
# detach() gives a tensor with the same values that does not require grad.
print(x)
print(x.requires_grad)
y = x.detach()
print(y.requires_grad)
print((x == y).all())
# y shares its memory with x, so an in-place add on y is visible through x too.
y.add_(10)
print(x, y)

tensor([1., 2., 3.], requires_grad=True)
True
False
tensor(True)
tensor([11., 12., 13.], requires_grad=True) tensor([11., 12., 13.])


In [14]:
# clone() copies the data, so z is independent of x and carries no gradient.
z = x.detach().clone()
print(z.requires_grad)
print((x == z).all())
# Rebinding x to a new (tracked) result leaves the copy z unchanged.
x = torch.add(x, 10)
print(x, z)

False
tensor(True)
tensor([21., 22., 23.], grad_fn=<AddBackward0>) tensor([11., 12., 13.])


## Gradient for sigmoid function

In [33]:
# f = sigmoid(w . x^T) for one 1x3 weight row and one 1x3 input row; the
# matrix product is 1x1, so f holds a single element.
w = torch.tensor([[2, -3, -3]], dtype=torch.float32, requires_grad=True)
x = torch.tensor([[-1, -2, 1]], dtype=torch.float32, requires_grad=True)
# torch.sigmoid gives the same value as the hand-written 1 / (1 + exp(-z)) here,
# but avoids overflowing exp(-z) for large negative z, where the manual form
# can produce non-finite gradients.
f = torch.sigmoid(torch.mm(w, x.T))

In [34]:
# f has a single element (1x1), so backward() needs no explicit gradient argument.
f.backward()

In [35]:
# df/dx = sigmoid'(z) * w with z = w . x^T = 1, and sigmoid'(1) is about 0.1966.
x.grad

tensor([[ 0.3932, -0.5898, -0.5898]])

In [37]:
# df/dw = sigmoid'(z) * x with z = w . x^T = 1, and sigmoid'(1) is about 0.1966.
w.grad

tensor([[-0.1966, -0.3932,  0.1966]])