In [1]:
import torch

In [4]:
# Build a 2x2 float32 tensor and ask autograd to track it, so that
# gradients with respect to it can be accumulated in x.grad later on
x = torch.tensor(
    [[1.0, 2.0],
     [3.0, 4.0]],
    dtype=torch.float32,
    requires_grad=True,
)
print(x)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)


In [5]:
# Deduct 2 from all elements.
# y is computed from x (which requires grad), so the printed tensor carries
# a grad_fn naming the operation that produced it.
y = x - 2
print(y)

tensor([[-1.,  0.],
        [ 1.,  2.]], grad_fn=<SubBackward0>)


In [6]:
# grad_fn is the autograd node recorded for the operation that created y
print(y.grad_fn)

<SubBackward0 object at 0x128cf4cc0>


In [7]:
# What's happening here?
# Hint: unlike y, x was created directly rather than computed by an operation.
print(x.grad_fn)

None


In [8]:
# Let's dig further...
# Start from the node that produced y; the next cells walk backwards from it.
y.grad_fn

<SubBackward0 at 0x10e00df28>

In [9]:
# Step one node back in the graph: take the first entry of next_functions
# and, from that entry, its first item (the node object itself).
y.grad_fn.next_functions[0][0]

<AccumulateGrad at 0x128cf4940>

In [10]:
# The AccumulateGrad node exposes the tensor it is attached to via .variable;
# the output shows the same values and requires_grad flag as x.
y.grad_fn.next_functions[0][0].variable

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [11]:
# Extend the graph: square y element-wise, scale by 3, then reduce to a scalar
z = (y * y) * 3
a = z.mean()  # average over all elements of z

print(z, a, sep="\n")

tensor([[ 3.,  0.],
        [ 3., 12.]], grad_fn=<MulBackward0>)
tensor(4.5000, grad_fn=<MeanBackward0>)


In [13]:
# Let's visualise the computational graph! (thanks @szagoruyko)
from torchviz import make_dot

In [14]:
# Draw the autograd graph that leads to `a`.
# NOTE: rendering the result needs the Graphviz `dot` executable on the
# system PATH; without it the display step fails with ExecutableNotFound.
make_dot(a)

ExecutableNotFound: failed to execute ['dot', '-Tsvg'], make sure the Graphviz executables are on your systems' PATH

<graphviz.dot.Digraph at 0x10bebc9e8>

In [15]:
# Backprop: a is a scalar, so backward() is called without arguments.
# The gradient of a with respect to x ends up in x.grad.
a.backward()

In [16]:
# Compute it by hand BEFORE executing this.
# Recall: a = mean(3 * (x - 2)**2), taken over the 4 elements of x.
print(x.grad)

tensor([[-1.5000,  0.0000],
        [ 1.5000,  3.0000]])


In [17]:
# Dynamic graphs!
# The graph is built while the code runs, so ordinary Python control flow
# (here a data-dependent while loop) decides how many operations it contains.
x = torch.randn(3, requires_grad=True)

y = x * 2
i = 0  # counts the doublings performed inside the loop
# Use detach() rather than the legacy .data attribute to read y's values
# for the loop condition without involving autograd.
while y.detach().norm() < 1000:
    y = y * 2
    i += 1
print(y)

tensor([ 341.7104, 1853.8811,  146.7048], grad_fn=<MulBackward0>)


In [18]:
# y is not a scalar, so backward() needs an explicit `gradient` argument:
# a tensor shaped like y that weights each element of y.
# torch.tensor(..., dtype=...) replaces the legacy torch.FloatTensor constructor.
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradients)

# Each entry of x.grad is the matching weight scaled by the total factor
# that y was multiplied by relative to x.
print(x.grad)

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])


In [19]:
# BEFORE executing this, can you tell what you would expect it to print?
# Hint: i counts how many times the while loop doubled y.
print(i)

10
