In [14]:
# Load libraries
import torch

Create a tensor:

In [15]:
# Build a 2x2 float32 tensor; requires_grad=True asks autograd to track it
x = torch.tensor(
    [[1, 2], [3, 4]],
    dtype=torch.float32,
    requires_grad=True,
)
print(x)

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)


In [16]:
# Subtract 2 from every element of x; the result records how it was made
y = torch.sub(x, 2)
print(y)

tensor([[-1.,  0.],
        [ 1.,  2.]], grad_fn=<SubBackward0>)


`y` was created as a result of an operation, so it has a `grad_fn`.

In [17]:
# Show the backward node that autograd recorded for the subtraction that made y
print(y.grad_fn)

<SubBackward0 object at 0x12762e0a0>


In [18]:
# What's happening here?
# Hint: x was created directly by us, not as the result of an operation
print(x.grad_fn)

None


In [19]:
# Let's dig further: inspect the backward node attached to y
y.grad_fn

<SubBackward0 at 0x12762e760>

In [20]:
# Follow the first edge out of y's backward node; it leads to an AccumulateGrad node
y.grad_fn.next_functions[0][0]

<AccumulateGrad at 0x12762e640>

In [21]:
# The tensor held by that AccumulateGrad node; the values displayed match x
y.grad_fn.next_functions[0][0].variable

tensor([[1., 2.],
        [3., 4.]], requires_grad=True)

In [22]:
# Display y again for reference before building more operations on it
y

tensor([[-1.,  0.],
        [ 1.,  2.]], grad_fn=<SubBackward0>)

In [23]:
# Chain further operations onto y: square each element, scale by 3,
# then reduce everything to a single scalar with the mean
z = (y * y) * 3
a = torch.mean(z)  # average

print(z, a, sep="\n")

tensor([[ 3.,  0.],
        [ 3., 12.]], grad_fn=<MulBackward0>)
tensor(4.5000, grad_fn=<MeanBackward0>)


In [24]:
# Let's visualise the computational graph !
from torchviz import make_dot

In [27]:
# Draw the computational graph that produced a.
# NOTE: rendering needs the Graphviz `dot` executable on the system PATH;
# without it the cell reports ExecutableNotFound instead of showing the image.
make_dot(a)

ExecutableNotFound: failed to execute PosixPath('dot'), make sure the Graphviz executables are on your systems' PATH

<graphviz.graphs.Digraph at 0x1271e6220>

## Gradients

Let's backprop now. Because `a` is a scalar, `a.backward()` is equivalent to `a.backward(torch.tensor(1.0))`.

In [28]:
# Backprop: a is a scalar, so no explicit gradient argument is needed
a.backward()

Print gradients $\frac{da}{dx}$

In [32]:
# Compute it by hand BEFORE executing this
# x.grad holds da/dx, populated by the a.backward() call
print(x.grad)

tensor([[-1.5000,  0.0000],
        [ 1.5000,  3.0000]])


In [33]:
# Dynamic graphs!

# Start over with a random 3-element vector that autograd tracks
x = torch.randn(3).requires_grad_()
x

tensor([0.9865, 0.2261, 0.2807], requires_grad=True)

In [35]:
# Keep doubling y until its norm reaches 1000. The number of iterations
# depends on the random values in x, so the graph is built dynamically.
y = x * 2
i = 0  # counts how many extra doublings the loop performs
# .detach() reads the values without recording the norm in the graph;
# it is the recommended replacement for the legacy .data accessor.
while y.detach().norm() < 1000:
    y = y * 2
    i += 1
print(y.detach().norm())
print(y)

tensor(1075.4554)
tensor([1010.1375,  231.4954,  287.4658], grad_fn=<MulBackward0>)


In [36]:
# y is not a scalar, so backward() needs an explicit grad_output with the
# same shape as y; each entry weights the matching element of y.
# torch.tensor is the recommended constructor (torch.FloatTensor is legacy).
gradients = torch.tensor([0.1, 1.0, 0.0001])
y.backward(gradients)

print(x.grad)

tensor([1.0240e+02, 1.0240e+03, 1.0240e-01])


In [37]:
# BEFORE executing this, can you tell what you would expect it to print?
# i counts how many times the doubling loop above ran
print(i)

9


## Inference

In [38]:
# n sets the length of the tensors built in the cells below
n = 3

In [39]:
# Both x and w allow gradient accumulation
w = torch.ones(n, requires_grad=True)
x = torch.arange(1., n + 1, requires_grad=True)

print(f"x:{x}", f"w:{w}", sep="\n")

x:tensor([1., 2., 3.], requires_grad=True)
w:tensor([1., 1., 1.], requires_grad=True)


In [40]:
# Dot product of w and x, then backprop: dz/dx equals w and dz/dw equals x
z = torch.matmul(w, x)
z.backward()
print(x.grad)
print(w.grad)

tensor([1., 1., 1.])
tensor([1., 2., 3.])


In [42]:
# Only w allows gradient accumulation this time; x is a plain tensor
w = torch.ones(n, requires_grad=True)
x = torch.arange(1., n + 1)
z = torch.matmul(w, x)
z.backward()
print(x.grad)  # None, because x never asked for gradients
print(w.grad)

None
tensor([1., 2., 3.])


In [43]:
w = torch.ones(n, requires_grad=True)
x = torch.arange(1., n + 1)

# Nothing computed inside this context is tracked by autograd,
# even though w itself requires gradients
with torch.no_grad():
    z = torch.matmul(w, x)

try:
    # z has no grad_fn, so PyTorch raises a RuntimeError here
    z.backward()
except RuntimeError as err:
    print('RuntimeError!!!>:[')
    print(err)

RuntimeError!!!>:[
element 0 of tensors does not require grad and does not have a grad_fn
