In [1]:
import torch

In [5]:
# Leaf tensor flagged for autograd, so operations on it are recorded.
x = torch.randn(3, requires_grad=True)
print(x)

# The result of an op on a tracked tensor carries a grad_fn.
y = x + 2
print(y)

# Reduce to a scalar so that backward() needs no explicit gradient argument.
z = (y ** 2 * 2).mean()
print(z)

# Backpropagate from the scalar z; dz/dx is stored in x.grad.
z.backward()
print(x.grad)

tensor([ 0.1610, -1.1359, -0.0703], requires_grad=True)
tensor([2.1610, 0.8641, 1.9297], grad_fn=<AddBackward0>)
tensor(6.0937, grad_fn=<MeanBackward0>)
tensor([2.8814, 1.1522, 2.5729])


In [7]:
# Fresh leaf tensor, so x.grad starts out empty for this example.
x = torch.randn(3, requires_grad=True)
print(x)

y = x + 2
print(y)

# No .mean() reduction this time: z stays a 3-element vector.
z = 2 * y ** 2
print(z)

# A non-scalar output needs an explicit gradient vector of matching shape;
# backward() then stores the vector-Jacobian product (dz/dx weighted by v) in x.grad.
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
z.backward(gradient=v)
print(x.grad)

tensor([-0.2875,  0.2090,  0.2434], requires_grad=True)
tensor([1.7125, 2.2090, 2.2434], grad_fn=<AddBackward0>)
tensor([ 5.8652,  9.7594, 10.0653], grad_fn=<MulBackward0>)
tensor([0.6850, 8.8360, 0.0090])


In [None]:
x = torch.randn(3, requires_grad=True)
print(x)

### Ways to keep an operation out of the autograd history
# Option 1 - switch tracking off on the tensor itself, in place:
#     x.requires_grad_(False)
#     print(x)
#
# Option 2 - work with a tensor that is detached from the graph:
#     y = x.detach()
#     print(y)
#
# Option 3 (used here) - run the computation inside a no_grad block:
with torch.no_grad():
    y = x + 2
print(y)


tensor([ 1.5840,  0.7015, -0.5042], requires_grad=True)
tensor([3.5840, 2.7015, 1.4958])


## gradient accumulation

In [14]:
# Leaf parameter vector; its .grad buffer is never reset in this cell.
weights = torch.ones(4, requires_grad=True)

for epoch in range(4):
    # Toy forward pass reduced to a scalar.
    model_output = torch.sum(3 * weights)

    # backward() ADDS the new gradient to weights.grad instead of overwriting it,
    # so the printed values grow by 3 on every pass.
    model_output.backward()
    print(weights.grad)

tensor([3., 3., 3., 3.])
tensor([6., 6., 6., 6.])
tensor([9., 9., 9., 9.])
tensor([12., 12., 12., 12.])


In [15]:
weights = torch.ones(4, requires_grad=True)

for epoch in range(4):
    # Toy forward pass reduced to a scalar.
    model_output = torch.sum(3 * weights)

    # Gradient of the scalar output with respect to the leaf tensor `weights`.
    model_output.backward()
    print(weights.grad)

    # Clear the buffer in place so the next pass does not add to this one.
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])
