This notebook uses PyTorch to calculate the gradients for a simple linear regression example.

It shows how to define parameters that require gradient calculations, calculate the gradients, and update the parameters.

In [71]:
import torch

# Toy dataset for y = x: three samples with one feature each.
# X is a column (shape (3, 1)); y holds the matching targets (shape (3,)).
X = torch.tensor([1, 2, 3]).unsqueeze(1)
y = torch.tensor([1, 2, 3])

In [72]:
# Define our cost function: mean absolute error
def mae(preds, acts):
    """Return the mean absolute error between predictions and actual values.

    Args:
        preds: tensor of predicted values.
        acts: tensor of actual (target) values, subtractable from ``preds``.

    Returns:
        A 0-dim tensor holding the mean of ``|preds - acts|``.
    """
    abs_errors = (preds - acts).abs()
    return abs_errors.mean()

In [79]:
# Append a column of ones to X so the bias can be learned as just another
# weight: each row [x] becomes [x, 1], and [x, 1] . [weight, bias] = weight*x + bias.
X_with_ones = torch.nn.functional.pad(X, (0, 1), 'constant', 1)

learning_rate = 0.01
num_iterations = 500

# Define the parameters we'd like to track the gradient calculation for.
# requires_grad=True makes loss.backward() populate weight_and_bias.grad.
weight_and_bias = torch.tensor([0.5, 0.5], requires_grad=True)

# Training loop (plain gradient descent).
# The loop variable is called `step`, not `iter`: at notebook top level it is a
# global, and naming it `iter` would shadow the builtin iter() for every cell
# run afterwards in the same kernel.
for step in range(num_iterations):
    # Forward pass: weight * x + bias * 1 for every row
    preds = torch.sum(torch.mul(weight_and_bias, X_with_ones), dim=1)
    loss = mae(preds, y)
    print(f"Iter {step} loss {loss}")

    # Backward pass: calculate d(loss)/d(weight_and_bias) into .grad
    loss.backward()

    # Update the weight and bias terms using the gradient.
    # torch.no_grad() stops autograd from recording the update itself, which
    # is what allows the in-place change to a tensor that requires grad.
    with torch.no_grad():
        weight_and_bias.sub_(weight_and_bias.grad * learning_rate)
        # backward() accumulates into .grad, so reset it before the next step
        weight_and_bias.grad.zero_()


Iter 0 loss 0.5
Iter 1 loss 0.47555556893348694
Iter 2 loss 0.4566667079925537
Iter 3 loss 0.4377778470516205
Iter 4 loss 0.41888895630836487
Iter 5 loss 0.40000009536743164
Iter 6 loss 0.3811112344264984
Iter 7 loss 0.3622223436832428
Iter 8 loss 0.34333348274230957
Iter 9 loss 0.32444462180137634
Iter 10 loss 0.3055557310581207
Iter 11 loss 0.2866668701171875
Iter 12 loss 0.2677780091762543
Iter 13 loss 0.24888913333415985
Iter 14 loss 0.23000025749206543
Iter 15 loss 0.211111381649971
Iter 16 loss 0.19222252070903778
Iter 17 loss 0.18666665256023407
Iter 18 loss 0.18555553257465363
Iter 19 loss 0.18444442749023438
Iter 20 loss 0.18333332240581512
Iter 21 loss 0.18222220242023468
Iter 22 loss 0.18111109733581543
Iter 23 loss 0.18000030517578125
Iter 24 loss 0.18111109733581543
Iter 25 loss 0.17999999225139618
Iter 26 loss 0.17888887226581573
Iter 27 loss 0.17777776718139648
Iter 28 loss 0.17666666209697723
Iter 29 loss 0.1755555421113968
Iter 30 loss 0.17444443702697754
Iter 31 loss 

In [69]:
# Evaluate the fitted parameters on a few inputs, including values outside
# the training range. The inputs get the same ones-column as the training data
# so the bias term is applied.
X_test = torch.tensor([0, 1, 2, 3, 4]).reshape(-1, 1)
X_test_with_ones = torch.nn.functional.pad(X_test, (0, 1), mode='constant', value=1)

# Inference only, so no gradient tracking is needed
with torch.no_grad():
    preds = (weight_and_bias * X_test_with_ones).sum(dim=1)
    print(preds)

tensor([-0.3870,  1.0057,  2.3983,  3.7910,  5.1837])
