This notebook uses pytorch to calculate the gradients for a simple linear regression example.

It shows how to define parameters that require gradient calculations, calculating the gradient, and update the parameters.

In [7]:
import torch

X = torch.tensor([[1],[2],[3]])
y = torch.tensor([1,2,3])

In [29]:
# Define out cost function
def mae(preds, acts): return (torch.abs(preds-acts)).mean()

In [70]:
# Pad with ones so we have something to multiply the bias by
X_with_ones = torch.nn.functional.pad(X, (0,1), 'constant', 1)

learning_rate = 0.001

# Define the parameters we'd like to track the gradient calulation for
weight_and_bias = torch.tensor([0.5, 0.5], requires_grad=True)

# Training loop
for iter in range(100):
  preds = torch.sum(torch.mul(weight_and_bias, X_with_ones), dim=1)
  loss = mae(preds, y)
  print(f"Iter {iter} loss {loss}")

  # Calculate the gradients
  loss.backward()

  # Update the weight and bias terms using the gradient
  # torch.no_grad() avoids calculating the gradients for this step
  with torch.no_grad():
    weight_and_bias -= weight_and_bias.grad * learning_rate


Iter 0 loss 0.5
Iter 1 loss 0.49755558371543884
Iter 2 loss 0.4932222068309784
Iter 3 loss 0.4870000183582306
Iter 4 loss 0.4788888692855835
Iter 5 loss 0.46888887882232666
Iter 6 loss 0.45699992775917053
Iter 7 loss 0.44322219491004944
Iter 8 loss 0.42755553126335144
Iter 9 loss 0.40999993681907654
Iter 10 loss 0.3905555009841919
Iter 11 loss 0.36922216415405273
Iter 12 loss 0.34599995613098145
Iter 13 loss 0.32088878750801086
Iter 14 loss 0.29388877749443054
Iter 15 loss 0.2649998962879181
Iter 16 loss 0.2342221736907959
Iter 17 loss 0.20155549049377441
Iter 18 loss 0.18766669929027557
Iter 19 loss 0.18966667354106903
Iter 20 loss 0.19155557453632355
Iter 21 loss 0.194000244140625
Iter 22 loss 0.24300019443035126
Iter 23 loss 0.2870001792907715
Iter 24 loss 0.32600024342536926
Iter 25 loss 0.36000022292137146
Iter 26 loss 0.3890000879764557
Iter 27 loss 0.41300010681152344
Iter 28 loss 0.43200019001960754
Iter 29 loss 0.44600018858909607
Iter 30 loss 0.4550003111362457
Iter 31 loss 0

In [69]:
X_test = torch.tensor([[0],[1],[2],[3],[4]])
X_test_with_ones = torch.nn.functional.pad(X_test, (0,1), 'constant', 1)

with torch.no_grad():
  preds = torch.sum(torch.mul(weight_and_bias, X_test_with_ones), dim=1)
  print(preds)

tensor([-0.3870,  1.0057,  2.3983,  3.7910,  5.1837])
