**Linear regression using PyTorch built-ins**
- The model and training process we used follow a very common pattern
- PyTorch has several built-in functions and classes that make implementing it easier

In [1]:
import numpy as np
import torch
# contains utility classes for building neural networks
import torch.nn as nn

In [2]:
# Feature matrix: one row per sample, columns are (temp, rainfall, humidity).
features_np = np.array(
    [
        [73, 67, 43],
        [91, 88, 64],
        [87, 134, 58],
        [102, 43, 37],
        [69, 96, 70],
        [74, 66, 43],
        [91, 87, 65],
        [88, 134, 59],
        [101, 44, 37],
        [68, 96, 71],
        [73, 66, 44],
        [92, 87, 64],
        [87, 135, 57],
        [103, 43, 36],
        [68, 97, 70],
    ],
    dtype=np.float32,
)

# Target matrix: one row per sample, columns are (apples, oranges).
labels_np = np.array(
    [
        [56, 70],
        [81, 101],
        [119, 133],
        [22, 37],
        [103, 119],
        [57, 69],
        [80, 102],
        [118, 132],
        [21, 38],
        [104, 118],
        [57, 69],
        [82, 100],
        [118, 134],
        [20, 38],
        [102, 120],
    ],
    dtype=np.float32,
)

# Wrap the NumPy arrays as tensors; `inputs` and `targets` are the names the
# rest of the notebook works with.
inputs = torch.from_numpy(features_np)
targets = torch.from_numpy(labels_np)

**Dataset and DataLoader**
- TensorDataset allows access to rows of inputs and targets as tuples
- Provides standard APIs for working with many different types of datasets

In [3]:
from torch.utils.data import TensorDataset

In [4]:
# define dataset: pairs each row of `inputs` with the matching row of `targets`
train_ds = TensorDataset(inputs, targets)
# slicing with array indexing notation returns a tuple of tensors
# (inputs, targets) holding the selected rows -- here the first three samples
train_ds[0:3]

(tensor([[ 73.,  67.,  43.],
         [ 91.,  88.,  64.],
         [ 87., 134.,  58.]]), tensor([[ 56.,  70.],
         [ 81., 101.],
         [119., 133.]]))

In [5]:
# DataLoader can split data into batches of predefined size while training
# also provides other utilities - shuffling, random sampling
from torch.utils.data import DataLoader

In [6]:
# define data loader
batch_size = 5
# dedicated, seeded generator for the shuffle: without it the batch order (and
# therefore every printed batch and loss below) changes on each fresh run
shuffle_rng = torch.Generator().manual_seed(42)
train_dl = DataLoader(train_ds, batch_size, shuffle=True, generator=shuffle_rng)
# shuffle=True reshuffles the rows at the start of every pass over the data,
# so each epoch trains on differently composed batches

In [7]:
# a data loader is normally consumed with a for-in loop;
# every iteration hands back one batch as an (inputs, targets) pair
for xb, yb in train_dl:
  # one call, newline-separated: header, then the input batch, then the target batch
  print("batch:", xb, yb, sep="\n")

batch:
tensor([[ 92.,  87.,  64.],
        [ 74.,  66.,  43.],
        [ 91.,  87.,  65.],
        [ 69.,  96.,  70.],
        [ 87., 135.,  57.]])
tensor([[ 82., 100.],
        [ 57.,  69.],
        [ 80., 102.],
        [103., 119.],
        [118., 134.]])
batch:
tensor([[ 87., 134.,  58.],
        [ 68.,  97.,  70.],
        [ 68.,  96.,  71.],
        [102.,  43.,  37.],
        [ 88., 134.,  59.]])
tensor([[119., 133.],
        [102., 120.],
        [104., 118.],
        [ 22.,  37.],
        [118., 132.]])
batch:
tensor([[ 73.,  66.,  44.],
        [103.,  43.,  36.],
        [101.,  44.,  37.],
        [ 73.,  67.,  43.],
        [ 91.,  88.,  64.]])
tensor([[ 57.,  69.],
        [ 20.,  38.],
        [ 21.,  38.],
        [ 56.,  70.],
        [ 81., 101.]])


**nn.Linear**
- Define model using nn.Linear
- Initializes weights and biases automatically

In [8]:
# seed PyTorch's global RNG first: nn.Linear draws its initial weights and
# biases at random, so an unseeded run starts from a different model each time
torch.manual_seed(42)
# define model
model = nn.Linear(3, 2) # parameters: num inputs, num outputs (targets)
print(model.weight)
print(model.bias)

Parameter containing:
tensor([[-0.5110,  0.4001,  0.3474],
        [-0.2289, -0.4924,  0.3901]], requires_grad=True)
Parameter containing:
tensor([-0.4123,  0.1375], requires_grad=True)


In [9]:
# .parameters() iterates over all learnable tensors (weights and biases) in the
# model; list() collects them so they can be displayed
# our model has only 1 weight matrix and 1 bias vector
list(model.parameters())

[Parameter containing:
 tensor([[-0.5110,  0.4001,  0.3474],
         [-0.2289, -0.4924,  0.3901]], requires_grad=True),
 Parameter containing:
 tensor([-0.4123,  0.1375], requires_grad=True)]

In [10]:
# generate predictions by calling the model on the full input tensor
# (the result has one row per sample and one column per target)
preds = model(inputs)
preds

tensor([[  4.0239, -32.7915],
        [ 10.5216, -39.0606],
        [ 28.8840, -63.1380],
        [-22.4814, -29.9523],
        [ 27.0489, -35.6232],
        [  3.1129, -32.5280],
        [ 10.4690, -38.1780],
        [ 28.7204, -62.9768],
        [-21.5703, -30.2158],
        [ 27.9073, -35.0042],
        [  3.9713, -31.9090],
        [  9.6105, -38.7971],
        [ 28.9367, -64.0206],
        [-23.3398, -30.5713],
        [ 27.9600, -35.8868]], grad_fn=<AddmmBackward0>)

**Loss Function**
- Uses built-in loss function mse_loss

In [11]:
# nn.functional package contains many useful loss functions and other utils
import torch.nn.functional as F

In [12]:
# define loss function: keep a reference to the built-in mean squared error
# so it can be passed to the training routine like any other callable
loss_fn = F.mse_loss

In [13]:
# compute loss for current predictions
# (the model has not been trained yet, so this is the starting loss)
loss = loss_fn(model(inputs), targets)
print(loss)

tensor(12031.2285, grad_fn=<MseLossBackward0>)


**Optimizer**
- Use the optimizer optim.SGD instead of manually updating the weights and biases using gradients
- SGD - stochastic gradient descent
- stochastic - samples are selected in (often random) batches instead of as a single group

In [19]:
# model.parameters() passed in so optimizer knows which tensors to modify
# lr = learning rate (scales the size of each gradient step)
opt = torch.optim.SGD(model.parameters(), lr=1e-5)

**Training the model**

Same process as before:

1. Generate predictions
2. Calculate loss
3. Compute gradients w.r.t. weights and biases
4. Adjust weights by subtracting a small quantity proportional to the gradient
5. Reset gradients to 0

In [21]:
# utility function to train model for given number of epochs
def fit(num_epochs, model, loss_fn, opt, train_dl):
  """Train `model` in place for `num_epochs` passes over `train_dl`.

  Args:
    num_epochs: number of full passes over the training batches.
    model: callable mapping a batch of inputs to predictions; its parameters
      are expected to be the ones `opt` was constructed with.
    loss_fn: callable `(predictions, targets) -> scalar loss tensor`.
    opt: optimizer exposing `step()` and `zero_grad()`.
    train_dl: iterable yielding `(inputs, targets)` batches.

  Returns:
    None. The model is updated as a side effect, and every 10th epoch the loss
    of that epoch's last batch is printed.
  """
  # discard any gradients left in .grad by code that ran before this call
  # (e.g. an earlier manual loss.backward()); backward() accumulates, so
  # leftovers would otherwise be added into the very first update
  opt.zero_grad()

  # repeat for given number of epochs
  for epoch in range(num_epochs):

    # stays None if train_dl yields no batches, so the progress line below is
    # skipped instead of failing on an unbound `loss`
    loss = None

    # train w/ batches of data
    for xb, yb in train_dl:

      # 1. generate predictions
      pred = model(xb)

      # 2. calculate loss
      loss = loss_fn(pred, yb)

      # 3. compute gradients
      loss.backward()

      # 4. update parameters using gradients
      opt.step()

      # 5. reset gradients to zero so the next batch starts clean
      opt.zero_grad()

    # print progress every 10 epochs (loss shown is that of the last batch only)
    if loss is not None and (epoch + 1) % 10 == 0:
      print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {round(loss.item(), 4)}")

In [22]:
# train model for 100 epochs (progress is printed every 10th epoch)
fit(100, model, loss_fn, opt, train_dl)

Epoch [10/100], Loss: 4.8482
Epoch [20/100], Loss: 1.5326
Epoch [30/100], Loss: 2.7389
Epoch [40/100], Loss: 3.6306
Epoch [50/100], Loss: 2.9668
Epoch [60/100], Loss: 0.997
Epoch [70/100], Loss: 1.2882
Epoch [80/100], Loss: 1.9628
Epoch [90/100], Loss: 1.702
Epoch [100/100], Loss: 1.7025


In [23]:
# generate predictions again, this time with the trained model
preds = model(inputs)
preds

tensor([[ 56.9938,  70.5560],
        [ 81.6100, 100.6300],
        [119.1809, 132.2672],
        [ 21.3434,  38.6005],
        [100.7312, 118.1984],
        [ 55.7461,  69.5031],
        [ 81.3769, 100.7370],
        [119.4325, 132.8806],
        [ 22.5911,  39.6534],
        [101.7458, 119.3583],
        [ 56.7607,  70.6630],
        [ 80.3624,  99.5771],
        [119.4140, 132.1602],
        [ 20.3289,  37.4406],
        [101.9789, 119.2513]], grad_fn=<AddmmBackward0>)

In [24]:
# compare with targets: display the true values next to the predictions above
targets

tensor([[ 56.,  70.],
        [ 81., 101.],
        [119., 133.],
        [ 22.,  37.],
        [103., 119.],
        [ 57.,  69.],
        [ 80., 102.],
        [118., 132.],
        [ 21.,  38.],
        [104., 118.],
        [ 57.,  69.],
        [ 82., 100.],
        [118., 134.],
        [ 20.,  38.],
        [102., 120.]])