# Linear Regression Model using PyTorch

Using the features of PyTorch encountered in the introductory notebook we now build a full linear regression model.

In [1]:
import numpy as np

In [2]:
import torch
import torch.optim as optim
import torch.nn as nn

In [3]:
# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

## Create Dataset Structure

In [4]:
# Fix NumPy's global seed so the synthetic data and the split are repeatable
np.random.seed(42)

# Synthetic data: 100 points following y = 1 + 2x plus Gaussian noise (std 0.1)
x = np.random.rand(100, 1)
y = 1 + 2 * x + .1 * np.random.randn(100, 1)

# Shuffle the row indices once, then cut them 80/20
idx = np.arange(len(x))
np.random.shuffle(idx)
train_idx, val_idx = idx[:80], idx[80:]

# Training and validation sets share the same index split for x and y
x_train, x_val = x[train_idx], x[val_idx]
y_train, y_val = y[train_idx], y[val_idx]

### Create Tensors

In [5]:
from torch.utils.data import Dataset, TensorDataset
from torch.utils.data.dataset import random_split

In [6]:
# Does the same as the prebuilt TensorDataset when only tensors are involved
# Inherit from Dataset
class CustomDataset(Dataset):
    """Minimal map-style dataset pairing a feature tensor with a label tensor.

    Args:
        x_tensor: features, indexed along the first dimension.
        y_tensor: labels, indexed along the first dimension.

    Raises:
        ValueError: if the two tensors hold a different number of samples.
    """

    def __init__(self, x_tensor, y_tensor):
        # Without this check a mismatch would only surface later as an
        # IndexError (or silently ignored labels), because __len__ looks at
        # the features alone.
        if len(x_tensor) != len(y_tensor):
            raise ValueError(
                "x_tensor and y_tensor must contain the same number of samples"
            )
        self.x = x_tensor
        self.y = y_tensor

    def __getitem__(self, index):
        """Return the (features, label) pair stored at ``index``."""
        return (self.x[index], self.y[index])

    def __len__(self):
        """Return the number of samples in the dataset."""
        return len(self.x)

In [7]:
# Keep the full training set as CPU tensors here; nothing in this cell moves
# data to the device
x_train_tensor, y_train_tensor = (
    torch.from_numpy(array).float() for array in (x_train, y_train)
)

# The hand-written dataset and the built-in TensorDataset expose the same sample
train_data = CustomDataset(x_train_tensor, y_train_tensor)
print(train_data[0])

train_data = TensorDataset(x_train_tensor, y_train_tensor)
print(train_data[0])

(tensor([0.7713]), tensor([2.4745]))
(tensor([0.7713]), tensor([2.4745]))


In [8]:
from torch.utils.data import DataLoader

# Serve the training set in shuffled mini-batches of 16 samples,
# as needed for mini-batch gradient descent
train_loader = DataLoader(train_data, batch_size=16, shuffle=True)

For the most part we have been using batch gradient descent. This is a variation of the gradient descent algorithm that calculates the error for each example in the training dataset, but only updates the model after all training examples have been evaluated. It is stable but may converge prematurely, and it requires all of the training data to be held in memory.

Batch gradient descent is faster per epoch, but can settle on a less accurate solution, than stochastic gradient descent (often abbreviated SGD), a variation of the gradient descent algorithm that calculates the error and updates the model for each example in the training dataset. This frequent updating can be computationally intensive and leads to a noisy gradient signal.

Mini-batch gradient descent splits the training dataset into small batches, seeking a balance between the robustness of stochastic gradient descent and the efficiency of batch gradient descent. It is the most common implementation of gradient descent used in the field of deep learning.

## Setup

In [9]:
# A Sequential container holding one Linear layer is equivalent to a
# hand-written single-layer linear regression class.
# Trade-off: this may be less traceable, because the weights are not
# labelled 'a', 'b', etc.
model = nn.Sequential(nn.Linear(in_features=1, out_features=1)).to(device)

In [10]:
# Factory: binds the model, loss and optimizer once and returns a closure that
# runs a single training step
def make_train_step(model, loss_fn, optimizer):
    """Build a function that performs one optimisation step on a mini-batch.

    Args:
        model: module called as ``model(x)`` to produce predictions.
        loss_fn: callable invoked as ``loss_fn(prediction, target)``.
        optimizer: optimizer whose ``step()`` / ``zero_grad()`` update and
            reset the model parameters.

    Returns:
        A ``train_step(x, y)`` function that runs the forward pass, the
        backward pass and the parameter update, and returns the loss as a
        Python float.
    """
    def train_step(x, y):
        # Make sure the model is in training mode for this step
        model.train()

        yhat = model(x)
        # PyTorch losses expect (input, target): prediction first, label
        # second. The order is irrelevant for MSE but not for asymmetric losses.
        loss = loss_fn(yhat, y)
        loss.backward()
        optimizer.step()
        # Clear the gradients so they do not accumulate into the next step
        optimizer.zero_grad()
        return loss.item()

    return train_step

## Training

In [11]:
# Seed PyTorch's global RNG (same value as the NumPy seed) so that the weight
# initialisation below, and any later draw from that global RNG, is repeatable
# on a fresh run
torch.manual_seed(42)

# Fresh single-layer linear model, placed on the selected device
model = nn.Sequential(nn.Linear(1, 1)).to(device)

# Hyper-parameters
lr = 1e-1
n_epochs = 1000

# Mean squared error averaged over the batch, minimised with plain SGD
loss_fn = nn.MSELoss(reduction='mean')
optimizer = optim.SGD(model.parameters(), lr=lr)

### Test Loop

In [12]:
# One training-step closure for the whole run; every mini-batch loss is recorded
train_step = make_train_step(model, loss_fn, optimizer)
losses = []

for epoch in range(n_epochs):
    for x_batch, y_batch in train_loader:
        # Only the current mini-batch is transferred to the device
        x_batch, y_batch = x_batch.to(device), y_batch.to(device)

        loss = train_step(x_batch, y_batch)
        losses.append(loss)

# Show the learned weight and bias
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.9671]])), ('0.bias', tensor([1.0207]))])


### Full Training

In [13]:
# Convert the complete (unsplit) NumPy data to float32 tensors
x_tensor, y_tensor = (torch.from_numpy(array).float() for array in (x, y))

In [14]:
# Wrap the full data set, then let PyTorch draw a random 80/20 split
dataset = TensorDataset(x_tensor, y_tensor)
train_dataset, val_dataset = random_split(dataset, lengths=[80, 20])

In [15]:
# Mini-batch loaders (16 samples per batch) over the two splits
train_loader = DataLoader(train_dataset, batch_size=16)
val_loader = DataLoader(val_dataset, batch_size=16)

In [16]:
# Per-mini-batch loss histories for the training and validation passes
losses = []
val_losses = []
# model, loss_fn and optimizer are reused as they are, so this run continues
# from whatever weights the model currently holds
train_step = make_train_step(model, loss_fn, optimizer)

for epoch in range(n_epochs):
    # Training: one optimisation step per mini-batch (train_step puts the
    # model back into training mode on every call)
    for x_batch, y_batch in train_loader:
        x_batch = x_batch.to(device)
        y_batch = y_batch.to(device)

        loss = train_step(x_batch, y_batch)
        losses.append(loss)

    # Validation: switch to evaluation mode once per epoch rather than once
    # per batch
    model.eval()
    with torch.no_grad():
        # Gradients only belong in training, not validation
        # Distinct loop names keep the NumPy arrays x_val / y_val intact
        for x_val_batch, y_val_batch in val_loader:
            x_val_batch = x_val_batch.to(device)
            y_val_batch = y_val_batch.to(device)

            yhat = model(x_val_batch)
            # (input, target) order, as PyTorch loss functions expect
            val_loss = loss_fn(yhat, y_val_batch)
            val_losses.append(val_loss.item())

# Show the learned weight and bias
print(model.state_dict())

OrderedDict([('0.weight', tensor([[1.9268]])), ('0.bias', tensor([1.0260]))])
