# Training Pipeline

This notebook continues and builds on the steps and implementations covered in the gradient descent notebook. We implement the loss and the optimization step with PyTorch, and then replace the hand-written forward pass with an actual PyTorch model. We also walk through the steps involved in setting up the training "pipeline". <br>

The steps involved are:
1. Design the model (input and output size, forward pass)
2. Construct the loss and optimizer
3. Training loop
    1. Forward pass: compute prediction
    2. Backward pass: gradients
    3. Update weights 

In [3]:
# Imports
import torch
import torch.nn as nn  # Neural network module

In [4]:
# The same example as before, completely in Torch
X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)  # 1, 2, 3, 4 are training samples
Y = torch.tensor([2, 4, 6, 8], dtype=torch.float32)  # 2, 4, 6, 8 are corresponding outputs

# Single trainable parameter; requires_grad=True makes autograd track it
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)  # initial weight


# model prediction
def forward_pass(x):
    """Return the model prediction ``w * x`` using the module-level weight ``w``."""
    return w * x


print(f'Prediction before training: f(17) = {forward_pass(17):.3f}')

# Hyperparameters
learning_rate = 0.01
# Autograd computes the analytic gradient dl/dw for us, so this is plain
# gradient descent on a single weight; 100 epochs is plenty to converge here.
num_epochs = 100

# PyTorch loss and optimizer
loss = nn.MSELoss()  # Mean Squared Error
# SGD over the full (4-sample) batch each step; the optimizer updates `w` in place
optimizer = torch.optim.SGD([w], lr=learning_rate)

# Training loop
for epoch in range(num_epochs):
    # prediction = forward pass
    y_pred = forward_pass(X)

    # loss: nn.MSELoss expects (input, target), i.e. prediction first
    l = loss(y_pred, Y)

    # gradients: backward pass populates w.grad with dl/dw
    l.backward()

    # update weights
    optimizer.step()

    # reset gradients, otherwise the next backward() would accumulate into w.grad
    optimizer.zero_grad()

    if epoch % 10 == 0:
        # weight shown is post-update; loss shown was computed before this update
        print(f'Epoch {epoch + 1}: weight = {w:.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(17) = {forward_pass(17):.3f}')

Prediction before training: f(17) = 0.000
Epoch 1: weight = 0.300, loss = 30.00000000
Epoch 11: weight = 1.665, loss = 1.16278565
Epoch 21: weight = 1.934, loss = 0.04506890
Epoch 31: weight = 1.987, loss = 0.00174685
Epoch 41: weight = 1.997, loss = 0.00006770
Epoch 51: weight = 1.999, loss = 0.00000262
Epoch 61: weight = 2.000, loss = 0.00000010
Epoch 71: weight = 2.000, loss = 0.00000000
Epoch 81: weight = 2.000, loss = 0.00000000
Epoch 91: weight = 2.000, loss = 0.00000000
Prediction after training: f(17) = 34.000


In [7]:
# We now replace the hand-written forward pass with a PyTorch module.
# We also don't need to define the weight ourselves: the model owns its parameters.
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)  # 1, 2, 3, 4 are training samples
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)  # 2, 4, 6, 8 are corresponding outputs
# Note the difference in shape: these are now 2D tensors (one row per sample)

n_samples, n_features = X.shape
print(n_samples, n_features)  # 4 samples and 1 feature for each sample

input_size = n_features
# Output width comes from the targets; it only happens to equal n_features here
output_size = Y.shape[1]
model = nn.Linear(input_size, output_size)  # Linear model with one input and one output

test_val = torch.tensor([17], dtype=torch.float32)

print(f'Prediction before training: f(17) = {model(test_val).item():.3f}')
# gotta call .item() to get access to the actual value instead of the tensor

# Hyperparameters
learning_rate = 0.01
# The model now also learns a randomly initialised bias, so we train for more epochs
num_epochs = 200

# PyTorch loss and optimizer
loss = nn.MSELoss()  # Mean Squared Error
# We no longer hold the weight ourselves, so hand the optimizer the model's parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  # Stochastic Gradient Descent

# Training loop
for epoch in range(num_epochs):
    # prediction = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss expects (input, target), i.e. prediction first
    l = loss(y_pred, Y)

    # gradients: backward pass populates .grad on every model parameter
    l.backward()

    # update weights
    optimizer.step()

    # reset gradients, otherwise the next backward() would accumulate into .grad
    optimizer.zero_grad()

    if epoch % 20 == 0:
        # nn.Linear yields (weight, bias); weight is a 2D tensor, so index [0][0].
        # Named `weight`/`bias` so the scalar `w` used by `forward_pass` is not overwritten.
        weight, bias = model.parameters()
        print(f'Epoch {epoch + 1}: weight = {weight[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(17) = {model(test_val).item():.3f}')

4 1
Prediction before training: f(17) = -9.033
Epoch 1: weight = -0.175, loss = 45.67658234
Epoch 21: weight = 1.638, loss = 0.17393965
Epoch 41: weight = 1.703, loss = 0.12727781
Epoch 61: weight = 1.721, loss = 0.11287437
Epoch 81: weight = 1.737, loss = 0.10011701
Epoch 101: weight = 1.753, loss = 0.08880156
Epoch 121: weight = 1.767, loss = 0.07876502
Epoch 141: weight = 1.781, loss = 0.06986274
Epoch 161: weight = 1.793, loss = 0.06196666
Epoch 181: weight = 1.805, loss = 0.05496305
Prediction after training: f(17) = 31.416


In [8]:
# Above, we used a simple built-in model because the task at hand is simple
# We usually end up defining our own models as a class
# Here is an example of creating a model for the above task

class LinearRegression(nn.Module):  # Inherit from nn.Module
    """Linear regression model wrapping a single ``nn.Linear`` layer.

    Args:
        input_dim: Number of input features, passed to ``nn.Linear`` as ``in_features``.
        output_dim: Number of outputs, passed to ``nn.Linear`` as ``out_features``.
    """

    def __init__(self, input_dim, output_dim):  # Define params as needed
        # nn.Module must be initialised before any layer is assigned as an attribute
        super().__init__()
        # Define layers
        self.linear = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        """Define the forward pass: apply the linear layer to ``x`` and return the result."""
        return self.linear(x)


# THIS IS AN EXTREMELY SIMPLE EXAMPLE, WE WILL SEE MORE COMPLEX ONES LATER

In [9]:
# We'll repeat the above example using this custom model
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)  # 1, 2, 3, 4 are training samples
Y = torch.tensor([[2], [4], [6], [8]], dtype=torch.float32)  # 2, 4, 6, 8 are corresponding outputs
# Note the difference in shape: these are now 2D tensors (one row per sample)

n_samples, n_features = X.shape
print(n_samples, n_features)  # 4 samples and 1 feature for each sample

input_size = n_features
# Output width comes from the targets; it only happens to equal n_features here
output_size = Y.shape[1]
model = LinearRegression(input_size, output_size)

test_val = torch.tensor([17], dtype=torch.float32)

print(f'Prediction before training: f(17) = {model(test_val).item():.3f}')
# gotta call .item() to get access to the actual value instead of the tensor

# Hyperparameters
learning_rate = 0.01
# The model also learns a randomly initialised bias, so we train for more epochs
num_epochs = 200

# PyTorch loss and optimizer
loss = nn.MSELoss()  # Mean Squared Error
# We don't hold the weight ourselves, so hand the optimizer the model's parameters
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)  # Stochastic Gradient Descent

# Training loop
for epoch in range(num_epochs):
    # prediction = forward pass
    y_pred = model(X)

    # loss: nn.MSELoss expects (input, target), i.e. prediction first
    l = loss(y_pred, Y)

    # gradients: backward pass populates .grad on every model parameter
    l.backward()

    # update weights
    optimizer.step()

    # reset gradients, otherwise the next backward() would accumulate into .grad
    optimizer.zero_grad()

    if epoch % 20 == 0:
        # The model's only layer yields (weight, bias); weight is a 2D tensor, so index [0][0].
        # Named `weight`/`bias` so the scalar `w` used by `forward_pass` is not overwritten.
        weight, bias = model.parameters()
        print(f'Epoch {epoch + 1}: weight = {weight[0][0].item():.3f}, loss = {l:.8f}')

print(f'Prediction after training: f(17) = {model(test_val).item():.3f}')

4 1
Prediction before training: f(17) = 9.476
Epoch 1: weight = 0.798, loss = 17.06891251
Epoch 21: weight = 1.900, loss = 0.01867640
Epoch 41: weight = 1.933, loss = 0.00644375
Epoch 61: weight = 1.937, loss = 0.00570869
Epoch 81: weight = 1.941, loss = 0.00506346
Epoch 101: weight = 1.944, loss = 0.00449118
Epoch 121: weight = 1.948, loss = 0.00398359
Epoch 141: weight = 1.951, loss = 0.00353335
Epoch 161: weight = 1.954, loss = 0.00313399
Epoch 181: weight = 1.956, loss = 0.00277978
Prediction after training: f(17) = 33.419
