### Creating a training pipeline in PyTorch

Typically the training pipeline has 3 steps:
<ul>
    <li>Design the model - figure out the input size, the output size and the forward pass</li>
    <li>Construct the loss function</li>
    <li>Construct the training loop - forward pass, loss, backward pass, update parameters</li>
</ul>

In [1]:
import torch
import torch.nn as nn # Import the Neural Network module

# Toy problem: recover the line y = Wx + b = 2x + 3 from four samples.
#
# The hand-written version of this pipeline used flat tensors:
#     X = torch.tensor([1, 2, 3, 4], dtype=torch.float32)
#     Y = torch.tensor([5, 7, 9, 11], dtype=torch.float32)
# For nn.Linear the data is declared as 2-D tensors instead:
#     rows    = samples
#     columns = features

# Inputs: four samples with one feature each
X = torch.tensor([[1], [2], [3], [4]], dtype=torch.float32)

# Outputs: one target value per sample
Y = torch.tensor([[5], [7], [9], [11]], dtype=torch.float32)

# The layer sizes are read off the data rather than hard-coded
num_samples = X.shape[0]   # rows of X
num_features = X.shape[1]  # columns of X
num_outputs = Y.shape[1]   # columns of Y

# Model: nn.Linear handles the weights and biases, so the manual tensors
#     W = torch.tensor(0, dtype=torch.float32, requires_grad=True)
#     b = torch.tensor(0, dtype=torch.float32, requires_grad=True)
# and the hand-written forward(W, X, b) = W*X + b are no longer needed.
model = nn.Linear(in_features=num_features, out_features=num_outputs)

# Loss: nn.MSELoss stands in for the hand-written ((Yhat - Y)**2).mean()
loss = nn.MSELoss()

# Gradients: PyTorch's autograd stands in for the hand-derived
# gradient(X, Y, Yhat) that returned dW and db, so nothing is defined here.

# Optimiser: torch.optim.SGD performs the parameter updates.
# With manually defined weights and biases it would have been
#     torch.optim.SGD([W, b], lr=alpha)
alpha = 0.1  # learning rate
optimiser = torch.optim.SGD(params=model.parameters(), lr=alpha)

In [2]:
# Training
epochs = 100 # Converges at alpha = 0.1

for epoch in range(epochs):
    print("Epoch " + str(epoch+1))
    # Forward pass
    # (replaces the manual Yhat = forward(W, X, b))
    Yhat = model(X)
    print("Prediction: " + str(Yhat))
    # Loss between the prediction and the expected outputs.
    # nn.MSELoss is called as loss(input, target): the prediction goes
    # first and the expected output second.
    prediction_loss = loss(Yhat, Y)
    print("Loss: " + str(prediction_loss))
    # Backward pass
    # (replaces the manual dW, db = gradient(X, Y, Yhat))
    prediction_loss.backward()
    # Update weights and biases
    # (replaces the manual update:
    #     with torch.no_grad():
    #         W -= alpha * W.grad
    #         b -= alpha * b.grad)
    optimiser.step()
    # Zero out the gradients
    # (replaces the manual W.grad.zero_() and b.grad.zero_())
    optimiser.zero_grad()

Epoch 1
Prediction: tensor([[1.3361],
        [1.7311],
        [2.1261],
        [2.5211]], grad_fn=<AddmmBackward0>)
Loss: tensor(40.0817, grad_fn=<MseLossBackward0>)
Epoch 2
Prediction: tensor([[ 5.9874],
        [ 9.8193],
        [13.6512],
        [17.4831]], grad_fn=<AddmmBackward0>)
Loss: tensor(18.1471, grad_fn=<MseLossBackward0>)
Epoch 3
Prediction: tensor([[2.9147],
        [4.4210],
        [5.9273],
        [7.4337]], grad_fn=<AddmmBackward0>)
Loss: tensor(8.2899, grad_fn=<MseLossBackward0>)
Epoch 4
Prediction: tensor([[ 5.0162],
        [ 8.0588],
        [11.1015],
        [14.1441]], grad_fn=<AddmmBackward0>)
Loss: tensor(3.8558, grad_fn=<MseLossBackward0>)
Epoch 5
Prediction: tensor([[3.6494],
        [5.6413],
        [7.6332],
        [9.6251]], grad_fn=<AddmmBackward0>)
Loss: tensor(1.8571, grad_fn=<MseLossBackward0>)
Epoch 6
Prediction: tensor([[ 4.6054],
        [ 7.2806],
        [ 9.9559],
        [12.6312]], grad_fn=<AddmmBackward0>)
Loss: tensor(0.9523, grad_f

It is also possible to implement a custom model by:
<ul>
    <li>Creating a class inheriting the nn.Module class</li>
    <li>Overriding the __init__() and forward() methods
    <ul>
        <li>The __init__() method usually takes the input and output sizes as arguments and calls super().__init__(). It then defines the model itself, i.e. the layers of the model.</li>
        <li>The forward() method returns the prediction</li>
    </ul>
    </li>
</ul>

In [3]:
class LinearRegression(nn.Module):
    """Custom model built by subclassing nn.Module.

    The model consists of a single nn.Linear layer, stored as ``self.linear``.
    """

    def __init__(self, input_size, output_size):
        """Set up the parent nn.Module and define the model's only layer.

        Args:
            input_size: forwarded to nn.Linear as its first argument.
            output_size: forwarded to nn.Linear as its second argument.
        """
        # The parent initialiser is called before the layer is assigned
        super().__init__()
        self.linear = nn.Linear(input_size, output_size)

    def forward(self, x):
        """Forward pass: return the linear layer's prediction for ``x``."""
        prediction = self.linear(x)
        return prediction

In [4]:
# Rebuild the model with the custom LinearRegression class and create a new
# optimiser over the new model's parameters (same learning rate alpha)
model = LinearRegression(num_features, num_outputs)
optimiser = torch.optim.SGD(model.parameters(), lr=alpha)

# Very simplified training loop
for epoch in range(epochs):
    print("Epoch " + str(epoch+1))
    # Forward pass
    prediction = model(X)
    # Loss between the prediction and the expected outputs
    prediction_loss = loss(prediction, Y)
    # Backward pass
    prediction_loss.backward()
    # Update the parameters, then zero out the gradients
    optimiser.step()
    optimiser.zero_grad()
    # The prediction and loss printed here were computed before this epoch's
    # parameter update; the loss is formatted to 3 decimal places
    print(f"Prediction = {prediction}, Loss = {prediction_loss.item():.3f}")

Epoch 1
Prediction = tensor([[-1.3155],
        [-2.1327],
        [-2.9498],
        [-3.7670]], grad_fn=<AddmmBackward0>), Loss = 121.039
Epoch 2
Prediction = tensor([[ 6.7677],
        [11.9254],
        [17.0832],
        [22.2409]], grad_fn=<AddmmBackward0>), Loss = 54.770
Epoch 3
Prediction = tensor([[1.4252],
        [2.5414],
        [3.6576],
        [4.7737]], grad_fn=<AddmmBackward0>), Loss = 24.992
Epoch 4
Prediction = tensor([[ 5.0766],
        [ 8.8639],
        [12.6513],
        [16.4387]], grad_fn=<AddmmBackward0>), Loss = 11.598
Epoch 5
Prediction = tensor([[2.6994],
        [4.6611],
        [6.6228],
        [8.5845]], grad_fn=<AddmmBackward0>), Loss = 5.562
Epoch 6
Prediction = tensor([[ 4.3596],
        [ 7.5099],
        [10.6602],
        [13.8105]], grad_fn=<AddmmBackward0>), Loss = 2.831
Epoch 7
Prediction = tensor([[ 3.3125],
        [ 5.6327],
        [ 7.9529],
        [10.2731]], grad_fn=<AddmmBackward0>), Loss = 1.586
Epoch 8
Prediction = tensor([[ 4.0775

Prediction = tensor([[ 4.8415],
        [ 6.9232],
        [ 9.0049],
        [11.0866]], grad_fn=<AddmmBackward0>), Loss = 0.010
Epoch 76
Prediction = tensor([[ 4.8463],
        [ 6.9255],
        [ 9.0047],
        [11.0840]], grad_fn=<AddmmBackward0>), Loss = 0.009
Epoch 77
Prediction = tensor([[ 4.8509],
        [ 6.9277],
        [ 9.0046],
        [11.0815]], grad_fn=<AddmmBackward0>), Loss = 0.009
Epoch 78
Prediction = tensor([[ 4.8554],
        [ 6.9299],
        [ 9.0045],
        [11.0790]], grad_fn=<AddmmBackward0>), Loss = 0.008
Epoch 79
Prediction = tensor([[ 4.8597],
        [ 6.9320],
        [ 9.0043],
        [11.0767]], grad_fn=<AddmmBackward0>), Loss = 0.008
Epoch 80
Prediction = tensor([[ 4.8639],
        [ 6.9340],
        [ 9.0042],
        [11.0744]], grad_fn=<AddmmBackward0>), Loss = 0.007
Epoch 81
Prediction = tensor([[ 4.8680],
        [ 6.9360],
        [ 9.0041],
        [11.0721]], grad_fn=<AddmmBackward0>), Loss = 0.007
Epoch 82
Prediction = tensor([[ 4.87