In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn # contains all the neural network building blocks
from torch.optim import SGD

In [None]:
# Record the installed PyTorch version so the notebook's results can be tied to a specific release
print(torch.__version__)

## The goal of this notebook is to explore an example PyTorch workflow
1. Data Preparation
2. Building or using an existing model
3. Fitting the model to the data (training)
4. Making predictions and evaluating a model (inference)
5. Saving and loading a model

### Data (prep and load)
The original data can come in many forms. The goal is to reformat the data numerically such that it can be represented by tensors.

In [None]:
# Let's synthesize the input data by using the output of a 1st degree polynomial with known slope (weight) and intercept (bias)
def lin_reg(X: torch.Tensor, W: "torch.Tensor | float", b: "torch.Tensor | float") -> torch.Tensor:
    """
    Returns the result of evaluating the first order polynomial W * X + b elementwise

    Inputs
        X: independent variable tensor (input)
        W: the weight value (slope); a Python scalar or a tensor that broadcasts against X
        b: the bias value (intercept); a Python scalar or a tensor that broadcasts against W * X

    Outputs
        tensor holding W * X + b; its shape is the broadcast of the operands
        (i.e. X.shape when W and b are scalars)
    """

    # NOTE: torch.Tensor is the type; torch.tensor is the factory function and is not a valid annotation
    return W * X + b

In [None]:
# The output of this cell will be used as input to train (and test, using a different input range) the model

# Define range of the independent variable: values run from start up to (but excluding) stop, spaced by step
start = 0
stop = 1
step = 0.02

# Define bias and weight values (the known "ground truth" that lin_reg uses to generate the labels)
weight = 0.7 # slope
bias = 0.3 # intercept

# unsqueeze(dim=1) adds a trailing dimension, turning the 1-D range into a column vector (one feature value per row)
X = torch.arange(start, stop, step).unsqueeze(dim=1) # Creating a column vector
y = lin_reg(X, weight, bias) # labels: weight * X + bias evaluated for every row of X
# Peek at the first 10 feature/label pairs as a sanity check
print(X[:10])
print(y[:10])

In [None]:
# The goal is to create a model that can learn the mathematical function that takes the feature variables (train_X) to the labels (train_y)
# First, we need to divide the data (features AND labels) into 3 distinct sets: Test, Validation, Training
# For this example, only train and test sets will be used
# NOTE: the split below is positional (first 80% of the rows / remaining 20%), not shuffled

# Train set is used to train the model (typically 80% of the data)
train_stop = int(0.8 * len(X)) # index of the first row that belongs to the test set
train_X = X[:train_stop]
train_y = y[:train_stop]

# Test set is used to test accuracy of model on unseen data (20% of the data in this case)
test_X = X[train_stop:]
test_y = y[train_stop:]

# Sanity check: features and labels should have matching lengths within each split
print(f'Test X Len: {len(test_X)}\nTrain X: {len(train_X)}\nTest y: {len(test_y)}\nTrain y: {len(train_y)}')

In [None]:
# Time to create a function to visualize the data along with the models predictions (no predictions yet...)
def plot_data(feature_train: torch.Tensor, feature_test: torch.Tensor, label_train: torch.Tensor, label_test: torch.Tensor, preds: torch.Tensor=None):
    """
    Scatter-plots labels against features for the training set, the test set and,
    when supplied, the model predictions for the test features

    Inputs
        feature_train: Tensor with the feature training data
        feature_test: Tensor with the feature test data
        label_train: Tensor with the label (output) training data
        label_test: Tensor with the label (output) test data
        preds: optional Tensor with the model predictions for feature_test;
               the prediction series is skipped when this is None
    """
    plt.figure()
    plt.xlabel('Features')
    plt.ylabel('Labels')

    # One entry per scatter series: (x values, y values, colour, legend label)
    series = [
        (feature_train, label_train, 'b', 'Training Data'),
        (feature_test, label_test, 'g', 'Test Data'),
    ]
    # Predictions share the x values of the test set because the model was fed feature_test
    if preds is not None:
        series.append((feature_test, preds, 'r', 'Predictions'))

    for x_vals, y_vals, colour, tag in series:
        plt.scatter(x_vals, y_vals, c=colour, s=3, label=tag)

    plt.legend()
    plt.show()

In [None]:
# As expected the function between the feature data and the output data is linear, based on the defined function
# No predictions are passed yet, so only the training and test points are drawn
plot_data(train_X, test_X, train_y, test_y)

In [None]:
# Now to build the model. Will use Linear Regression
# PyTorch models are class based
class LinearRegressionModel(nn.Module): # Basically every neural network model inherits from nn.Module
    """
    Linear regression with a single weight (slope) and a single bias (intercept),
    both stored as learnable parameters initialised from a standard normal draw.
    """

    def __init__(self):
        # The parent initialiser has to run before parameters are assigned so that
        # nn.Module can register them (state_dict(), parameters(), .to(), ...)
        super().__init__()

        ### Initialize parameters for the model ####

        # Draw the starting values first: slope, then intercept. The order matters
        # for reproducibility when a manual seed has been set beforehand.
        initial_slope = torch.randn(1, dtype=torch.float32)
        initial_intercept = torch.randn(1, dtype=torch.float32)

        # nn.Parameter wraps a tensor and registers it with the module; it requires
        # grad by default so autograd can compute gradients for backprop.
        # More complicated models would use higher-rank tensors or layers such as "nn.Conv1d".
        self.weights = nn.Parameter(initial_slope)
        self.bias = nn.Parameter(initial_intercept)


    ### Define the forward pass logic ###

    # forward() is REQUIRED when subclassing nn.Module: it overrides the parent's
    # method and defines the computation that maps inputs to predicted labels.
    def forward(self, X: torch.Tensor) -> torch.Tensor:
        """
        Applies the first order linear function weights * X + bias to the input tensor X
        """
        scaled = self.weights * X
        return scaled + self.bias



In [None]:
# Now let's inspect the model to see the parameters we've created and their values
# torch.manual_seed() expects an integer seed; the float 6.28 used previously was coerced to 6,
# so the integer is passed explicitly (same seed, same initial parameters)
seed = torch.manual_seed(6) # NOTE: manual_seed returns the torch Generator object, not the seed value
model = LinearRegressionModel()
print(tuple(model.parameters())) # method returns a generator

# Can also see a dict of the parameters with their associated values
model.state_dict()

In [None]:
# To see how the model predicts values using the initialized parameters
# can use inference mode context manager. Context manager turns off autograd
# since inference is for predictions only.
with torch.inference_mode():
    y_preds = model(test_X) # predictions from the still-untrained (randomly initialized) model
print(y_preds)
plot_data(train_X, test_X, train_y, test_y, y_preds) # The prediction is obviously way off, negative slope and huge shift in intercept

In [None]:
# Define loss and optimization functions
loss_fn = nn.L1Loss() # This is the mean absolute error (MAE); mean squared error would be nn.MSELoss
optimizer = SGD(params=model.parameters(), # stochastic gradient descent over the model's learnable parameters
                lr=0.1) # lr is learning rate

In [None]:
# Let's create a training and testing loop to tune the parameters of our model
# and view how it's doing periodically.

# These lists will be used to store key values at different stages of training
# to show the model changing
epoch_count = []
train_loss_vals = []
test_loss_vals = []
epochs = 100

for epoch in range(epochs):
    ### Training Code ###
    model.train() # put model in training mode
    train_preds = model(train_X) # get models current predictions; autograd tracks this pass since no inference context is active
    train_loss = loss_fn(train_preds, train_y) # scalar tensor with the mean loss over the training set
    optimizer.zero_grad() # zero out the calculated gradients, they are accumulated by default
    train_loss.backward() # backprop to calculate gradients
    optimizer.step() # update weights based on calculated gradients

    ### Testing Code ###
    model.eval() # put model in evaluate mode
    with torch.inference_mode():
        # Predict from the test FEATURES; feeding the labels here would make the test loss meaningless
        test_preds = model(test_X) # get test preds after most recent param update
        test_loss = loss_fn(test_preds, test_y) # get the loss between test preds and test labels

    ### Diag Code ###
    if epoch % 10 == 0: # record progress every 10 epochs
        epoch_count.append(epoch)
        # Store plain Python floats (not tensors) so the history is detached from autograd and can be plotted directly
        train_loss_vals.append(train_loss.item())
        test_loss_vals.append(test_loss.item())
        print(f'Epoch: {epoch} | MAE Training Loss: {train_loss} | MAE Test Loss: {test_loss}')



In [None]:
# Now let's see how the model predictions match the true values after initial training
with torch.inference_mode(): # predictions only, so autograd tracking is switched off
    y_preds = model(test_X)
plot_data(train_X, test_X, train_y, test_y, y_preds) # The prediction is much better than before, but still off slightly

In [None]:
# Now lets bundle the loop into a function so that the hyperparameters can be tuned faster
def train_model(model: LinearRegressionModel,
                feature_train: torch.Tensor = train_X,
                feature_test: torch.Tensor = test_X,
                label_train: torch.Tensor = train_y,
                label_test: torch.Tensor = test_y,
                epochs: int = 100,
                lr: float = 0.1) -> dict:
    """
    Trains the model in place with SGD on an L1 (mean absolute error) loss and
    evaluates it on the test set after every epoch

    Inputs
        model: the model to train; its parameters are updated in place
        feature_train: Tensor with the feature training data
        feature_test: Tensor with the feature test data
        label_train: Tensor with the label training data
        label_test: Tensor with the label test data
        epochs: number of passes over the training data
        lr: learning rate handed to the SGD optimizer

    Outputs
        dict with the keys 'epochs', 'train_loss' and 'test_loss'; each maps to a
        list with one entry per recorded epoch (every 10th epoch, starting at 0),
        the losses being plain Python floats

    NOTE: the tensor defaults are bound to the notebook's train/test tensors at the
    time this cell is executed; re-run this cell after regenerating the data.
    """

    # Define loss and optimization functions
    loss_fn = nn.L1Loss()
    optimizer = SGD(params=model.parameters(),
                    lr=lr) # lr is learning rate

    # Define lists for useful values
    epoch_count = []
    train_loss_vals = []
    test_loss_vals = []

    # Train/Test loop
    for epoch in range(epochs):
        ### Training Code ###
        model.train() # put model in training mode
        train_preds = model(feature_train) # get models current predictions; autograd tracks this pass
        train_loss = loss_fn(train_preds, label_train) # scalar tensor with the mean loss over the training set
        optimizer.zero_grad() # zero out the calculated gradients, they are accumulated by default
        train_loss.backward() # backprop to calculate gradients
        optimizer.step() # update weights based on calculated gradients

        ### Testing Code ###
        model.eval() # put model in evaluate mode
        with torch.inference_mode():
            # Predict from the test FEATURES; feeding the labels here would make the test loss meaningless
            test_preds = model(feature_test) # get test preds after most recent param update
            test_loss = loss_fn(test_preds, label_test) # get the loss between test preds and test labels

        ### Diag Code ###
        if epoch % 10 == 0: # record progress every 10 epochs
            epoch_count.append(epoch)
            train_loss_vals.append(train_loss.item())
            test_loss_vals.append(test_loss.item())
            print(f'Epoch: {epoch} | MAE Training Loss: {train_loss} | MAE Test Loss: {test_loss}')

    # Return useful values as dict
    return {
        'epochs': epoch_count,
        'train_loss': train_loss_vals,
        'test_loss': test_loss_vals
    }

In [None]:
# Can also define a function that plots the training and test losses as a function of epoch
def plot_loss(train_loss_vals: list, test_loss_vals: list, epoch_vals: list) -> None:
    """
    Draws the test and training loss curves against the epochs at which they were recorded

    Inputs
        train_loss_vals: training loss values, one per entry of epoch_vals
        test_loss_vals: test loss values, one per entry of epoch_vals
        epoch_vals: epoch numbers used as the shared x axis
    """
    plt.figure()
    plt.xlabel('Epoch')
    plt.ylabel('Loss')

    # One entry per curve: (loss values, colour, legend label); the test curve is drawn first
    curves = (
        (test_loss_vals, 'b', 'Test Loss'),
        (train_loss_vals, 'g', 'Training Loss'),
    )
    for losses, colour, tag in curves:
        plt.plot(epoch_vals, losses, c=colour, label=tag)

    plt.legend()
    plt.show()

In [None]:
# Changing the epoch count (while also reinitializing the model to get original weights)
# torch.manual_seed() expects an integer seed; the float 6.28 used previously was coerced to 6,
# so the integer is passed explicitly (same seed, same initial parameters)
seed = torch.manual_seed(6)
model = LinearRegressionModel()
print(model.state_dict())

vals = train_model(
    model,
    epochs=200,
    lr=0.1
)

with torch.inference_mode():
    y_preds = model(test_X)
plot_loss(vals['train_loss'], vals['test_loss'], vals['epochs'])
plot_data(train_X, test_X, train_y, test_y, y_preds) # The prediction is much better than the naive one, but still off slightly

In [None]:
# Changing the epoch count and lr (while also reinitializing the model to get original weights)
# torch.manual_seed() expects an integer seed; the float 6.28 used previously was coerced to 6,
# so the integer is passed explicitly (same seed, same initial parameters)
seed = torch.manual_seed(6)
model = LinearRegressionModel()
print(model.state_dict())

vals = train_model(
    model,
    epochs=200,
    lr=0.01
)

with torch.inference_mode():
    y_preds = model(test_X)
plot_loss(vals['train_loss'], vals['test_loss'], vals['epochs'])
plot_data(train_X, test_X, train_y, test_y, y_preds) # Changing the learning rate slowed down learning a lot, let's up the epoch count

In [None]:
# Increasing the epoch count to 1000 and using lr=0.04 (while also reinitializing the model to get original weights)
# torch.manual_seed() expects an integer seed; the float 6.28 used previously was coerced to 6,
# so the integer is passed explicitly (same seed, same initial parameters)
seed = torch.manual_seed(6)
model = LinearRegressionModel()
print(model.state_dict())

vals = train_model(
    model,
    epochs=1000,
    lr=0.04
)

with torch.inference_mode():
    y_preds = model(test_X)
plot_loss(vals['train_loss'], vals['test_loss'], vals['epochs'])
plot_data(train_X, test_X, train_y, test_y, y_preds) # The prediction is much better than before

In [None]:
# Now let's try moving everything to the GPU and doing inference there (NOTE: Seems like training on the M1 isn't supported yet (autograd not supp))
# Use the Apple-silicon (MPS) backend only when it is actually available; otherwise fall back
# to the CPU so this cell still runs on machines without that device
device = 'mps' if torch.backends.mps.is_available() else 'cpu'
model.to(device) # moves the model's parameters in place
test_X = test_X.to(device) # NOTE: rebinds test_X; from here on it lives on `device`


with torch.inference_mode():
    y_preds = model(test_X) # inference runs on the selected device
plot_data(train_X, test_X.to(device='cpu'), train_y, test_y, y_preds.to(device='cpu')) # Have to move the tensors back to CPU before using NumPy (called by plt)

## Saving a trained model

Saving a trained model to use for inference later is important for portability. The most common way is to save the trained model's `state_dict`, which contains the optimized model parameters for a given training set. PyTorch doesn't define a dedicated data structure for this; it simply uses Pickle to serialize the `state_dict`.

In [None]:
# Define model directory
MODEL_DIR = '../../data/models/' # relative to the notebook's working directory; keep the trailing slash, the load cell concatenates MODEL_DIR + MODEL_FN
MODEL_FN = 'lin_reg_model_0.pth' # file name of the saved state_dict

# Define function to save the model
def save_model(model_dir: str, filename: str,  model: nn.Module) -> None:
    """
    Serializes the model's state_dict to <model_dir>/<filename> with torch.save

    Inputs
        model_dir: directory to save into; created (including parents) if it does not exist.
                   A trailing path separator is optional. An empty string means the current directory.
        filename: name of the file to write inside model_dir
        model: the module whose state_dict is saved
    """
    # os.path.join inserts a separator only when needed, so both 'models' and 'models/' work
    target = os.path.join(model_dir, filename)
    # torch.save does not create missing directories; skip for '' since makedirs('') raises
    if model_dir:
        os.makedirs(model_dir, exist_ok=True)
    print(f'Saving model state_dict to {target}')
    torch.save(obj=model.state_dict(), f=target)

# Persist the parameters of the most recently trained model
save_model(MODEL_DIR, MODEL_FN, model)

In [None]:
# Now that the trained model parameters are saved, can create a new model instance
# and load the saved parameters.

loaded_model = LinearRegressionModel()
# map_location='cpu' remaps the stored tensors to the CPU, so the file still loads when it was
# saved from a model living on another device (e.g. 'mps') that is not available here.
# NOTE: torch.load unpickles the file; only load checkpoints from sources you trust.
loaded_model.load_state_dict(torch.load(MODEL_DIR + MODEL_FN, map_location='cpu'))
print(tuple(loaded_model.parameters()))
print(tuple(model.parameters())) # note how the parameter values are identical (the device shown may differ)