In [None]:
# Outline of the notebook: step number -> what that step covers.
# enumerate(..., start=1) numbers the steps 1..6 in the order listed.
workflow = dict(enumerate(
    (
        "data (prepare and load)",
        "build model",
        "fitting the model to data (training)",
        "making predictions and evaluating a model (inference)",
        "saving and loading a model",
        "putting it all together",
    ),
    start=1,
))
workflow

In [None]:
import matplotlib.pyplot as plt
import numpy as np
import torch
from torch import nn # nn contain all of PT building blocks

# PyTorch version (not the Python interpreter version)
torch.__version__

In [None]:
# CUDA version reported by this PyTorch build
torch.version.cuda

In [None]:
# Whether PyTorch can use a CUDA device in this environment
torch.cuda.is_available()

## 1. Data (preparing and loading)

1. Get data into numerical representation
2. Build a model to learn patterns in the numerical representation

Create known data using the linear regression formula, $y = \text{weight} \cdot X + \text{bias}$.  
The linear regression formula produces a straight line with known parameters.

In [None]:
# Ground-truth parameters of the straight line the model should recover
weight = 0.7
bias = 0.3

# Inputs: evenly spaced values in [start, end), reshaped into a single column
start, end, step = 0, 1, 0.02
X = torch.arange(start, end, step).reshape(-1, 1)

# Targets follow the linear regression formula exactly (no noise added)
y = X * weight + bias

# Preview the first ten inputs and targets
X[:10], y[:10]

In [None]:
# Sanity check: inputs and targets should contain the same number of samples
len(X), len(y)

### Splitting data into training and test sets

In [None]:
# Sequential split: the first 80% of the samples are used for training,
# the remaining 20% are held out for testing.
train_split = int(0.8 * len(X))
X_train, X_test = X[:train_split], X[train_split:]
y_train, y_test = y[:train_split], y[train_split:]

# Sizes of each split
len(X_train), len(y_train), len(X_test), len(y_test)

### Visualize data

In [None]:
def plot_predictions(train_data=X_train,
                     train_labels=y_train,
                     test_data=X_test,
                     test_labels=y_test,
                     predictions=None):
    """
    Scatter-plot the training data, the test data and, optionally, predictions.

    Args:
        train_data: x values of the training set (drawn in blue).
        train_labels: y values of the training set.
        test_data: x values of the test set (drawn in green).
        test_labels: y values of the test set.
        predictions: optional y values predicted for ``test_data``; when given
            they are drawn in red against ``test_data``.

    The defaults are bound to the notebook's current split when this cell runs.
    """
    plt.figure(figsize=(10, 7))

    # Each entry: (x values, y values, colour, legend label)
    series = [
        (train_data, train_labels, "b", "Training data"),
        (test_data, test_labels, "g", "Testing data"),
    ]
    if predictions is not None:
        series.append((test_data, predictions, "r", "Predictions"))

    for xs, ys, colour, label in series:
        plt.scatter(xs, ys, c=colour, s=4, label=label)

    plt.legend(prop={"size": 14})

In [None]:
# Plot the train/test split using the function's default arguments (no predictions yet)
plot_predictions();

## 2. Build model

model explanation:
* starts with random values (weight & bias)
* look at training data and adjust the random values to better represent the ideal values
* accomplished through 2 algos:
    1. Gradient descent
    2. Backpropagation

In [None]:
# Linear regression model
# (nn.Module is the base class that PyTorch models inherit from)
class LinearRegressionModel(nn.Module):
    """Straight-line model ``weights * x + bias`` with two learnable scalars.

    Both parameters start as random draws from a standard normal distribution
    and are registered as ``nn.Parameter`` so an optimizer can update them.
    """

    def __init__(self):
        super().__init__()
        # Draw order (weights first, then bias) determines which random value
        # each parameter receives under a fixed seed.
        initial_weight = torch.randn(1, dtype=torch.float)
        self.weights = nn.Parameter(initial_weight, requires_grad=True)

        initial_bias = torch.randn(1, dtype=torch.float)
        self.bias = nn.Parameter(initial_bias, requires_grad=True)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Apply the linear regression formula to ``x`` and return the result."""
        scaled = x * self.weights
        return scaled + self.bias
    

### PyTorch model building essentials

* torch.nn - contains the building blocks for computational graphs (neural networks)
* torch.nn.Parameter - the parameters the model should try to learn; often a PyTorch layer from torch.nn will set these for us
* torch.nn.Module - the base class for neural network modules; if you subclass it, always override the forward method
* torch.optim - where the optimizers for gradient descent live
* def forward() - all nn.Module subclasses are required to override this; it defines what happens in the forward computation

In [None]:
# Inspect the contents of a freshly created model.
# Seed first so the random initial parameter values are reproducible.
torch.manual_seed(42)

# Create an instance of the model (LinearRegressionModel subclasses nn.Module)
model_0 = LinearRegressionModel()

# .parameters() yields the model's parameters; unpack them into a list to display
[*model_0.parameters()]

In [None]:
# Show the model's state dictionary (its parameters listed by name)
model_0.state_dict()

### make predictions using torch.inference_mode()
data passed through model goes through forward method

In [None]:
# Make predictions on the test inputs with the still-untrained model.
# Calling the model runs its forward() method; inference_mode() is used
# because no gradients are needed for prediction.
with torch.inference_mode():
    y_preds = model_0(X_test)

y_preds

In [None]:
# Plot the untrained model's predictions alongside the train/test data
plot_predictions(predictions=y_preds)

## 3. Training model

The idea behind training a model is to move from unknown (random) parameters to known parameters.

loss 

Things needed to train:
- *Loss* (criterion, cost) function - measures how wrong a model's predictions are.
- *Optimizer* - takes the model's loss into account and adjusts the model's parameters to reduce it
  
PyTorch:
- training loop
- test loop

In [None]:
# Loss function: L1 loss between predictions and targets
loss_fn = nn.L1Loss()

# Optimizer: stochastic gradient descent over model_0's parameters.
# lr (learning rate) is a hyperparameter, i.e. a value the developer chooses.
optimizer = torch.optim.SGD(model_0.parameters(), lr=0.01)

### building a training (and testing) loop in PyTorch

Need:
0. Loop data
1. Forward pass (data moving through the model's forward()) to make predictions  - aka forward propagation
2. Calculate the loss (compare forward pass to ground truth labels)
3. Loss backward (***backpropagation***) - move backwards through the model to calculate the gradients of the loss with respect to each parameter
4. Optimizer step - use the optimizer to adjust the model's parameters in an attempt to improve the loss (***gradient descent***)

In [None]:
torch.manual_seed(42)

# An epoch (hyperparameter) is one loop through the training data
epochs = 200

# Metrics recorded every 10th epoch, used later to plot the loss curves
epoch_count = []
loss_values = []
test_loss_values = []

# 0. Loop through the data
for epoch in range(epochs):
    ### Training
    # Put the model in training mode. This switches layers such as dropout and
    # batch norm to their training behaviour; it does not change requires_grad.
    model_0.train()

    # 1. Forward pass: run the training inputs through the model
    y_pred = model_0(X_train)

    # 2. Calculate the loss between the predictions and the training targets
    loss = loss_fn(y_pred, y_train)

    # 3. Zero the optimizer's gradients; they accumulate across backward()
    #    calls by default, so they are reset on every iteration
    optimizer.zero_grad()

    # 4. Backpropagation: compute the gradient of the loss with respect to
    #    each model parameter
    loss.backward()

    # 5. Step the optimizer (gradient descent) to update the parameters
    optimizer.step()

    ### Testing
    model_0.eval() # evaluation mode: layers such as dropout / batch norm use their inference behaviour
    with torch.inference_mode(): # turns off gradient tracking
        # 1. Forward pass on the held-out inputs
        test_pred = model_0(X_test)

        # 2. Calculate the test loss
        test_loss = loss_fn(test_pred, y_test)

    if epoch % 10 == 0:
        epoch_count.append(epoch)
        # Store a detached tensor so the recorded value does not keep this
        # iteration's autograd graph alive
        loss_values.append(loss.detach())
        test_loss_values.append(test_loss)
        print(f"Epoch: {epoch} | MAE Train Loss: {loss} | MAE Test loss: {test_loss}")

        # Print the current parameter values
        print(model_0.state_dict())


In [None]:
# Plot the training and test loss recorded during the training loop.
# Both series are converted to NumPy arrays the same way so matplotlib
# receives plain numeric data for each curve.
train_losses = torch.tensor(loss_values).cpu().numpy()
test_losses = torch.tensor(test_loss_values).cpu().numpy()

plt.plot(epoch_count, train_losses, label="Train loss")
plt.plot(epoch_count, test_losses, label="Test loss")
plt.title("Training and test loss curves")
plt.xlabel("Epochs")  # was a second ylabel() call, which left the x axis unlabelled
plt.ylabel("Loss")
plt.legend();

In [None]:
# Print the model's parameters after training
print(model_0.state_dict())

In [None]:
# The known parameters used to generate the data, for comparison with the learned ones
weight, bias

In [None]:
# Predict on the test inputs again, now with the trained model (no gradient tracking)
with torch.inference_mode():
    y_preds_new = model_0(X_test)


In [None]:
# Plot the trained model's predictions alongside the train/test data
plot_predictions(predictions=y_preds_new)

# Saving a model in PyTorch

Three main methods
1. `torch.save()` - saves model in python pickle
2. `torch.load()` - loads saved model
3. `torch.nn.Module.load_state_dict()` - loads model's saved state dictionary

In [None]:
# Save the trained model's parameters to disk
from pathlib import Path

# 1. Make sure the directory that will hold saved models exists
MODEL_PATH = Path("models")
MODEL_PATH.mkdir(parents=True, exist_ok=True)

# 2. Build the full path of the file to write
MODEL_NAME = "01_pytorch_workflow_model_0.pth"
MODEL_SAVE_PATH = MODEL_PATH.joinpath(MODEL_NAME)

# 3. Save only the state dictionary (the parameters), not the whole model object
print(f"Saving model to: {MODEL_SAVE_PATH}")
torch.save(model_0.state_dict(), MODEL_SAVE_PATH)

# Loading PyTorch model
Because only the `state_dict()` was saved, loading means creating a new instance of the model class and then loading the saved `state_dict()` into it.

In [None]:
# 1. Instantiate a new instance of the model class
load_model_0 = LinearRegressionModel()

# 2. Load the saved state_dict from disk and copy it into the new instance
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider weights_only=True if the installed PyTorch supports it.
load_model_0.load_state_dict(torch.load(f=MODEL_SAVE_PATH))


In [None]:
# Show the loaded model's parameters
load_model_0.state_dict()

In [None]:
# Make predictions on the test inputs with the loaded model
# (evaluation mode, no gradient tracking)
load_model_0.eval()
with torch.inference_mode():
    load_model_preds = load_model_0(X_test)

load_model_preds

In [None]:
# Recompute the original model's test predictions under the same settings
# (evaluation mode, no gradient tracking) for comparison with the loaded model.
# Note: this rebinds y_preds, which earlier held the untrained model's predictions.
model_0.eval()
with torch.inference_mode():
    y_preds = model_0(X_test)
y_preds

In [None]:
# Compare models: element-wise equality of the original and loaded models' predictions
y_preds == load_model_preds