### Imports and Model class

In [1]:
import torch
from torch import nn

In [3]:
class LinearRegressionModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.weights = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float))
        self.bias = nn.Parameter(torch.randn(1, requires_grad=True, dtype=torch.float))

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.weights * x + self.bias

**Notes**:
- `torch.nn` is the base package for everything neural networks, building computation graphs, etc.
- `torch.nn.Module` is the base class extended to write your own neural networks. The class should initialize the required parameters (using `nn.Parameter` module) or using any of the pre-built layers as required.


### Make Predictions using PyTorch Inference mode

In [None]:
with torch.inference_mode(): # alternatively, use `torch.no_grad()` - this is from the older API.
    m = LinearRegressionModel()
    x = torch.randn(10, requires_grad=True, dtype=torch.float)
    y = m(x)

display(x, y)

tensor([ 0.3887, -1.7681,  1.5649,  1.1250, -0.2040, -1.3876, -0.5842, -0.0266,
        -1.4381, -0.3459], requires_grad=True)

tensor([ 0.2426, -1.7963,  1.3546,  0.9387, -0.3176, -1.4366, -0.6770, -0.1499,
        -1.4843, -0.4518])

### Train a *proper* model

In [10]:
loss_fn = nn.L1Loss()
optimizer = torch.optim.SGD(m.parameters(), lr=0.001)

#### Training loop (and a testing loop?)

1. Loop through the data
2. Forward pass
3. Calculate the loss
4. Optimizer Zero grad
5. Backprop - Traverse backward to calculate the gradients w.r.t the loss
6. Gradient Descent - Adjust the parameters to improve the model based on teh calculated gradients

In [21]:
torch.manual_seed(42) # for reproducibility

X, y = torch.randn(100, 1), torch.randn(100, 1)
X_test = torch.randn(10, 1)

In [22]:
m = LinearRegressionModel()

In [None]:
epoch_count = []
loss_values = []

In [28]:
torch.manual_seed(42) # for reproducibility
epochs = 400

# 1. Loop through the data
for epoch in range(epochs):
    m.train()  # Set the model to training mode
    # 2. Forward pass
    y_pred = m(X)
    
    # 3. Compute loss
    loss = loss_fn(y_pred, y)
    
    # 4. Zero gradients
    optimizer.zero_grad() # clear the gradients from the previous step (happens in step 6 from the previous epoch)

    # 5. Backpropagation
    loss.backward()
    
    # 6. Gradient Descent - Update weights
    optimizer.step()

    # 7. Print loss
    if epoch % 10 == 0:
        epoch_count.append(epoch)
        loss_values.append(loss)
        
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

Epoch 1/400, Loss: 1.6250
Epoch 11/400, Loss: 1.6250
Epoch 21/400, Loss: 1.6250
Epoch 31/400, Loss: 1.6250
Epoch 41/400, Loss: 1.6250
Epoch 51/400, Loss: 1.6250
Epoch 61/400, Loss: 1.6250
Epoch 71/400, Loss: 1.6250
Epoch 81/400, Loss: 1.6250
Epoch 91/400, Loss: 1.6250
Epoch 101/400, Loss: 1.6250
Epoch 111/400, Loss: 1.6250
Epoch 121/400, Loss: 1.6250
Epoch 131/400, Loss: 1.6250
Epoch 141/400, Loss: 1.6250
Epoch 151/400, Loss: 1.6250
Epoch 161/400, Loss: 1.6250
Epoch 171/400, Loss: 1.6250
Epoch 181/400, Loss: 1.6250
Epoch 191/400, Loss: 1.6250
Epoch 201/400, Loss: 1.6250
Epoch 211/400, Loss: 1.6250
Epoch 221/400, Loss: 1.6250
Epoch 231/400, Loss: 1.6250
Epoch 241/400, Loss: 1.6250
Epoch 251/400, Loss: 1.6250
Epoch 261/400, Loss: 1.6250
Epoch 271/400, Loss: 1.6250
Epoch 281/400, Loss: 1.6250
Epoch 291/400, Loss: 1.6250
Epoch 301/400, Loss: 1.6250
Epoch 311/400, Loss: 1.6250
Epoch 321/400, Loss: 1.6250
Epoch 331/400, Loss: 1.6250
Epoch 341/400, Loss: 1.6250
Epoch 351/400, Loss: 1.6250
Epo

In [29]:
m.eval()  # Set the model to evaluation mode
with torch.inference_mode():
    test_pred = m(torch.randn(10, 1))
    test_loss = loss_fn(test_pred, X_test)
    print(f"Test Loss: {test_loss.item():.4f}")



Test Loss: 1.5558


### Saving and loading a PyTorch Model
- `torch.save(MODEL_OBJ, PATH)` - Saves the PyTorch model object.
- `torch.load(MODEL_OBJ, PATH)` - Loads the saved PyTorch model object.
- `torch.nn.Module.load_state_dict()` - Loads the python dictionary that maps each layer(?) to it's parameter.

In [30]:
from pathlib import Path

MODEL_PATH = Path("first_model")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
MODEL_NAME = "first_linear_regression_model.pth" # conventionally, .pt or .pth is used for PyTorch models
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

In [31]:
torch.save(obj=m.state_dict(), f=MODEL_SAVE_PATH) # save the model's state_dict

In [33]:
m_reloaded = LinearRegressionModel() # create a new instance of the model
m_reloaded.load_state_dict(torch.load(MODEL_SAVE_PATH)) # load the state_dict into the new model instance

<All keys matched successfully>

In [32]:
m.state_dict()

OrderedDict([('weights', tensor([-0.4749])), ('bias', tensor([1.6640]))])

In [34]:
m_reloaded.state_dict()

OrderedDict([('weights', tensor([-0.4749])), ('bias', tensor([1.6640]))])

### Putting it all back together (again!)

*Writing a device agnostic code*

#### Imports and device setting

In [1]:
import torch
from torch import nn

torch.__version__

'2.6.0'

In [2]:
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

Using device: cpu


#### 1. Data

In [3]:
weight = 0.7
bias = 0.3

start = 0; end = 1; step = 0.02
X = torch.arange(start, end, step).unsqueeze(dim=1).to(device)
y = weight * X + bias

In [4]:
X.shape, y.shape

(torch.Size([50, 1]), torch.Size([50, 1]))

In [5]:
train_split = int(0.8 * len(X))
X_train, y_train = X[:train_split], y[:train_split]
X_test, y_test = X[train_split:], y[train_split:]

X_train.shape, y_train.shape, X_test.shape, y_test.shape

(torch.Size([40, 1]),
 torch.Size([40, 1]),
 torch.Size([10, 1]),
 torch.Size([10, 1]))

#### 2. Build a PyTorch Model

In [6]:
class LinearRegressionModelv2(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(in_features=1, out_features=1) # in_features = number of input features, out_features = number of output features

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        return self.linear(x)


In [12]:
torch.manual_seed(42) # for reproducibility
model_v2 = LinearRegressionModelv2().to(device) # create an instance of the model and move it to the device

In [13]:
model_v2.state_dict() # check the model's state_dict

OrderedDict([('linear.weight', tensor([[0.7645]])),
             ('linear.bias', tensor([0.8300]))])

#### 3. Train the model

Needs:
- Loss function
- Optimizer
- Training loop
- Testing loop

In [14]:
loss_fn = nn.L1Loss() # MAE loss function
optimizer = torch.optim.SGD(params=model_v2.parameters(), lr=0.001) # SGD optimizer with learning rate of 0.001

In [15]:
torch.manual_seed(42)

epochs = 400

for epoch in range(epochs):
    model_v2.train() # set the model to training mode

    y_pred = model_v2(X_train) # forward pass
    loss = loss_fn(y_pred, y_train) # compute loss
    optimizer.zero_grad() # zero gradients
    loss.backward() # backpropagation
    optimizer.step() # update weights
    if epoch % 10 == 0:
        print(f"Epoch {epoch+1}/{epochs}, Loss: {loss.item():.4f}")

    model_v2.eval() # set the model to evaluation mode

    with torch.inference_mode():
        test_pred = model_v2(X_test)
        test_loss = loss_fn(test_pred, y_test)
        if epoch % 10 == 0:
            print(f"Test Loss: {test_loss.item():.4f}")

Epoch 1/400, Loss: 0.5552
Test Loss: 0.5861
Epoch 11/400, Loss: 0.5437
Test Loss: 0.5726
Epoch 21/400, Loss: 0.5321
Test Loss: 0.5592
Epoch 31/400, Loss: 0.5206
Test Loss: 0.5457
Epoch 41/400, Loss: 0.5091
Test Loss: 0.5322
Epoch 51/400, Loss: 0.4976
Test Loss: 0.5187
Epoch 61/400, Loss: 0.4861
Test Loss: 0.5053
Epoch 71/400, Loss: 0.4745
Test Loss: 0.4918
Epoch 81/400, Loss: 0.4630
Test Loss: 0.4783
Epoch 91/400, Loss: 0.4515
Test Loss: 0.4649
Epoch 101/400, Loss: 0.4400
Test Loss: 0.4514
Epoch 111/400, Loss: 0.4284
Test Loss: 0.4379
Epoch 121/400, Loss: 0.4169
Test Loss: 0.4245
Epoch 131/400, Loss: 0.4054
Test Loss: 0.4110
Epoch 141/400, Loss: 0.3939
Test Loss: 0.3975
Epoch 151/400, Loss: 0.3824
Test Loss: 0.3840
Epoch 161/400, Loss: 0.3708
Test Loss: 0.3706
Epoch 171/400, Loss: 0.3593
Test Loss: 0.3571
Epoch 181/400, Loss: 0.3478
Test Loss: 0.3436
Epoch 191/400, Loss: 0.3363
Test Loss: 0.3302
Epoch 201/400, Loss: 0.3248
Test Loss: 0.3167
Epoch 211/400, Loss: 0.3132
Test Loss: 0.3032

In [16]:
model_v2.state_dict()

OrderedDict([('linear.weight', tensor([[0.6085]])),
             ('linear.bias', tensor([0.4300]))])

#### 4. Making predictions

*Ideally, should be used on unseen data.*

In [None]:
model_v2.eval()

with torch.inference_mode():
    y_pred = model_v2(X_test)

y_pred

tensor([[0.9168],
        [0.9290],
        [0.9412],
        [0.9534],
        [0.9655],
        [0.9777],
        [0.9899],
        [1.0020],
        [1.0142],
        [1.0264]])

In [18]:
y_test

tensor([[0.8600],
        [0.8740],
        [0.8880],
        [0.9020],
        [0.9160],
        [0.9300],
        [0.9440],
        [0.9580],
        [0.9720],
        [0.9860]])

#### Saving and load the trained model

In [19]:
from pathlib import Path
MODEL_PATH = Path("first_model")
MODEL_PATH.mkdir(parents=True, exist_ok=True)
MODEL_NAME = "first_linear_regression_model_v2.pth" # conventionally, .pt or .pth is used for PyTorch models
MODEL_SAVE_PATH = MODEL_PATH / MODEL_NAME

In [20]:
torch.save(obj=model_v2.state_dict(), f=MODEL_SAVE_PATH) # save the model's state_dict
model_v2_reloaded = LinearRegressionModelv2().to(device) # create a new instance of the model
model_v2_reloaded.load_state_dict(torch.load(MODEL_SAVE_PATH)) # load the state_dict into the new model instance
model_v2_reloaded.state_dict() # check the reloaded model's state_dict

OrderedDict([('linear.weight', tensor([[0.6085]])),
             ('linear.bias', tensor([0.4300]))])

In [21]:
model_v2.state_dict()

OrderedDict([('linear.weight', tensor([[0.6085]])),
             ('linear.bias', tensor([0.4300]))])