## Step 1: Implement linear regression: replace the gradient function with autograd

- Recall key steps for training
    - Forward model (1) = compute prediction with model
    - Forward model (2) = Compute loss
    - Backward = compute gradients
    - Update weights 

- Replace NumPy arrays with PyTorch tensors
- Replace the hand-written gradient function with autograd: call `.backward()` on the loss tensor

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import torch

def forward(w, x):
    """Linear model without a bias term: scale the input ``x`` by weight ``w``.

    Works for any operands that support ``*`` (plain numbers, tensors, ...).
    """
    prediction = w * x
    return prediction

# MSE as the loss function
def loss(y, y_pred):
    """Return the mean squared error between targets ``y`` and ``y_pred``.

    The inputs must support subtraction and ``**``, and the result of those
    operations must provide ``.mean()``.
    """
    squared_error = (y_pred - y) ** 2
    return squared_error.mean()

# No longer needed: autograd computes this gradient for us.
# MSE: J = 1/N * sum((w*x - y)**2)
# dJ/dw = 2/N * sum((w*x - y) * x)
"""
def gradient(x, y, y_pred):
    return np.mean(2*x*(y_pred - y))
"""

# Train function
def train(learning_rate, n_iters, w, X, Y):
    """Fit the weight of ``forward`` by gradient descent, using autograd.

    After every epoch a line is printed with the updated weight and the loss
    that was measured before that update. After the last epoch the
    prediction for ``x = 6`` is printed.

    Args:
        learning_rate: Step size applied to the gradient on every update.
        n_iters: Number of epochs; every epoch uses all of ``X`` and ``Y``.
        w: Initial weight; converted to a float32 tensor that tracks
            gradients.
        X: Inputs passed to ``forward``.
        Y: Targets passed to ``loss``.
    """
    # Turn the initial weight into a tensor autograd can differentiate.
    w = torch.tensor(w, dtype=torch.float32, requires_grad=True)

    for epoch in range(n_iters):
        y_pred = forward(w, X)          # Forward pass
        loss_value = loss(Y, y_pred)    # Loss

        # Backward pass: autograd stores d(loss)/dw in w.grad
        loss_value.backward()

        # Update the weight. The step itself must not be recorded in the
        # graph, hence no_grad(). Updating `w` directly instead of `w.data`
        # avoids bypassing autograd's tracking of in-place changes.
        with torch.no_grad():
            w -= learning_rate * w.grad
            # backward() accumulates into w.grad, so clear it before the
            # next epoch.
            w.grad.zero_()

        # .item() yields plain Python scalars for formatting
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {loss_value.item():.8f}')

    print(f'Prediction after training: f(6) = {forward(w.item(), 6):.3f}')
    
    
# Configuration
w_init = 30
n_iters = 20
learning_rate = 0.01

# Training data: must be torch tensors, not np.array
X = torch.tensor([1.0, 2.0, 3, 4], dtype=torch.float32)
Y = torch.tensor([2.3, 3.4, 6.5, 6.8], dtype=torch.float32)

# Run the training loop starting from the initial weight
train(learning_rate=learning_rate, n_iters=n_iters, w=w_init, X=X, Y=Y)


epoch 1: w = 25.779, loss = 5939.33496094
epoch 2: w = 22.191, loss = 4291.27685547
epoch 3: w = 19.141, loss = 3100.55566406
epoch 4: w = 16.549, loss = 2240.25878906
epoch 5: w = 14.346, loss = 1618.69470215
epoch 6: w = 12.473, loss = 1169.61450195
epoch 7: w = 10.881, loss = 845.15423584
epoch 8: w = 9.528, loss = 610.73156738
epoch 9: w = 8.378, loss = 441.36120605
epoch 10: w = 7.400, loss = 318.99108887
epoch 11: w = 6.569, loss = 230.57872009
epoch 12: w = 5.863, loss = 166.70083618
epoch 13: w = 5.262, loss = 120.54901886
epoch 14: w = 4.752, loss = 87.20434570
epoch 15: w = 4.318, loss = 63.11280441
epoch 16: w = 3.949, loss = 45.70667267
epoch 17: w = 3.636, loss = 33.13073730
epoch 18: w = 3.370, loss = 24.04463005
epoch 19: w = 3.143, loss = 17.47991371
epoch 20: w = 2.951, loss = 12.73690891
Prediction after training: f(6) = 17.704


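## Step 2: Implement linear regression: replace the manual weight update (gradient descent) with an optimizer
- Replace the hand-written `forward` function with a built-in model: `model = nn.Linear(input_size, output_size)`
- Replace the hand-written loss function with the built-in one: `criterion = nn.MSELoss()`
- Update the weights with `optimizer.step()`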

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# replace this with a Linear Model
"""
def forward(w, x):
    return w * x
"""

# No longer needed: replaced by the built-in nn.MSELoss (see `criterion` below)
# MSE as the loss function
"""
def loss(y, y_pred):
   return ((y_pred - y)**2).mean()
"""

# No longer needed: autograd computes this gradient for us.
# MSE: J = 1/N * sum((w*x - y)**2)
# dJ/dw = 2/N * sum((w*x - y) * x)
"""
def gradient(x, y, y_pred):
    return np.mean(2*x*(y_pred - y))
"""

# Train function
def train(n_iters, X, Y):
    """Run ``n_iters`` training epochs and print the progress of each one.

    Relies on the module-level ``model``, ``criterion`` and ``optimizer``.
    ``model`` must expose ``weight`` and ``bias`` tensors holding a single
    element each, because both are read with ``.item()``.

    Args:
        n_iters: Number of epochs to run.
        X: Inputs passed to ``model``.
        Y: Targets passed to ``criterion`` together with the predictions.
    """
    for step in range(1, n_iters + 1):
        # Forward pass: prediction first, then the loss
        prediction = model(X)
        batch_loss = criterion(prediction, Y)

        # Backward pass: autograd computes the gradient (direction of change)
        # for each parameter
        batch_loss.backward()

        # Apply the update, then reset the gradients for the next epoch
        optimizer.step()
        optimizer.zero_grad()

        # .item() turns the scalar tensors into plain Python numbers
        weight = model.weight.item()
        bias = model.bias.item()
        print(f'epoch {step}: w = {weight:.3f}, b = {bias:.3f}, loss = {batch_loss.item():.8f}')
    
# Training data as torch tensors (not np.array), reshaped to (4, 1) columns
X = torch.tensor([1.0, 2.0, 3, 4], dtype=torch.float32).reshape(4, 1)
Y = torch.tensor([2.3, 3.4, 6.5, 6.8], dtype=torch.float32).reshape(4, 1)

n_samples, n_features = X.shape

# Linear model f = wx + b
input_size, output_size = n_features, 1
model = nn.Linear(input_size, output_size)

# Hyperparameters
learning_rate = 0.01
n_iters = 20

# Loss (MSE) and optimizer (Stochastic Gradient Descent, SGD)
criterion = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

train(n_iters, X, Y)

epoch 1: w = -0.212, b = -0.831, loss = 60.44245148
epoch 2: w = 0.140, b = -0.709, loss = 42.06572342
epoch 3: w = 0.434, b = -0.607, loss = 29.31435013
epoch 4: w = 0.678, b = -0.522, loss = 20.46628952
epoch 5: w = 0.881, b = -0.450, loss = 14.32666016
epoch 6: w = 1.051, b = -0.390, loss = 10.06634808
epoch 7: w = 1.192, b = -0.340, loss = 7.11005878
epoch 8: w = 1.309, b = -0.298, loss = 5.05860424
epoch 9: w = 1.406, b = -0.262, loss = 3.63499498
epoch 10: w = 1.487, b = -0.232, loss = 2.64703655
epoch 11: w = 1.555, b = -0.207, loss = 1.96136689
epoch 12: w = 1.611, b = -0.185, loss = 1.48545051
epoch 13: w = 1.658, b = -0.167, loss = 1.15507805
epoch 14: w = 1.696, b = -0.152, loss = 0.92569691
epoch 15: w = 1.729, b = -0.139, loss = 0.76639163
epoch 16: w = 1.755, b = -0.127, loss = 0.65571183
epoch 17: w = 1.777, b = -0.118, loss = 0.57877225
epoch 18: w = 1.796, b = -0.109, loss = 0.52524626
epoch 19: w = 1.811, b = -0.102, loss = 0.48796645
epoch 20: w = 1.823, b = -0.095, 

In [3]:
# Query the trained model at x = 6
test_input = torch.tensor([6.0], dtype=torch.float32).reshape(1, 1)  # Shape: (1, 1)

# Inference only, so gradient tracking is switched off
with torch.no_grad():
    y_pred = model(test_input)

prediction = y_pred.item()
print(f'Prediction for x = 6: {prediction:.3f}')

Prediction for x = 6: 10.844
