## Step 1: Implement linear regression: replace the gradient function with autograd

- Recall key steps for training
    - Forward model (1) = compute prediction with model
    - Forward model (2) = Compute loss
    - Backward = compute gradients
    - Update weights 

- replace np array with pytorch tensor 
- replace gradient function with `loss.backward()`    

In [61]:
import numpy as np
import matplotlib.pyplot as plt
import torch

def forward(w, x):
    """Bias-free linear model: return the prediction ``w * x``."""
    y_hat = w * x
    return y_hat

# Mean squared error (MSE) used as the loss function
def loss(y, y_pred):
    """Return the mean of the squared differences between ``y_pred`` and ``y``.

    Both arguments must support elementwise subtraction and the result must
    provide a ``.mean()`` method (e.g. torch tensors or NumPy arrays).
    """
    squared_error = (y_pred - y) ** 2
    return squared_error.mean()

# don't need this any more as we use autograd
# MSE: J = 1/N * sum((w*x - y)**2)
# dJ/dw = 2/N * sum((w*x - y) * x)
"""
def gradient(x, y, y_pred):
    return np.mean(2*x*(y_pred - y))
"""

# Train function
def train(learning_rate, n_iters, w, X, Y):
    """Fit the single weight of the model ``w * x`` by gradient descent.

    The gradient is obtained from autograd (``backward()``) rather than from
    a hand-derived formula. Progress is printed after every epoch and the
    prediction for x = 6 is printed once training finishes.

    Args:
        learning_rate: Step size that scales the gradient in each update.
        n_iters: Number of training iterations (epochs) to run.
        w: Initial value of the weight; it is converted to a float32 tensor
            with ``requires_grad=True``.
        X: Inputs passed to ``forward`` (torch tensors in the caller).
        Y: Targets passed to ``loss`` (torch tensors in the caller).

    Returns:
        None. Results are reported through ``print`` only.
    """
    # Wrap the initial weight in a tensor that autograd tracks
    w = torch.tensor(w, dtype=torch.float32, requires_grad=True)

    for epoch in range(n_iters):
        y_pred = forward(w, X)      # Forward pass (1): prediction
        cost = loss(Y, y_pred)      # Forward pass (2): loss

        # Backward pass: autograd accumulates d(cost)/dw into w.grad
        cost.backward()

        # Update the weight in place. The step must not be recorded in the
        # autograd graph, hence no_grad(); going through the legacy `.data`
        # attribute is unnecessary here.
        with torch.no_grad():
            w -= learning_rate * w.grad

        # backward() accumulates, so clear the gradient before the next epoch
        w.grad.zero_()

        # Print using .item() for scalars to avoid NumPy conversion
        print(f'epoch {epoch+1}: w = {w.item():.3f}, loss = {cost.item():.8f}')

    print(f'Prediction after training: f(6) = {forward(w.item(), 6):.3f}')
    
    
# Training data: use torch tensors here, not np.array
X = torch.tensor([1.0, 2.0, 3.0, 4.0], dtype=torch.float32)
Y = torch.tensor([2.3, 3.4, 6.5, 6.8], dtype=torch.float32)

# Configuration
learning_rate = 0.01  # step size for each gradient-descent update
n_iters = 20          # number of training epochs
w_init = 30           # starting value of the weight

# Run the training loop; it prints the progress of every epoch
train(learning_rate=learning_rate, n_iters=n_iters, w=w_init, X=X, Y=Y)


epoch 1: w = 25.779, loss = 5939.33496094
epoch 2: w = 22.191, loss = 4291.27685547
epoch 3: w = 19.141, loss = 3100.55566406
epoch 4: w = 16.549, loss = 2240.25878906
epoch 5: w = 14.346, loss = 1618.69470215
epoch 6: w = 12.473, loss = 1169.61450195
epoch 7: w = 10.881, loss = 845.15423584
epoch 8: w = 9.528, loss = 610.73156738
epoch 9: w = 8.378, loss = 441.36120605
epoch 10: w = 7.400, loss = 318.99108887
epoch 11: w = 6.569, loss = 230.57872009
epoch 12: w = 5.863, loss = 166.70083618
epoch 13: w = 5.262, loss = 120.54901886
epoch 14: w = 4.752, loss = 87.20434570
epoch 15: w = 4.318, loss = 63.11280441
epoch 16: w = 3.949, loss = 45.70667267
epoch 17: w = 3.636, loss = 33.13073730
epoch 18: w = 3.370, loss = 24.04463005
epoch 19: w = 3.143, loss = 17.47991371
epoch 20: w = 2.951, loss = 12.73690891
Prediction after training: f(6) = 17.704


## Step 2: Implement linear regression: replace the manual weight update (gradient descent) with an optimizer
- replace loss function with built-in loss function `loss = nn.MSELoss()`
- Update weights with `optimizer.step()`

In [62]:
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn

# replace this with a Linear Model
"""
def forward(w, x):
    return w * x
"""

# don't need this any more as we use the built-in nn.MSELoss
# (hand-written MSE loss function, kept for reference)
"""
def loss(y, y_pred):
   return ((y_pred - y)**2).mean()
"""

# don't need this any more as we use autograd
# MSE: J = 1/N * sum((w*x - y)**2)
# dJ/dw = 2/N * sum((w*x - y) * x)
"""
def gradient(x, y, y_pred):
    return np.mean(2*x*(y_pred - y))
"""

# Train function
def train(n_iters, X, Y):
    """Train the module-level ``model`` for ``n_iters`` epochs.

    Relies on three module-level objects defined alongside this function:
    ``model`` (the network), ``criterion`` (the loss function) and
    ``optimizer`` (which updates the parameters of ``model``). After every
    epoch the current weight, bias and loss are printed.

    Args:
        n_iters: Number of training iterations (epochs) to run.
        X: Input tensor fed to ``model``.
        Y: Target tensor compared against the model output.

    Returns:
        None. Results are reported through ``print`` only.
    """
    for epoch in range(n_iters):
        y_pred = model(X)               # Forward pass (1): prediction

        # Forward pass (2): loss. Use the built-in criterion (nn.MSELoss)
        # instead of the hand-written `loss` function, which is commented
        # out in this step. nn.MSELoss takes (input, target).
        cost = criterion(y_pred, Y)

        # Backward pass: autograd computes the gradient of the loss with
        # respect to every model parameter (direction of change for each)
        cost.backward()

        # Update weights
        optimizer.step()

        # Reset gradients; backward() accumulates into .grad otherwise
        optimizer.zero_grad()

        # Print using .item() for scalars to avoid NumPy conversion
        w = model.weight.item()  # Scalar value of weight
        b = model.bias.item()    # Scalar value of bias
        print(f'epoch {epoch+1}: w = {w:.3f}, b = {b:.3f}, loss = {cost.item():.8f}')
    
# Training data as torch tensors (not np.array), written directly as
# column vectors of shape (4, 1): one row per sample, one column per feature
X = torch.tensor([[1.0], [2.0], [3.0], [4.0]], dtype=torch.float32)
Y = torch.tensor([[2.3], [3.4], [6.5], [6.8]], dtype=torch.float32)

n_samples, n_features = X.shape

# Linear model f = wx + b
input_size = n_features
output_size = 1
model = nn.Linear(input_size, output_size)

# Loss function
criterion = nn.MSELoss()

# Optimizer: Stochastic Gradient Descent (SGD) over the model parameters
learning_rate = 0.01
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# Run the training loop
n_iters = 20
train(n_iters, X, Y)

epoch 1: w = 0.900, b = 0.874, loss = 5.15727806
epoch 2: w = 1.001, b = 0.907, loss = 3.69042563
epoch 3: w = 1.084, b = 0.934, loss = 2.67253804
epoch 4: w = 1.154, b = 0.956, loss = 1.96617866
epoch 5: w = 1.212, b = 0.974, loss = 1.47598338
epoch 6: w = 1.260, b = 0.989, loss = 1.13578010
epoch 7: w = 1.301, b = 1.001, loss = 0.89965343
epoch 8: w = 1.335, b = 1.011, loss = 0.73574245
epoch 9: w = 1.363, b = 1.019, loss = 0.62194228
epoch 10: w = 1.387, b = 1.025, loss = 0.54291308
epoch 11: w = 1.406, b = 1.031, loss = 0.48801088
epoch 12: w = 1.423, b = 1.035, loss = 0.44985026
epoch 13: w = 1.437, b = 1.038, loss = 0.42330658
epoch 14: w = 1.448, b = 1.040, loss = 0.40482411
epoch 15: w = 1.458, b = 1.042, loss = 0.39193565
epoch 16: w = 1.466, b = 1.043, loss = 0.38292903
epoch 17: w = 1.473, b = 1.044, loss = 0.37661636
epoch 18: w = 1.479, b = 1.045, loss = 0.37217349
epoch 19: w = 1.484, b = 1.045, loss = 0.36902806
epoch 20: w = 1.488, b = 1.045, loss = 0.36678365


In [None]:
# Query the trained model at a single input, x = 6
# (alternative for the training inputs: predicted = model(X).detach().numpy())
test_input = torch.tensor([[6.0]], dtype=torch.float32)  # Shape: (1, 1)

# Inference only, so disable gradient tracking
with torch.no_grad():
    y_pred = model(test_input)

prediction = y_pred.item()
print(f'Prediction for x = 6: {prediction:.3f}')

Prediction for x = 6: 9.973
