#### Problem 15 here

With NumPy

In [5]:
import numpy as np

In [8]:

def linear_regression_gradient_descent(X: np.ndarray, y: np.ndarray, alpha: float, iterations: int) -> np.ndarray:
    # Your code here, make sure to round
    m, n = X.shape
    y = y.reshape(-1, 1)        # ensure column vector (m, 1)
    theta = np.zeros((n, 1))    # initialize parameters (n, 1)

    # Gradient descent loop
    for _ in range(iterations):
        predictions = X @ theta                    # (m, 1)
        errors = predictions - y                   # (m, 1)
        gradient = (X.T @ errors) / m              # (n, 1)
        theta -= alpha * gradient                  # update step

    # Round to 4 decimals
    theta = np.round(theta.flatten(), 4)

    return theta

In [9]:
print(linear_regression_gradient_descent(np.array([[1, 1], [1, 2], [1, 3]]), np.array([1, 2, 3]), 0.01, 1000))

[0.1107 0.9513]


With Pytorch

In [1]:
import torch

In [3]:
def linear_regression_gradient_descent(X, y, alpha, iterations) -> torch.Tensor:
    """
    Solve linear regression via gradient descent using PyTorch autograd.
    X: Tensor or convertible shape (m,n); y: shape (m,) or (m,1).
    alpha: learning rate; iterations: number of steps.
    Returns a 1-D tensor of length n, rounded to 4 decimals.
    """
    X_t = torch.as_tensor(X, dtype=torch.float)
    y_t = torch.as_tensor(y, dtype=torch.float).reshape(-1,1)
    m, n = X_t.shape
    theta = torch.zeros((n,1), requires_grad=True)
    # Your implementation here
	# Gradient descent loop
    for _ in range(iterations):
        # Forward pass: predictions and MSE loss
        y_pred = X_t @ theta                          # (m, 1)
        loss = ((y_pred - y_t) ** 2).mean()           # Mean Squared Error

        # Backward pass: compute d(loss)/d(theta)
        loss.backward()

        # Parameter update (manual SGD) and grad reset
        with torch.no_grad():
            theta -= alpha * theta.grad
            theta.grad.zero_()

    # Round to 4 decimals and return as 1-D tensor
    with torch.no_grad():
        theta = torch.round(theta * 10000) / 10000

    return theta.detach().squeeze(1)

In [4]:
res = linear_regression_gradient_descent( torch.eye(2), torch.tensor([5.0, 3.0]), 0.1, 10 )
print(res.detach().numpy().tolist())

[3.2565999031066895, 1.9539999961853027]
