# Linear Regression from Scratch

This notebook trains a multivariate linear regression model on house-price data
with gradient descent, using the from-scratch NumPy implementation in `src.model`.

In [1]:
import sys
import os

# Put the notebook's parent directory on the import path so the `src`
# package imported by the following cells can be resolved.
project_root = os.path.abspath(os.pardir)
already_importable = project_root in sys.path
if not already_importable:
    # Prepend so this checkout takes precedence over any other `src` on the path.
    sys.path.insert(0, project_root)

In [2]:
import numpy as np
from src.model import (
    gradient_descent,
    gradient_descent_l2,
    predict,
    mean_squared_error,
)

In [3]:
import pandas as pd
from src.preprocessing import train_test_split, normalize_features

# Load the house-price table and keep only the modelling columns:
# the five predictors followed by the regression target.
feature_names = ["sqft_living", "bedrooms", "bathrooms", "floors", "view"]
target_name = "price"
df = pd.read_csv("../data/house_prices.csv")[feature_names + [target_name]]

# Predictor matrix (one column per entry of feature_names) and target vector.
X = df[feature_names].to_numpy()
y = df[target_name].to_numpy()

# Partition into train/test before any statistics are computed.
X_train, X_test, y_train, y_test = train_test_split(X, y)

# The first call returns the normalised training data plus `mean` and `std`;
# the second call is handed those same values for the test data
# (presumably so both sets share one scaling; confirm in src.preprocessing).
X_train_norm, mean, std = normalize_features(X_train)
X_test_norm, _, _ = normalize_features(X_test, mean, std)

In [4]:
# Starting parameters: one zero weight per column of the normalised
# training matrix, and a zero bias.
n_features = X_train_norm.shape[1]
W = np.zeros(n_features)
b = 0.0

In [5]:
# Baseline: predict on the normalised training features with the current
# W and b (all zeros when the notebook is run top to bottom), then score
# those predictions against y_train.
initial_pred = predict(X_train_norm, W, b)
initial_loss = mean_squared_error(y_train, initial_pred)
print("Initial loss:", initial_loss)

Initial loss: 438945531125.18933


In [6]:
# Hyperparameters passed to gradient_descent_l2.
LEARNING_RATE = 0.01
EPOCHS = 1000
L2_LAMBDA = 0.1

# Start from freshly zeroed parameters instead of the current W and b.
# This cell rebinds W and b to the returned values, so feeding them back in
# would make every re-execution start from whatever the previous execution
# returned, and the reported loss would no longer be reproducible.
# On a clean top-to-bottom run the starting point is unchanged
# (zero weights, zero bias).
W, b, loss_history = gradient_descent_l2(
    X_train_norm,
    y_train,
    np.zeros(X_train_norm.shape[1]),
    0.0,
    learning_rate=LEARNING_RATE,
    epochs=EPOCHS,
    lambda_=L2_LAMBDA,
)

In [7]:
# Last entry of the loss history returned by the training call.
final_loss = loss_history[-1]
print("Final loss:", final_loss)

Final loss: 77519814462.91484


In [8]:
# Sanity check: print the shapes of the normalised training matrix and of
# the weight vector, one per line.
for label, array in (("X_train_norm shape:", X_train_norm), ("W shape:", W)):
    print(label, array.shape)

X_train_norm shape: (3680, 5)
W shape: (5,)
