# Linear Regression — From Scratch

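Batch Gradient Descent, Mini-Batch Gradient Descent, and the Normal Equation, implemented with **NumPy only** (scikit-learn is used in this notebook only for the dataset, the train/test split, feature scaling, and metrics).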

See [math.md](./math.md) for the full mathematical derivation.

In [None]:
import sys, os
sys.path.insert(0, os.path.abspath(".."))

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import r2_score

from linear_regression.linear_regression import LinearRegression

# Matplotlib 3.6 renamed its bundled seaborn styles to "seaborn-v0_8-*";
# older releases only ship the un-prefixed names. Use whichever one this
# install provides so the notebook does not stop with OSError on an older
# Matplotlib. If neither exists, the default style is left in place.
for style_name in ("seaborn-v0_8-darkgrid", "seaborn-darkgrid"):
    if style_name in plt.style.available:
        plt.style.use(style_name)
        break
print("Libraries loaded ✓")

## 1. Load & Prepare Data — California Housing

In [None]:
# Load the California Housing dataset as arrays rather than a DataFrame.
housing = fetch_california_housing(as_frame=False)
X = housing.data
y = housing.target

# Hold out 20% of the samples for evaluation; the seed is fixed so the
# split is reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so nothing from the test set leaks in.
scaler = StandardScaler()
scaler.fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

print(f"Train: {X_train.shape}  |  Test: {X_test.shape}")

## 2. Train — Batch Gradient Descent vs Normal Equation

In [None]:
# Model 1: iterative solver (batch gradient descent).
gd_model = LinearRegression(
    method="gradient_descent",
    learning_rate=0.1,
    n_iterations=500,
)
gd_model.fit(X_train, y_train)
gd_r2 = gd_model.score(X_test, y_test)  # reported below as R² on the test split

# Model 2: normal-equation solver, constructed with its default settings.
ne_model = LinearRegression(method="normal_equation")
ne_model.fit(X_train, y_train)
ne_r2 = ne_model.score(X_test, y_test)

# Print both scores with aligned labels for a side-by-side comparison.
print(f"Gradient Descent  →  R² = {gd_r2:.4f}")
print(f"Normal Equation   →  R² = {ne_r2:.4f}")

In [None]:
# Two side-by-side panels: training loss on the left, fit quality on the right.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Left panel: the loss values recorded by the gradient-descent model.
axes[0].plot(gd_model.loss_history, color="royalblue", lw=2)
axes[0].set(
    title="Training Loss (MSE) — Gradient Descent",
    xlabel="Iteration",
    ylabel="MSE",
)

# Right panel: test-set predictions plotted against the true targets.
y_pred = gd_model.predict(X_test)
axes[1].scatter(y_test, y_pred, alpha=0.3, s=10, color="steelblue")
# Dashed diagonal y = x spanning the observed target range; points on this
# line are predictions that match the actual value exactly.
axes[1].plot(
    [y_test.min(), y_test.max()],
    [y_test.min(), y_test.max()],
    "r--",
    lw=2,
    label="Perfect fit",
)
axes[1].set(
    title=f"Predicted vs Actual  (R²={gd_r2:.3f})",
    xlabel="Actual",
    ylabel="Predicted",
)
axes[1].legend()

plt.tight_layout()
plt.show()