# 🧠 Linear Regression from Scratch using NumPy

In this project, we build a linear regression model using the **Normal Equation** and evaluate it using **MSE** and **R² Score** — all with only NumPy.

In [10]:
import numpy as np


In [11]:
# Load the housing dataset from CSV: feature columns (Size, Rooms) → target column (Price).
# NOTE(review): nothing in this cell draws random numbers, so the seed below has no
# visible effect here — confirm whether another cell relies on it.
np.random.seed(42)

# skip_header=1 skips the first line of the file (presumably the column-name row).
data = np.genfromtxt("../data/housing.csv", delimiter=",", skip_header=1)

# Split columns: every column but the last is a feature, the last one is the target.
X = data[:, :-1]   # Size, Rooms
y = data[:, -1]    # Price

# Sanity-check the shapes of the feature matrix and the target vector.
print(X.shape, y.shape)


(10, 2) (10,)


In [12]:
def normalize_features(X):
    """Standardize each feature column to zero mean and unit variance (z-score).

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Raw feature matrix.

    Returns
    -------
    X_norm : ndarray
        ``(X - mean) / std`` computed column-wise.
    mean : ndarray
        Per-column mean of ``X``.
    std : ndarray
        Per-column population standard deviation of ``X``. Columns with zero
        deviation report 1.0 instead, so that callers re-using ``std`` to
        scale new samples never divide by zero.
    """
    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant column has std == 0; dividing by it would fill the column with
    # NaN/inf. Using a divisor of 1 maps such a column to all zeros instead.
    std = np.where(std == 0, 1.0, std)
    return (X - mean) / std, mean, std

# Standardize the features; mean and std are kept so new inputs can be scaled identically.
X_norm, mean, std = normalize_features(X)

def add_bias(X):
    """Return X with a leading column of ones, used as the intercept term."""
    n_samples = X.shape[0]
    intercept_column = np.ones(n_samples)
    return np.column_stack((intercept_column, X))

# Prepend the column of ones so the first fitted coefficient acts as the intercept.
X_bias = add_bias(X_norm)



In [13]:
def normal_equation(X, y):
    """Fit ordinary least squares in closed form via the normal equation.

    Computes ``theta = pinv(X^T X) @ (X^T y)``.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_params)
        Design matrix (including the bias column, if an intercept is wanted).
    y : ndarray of shape (n_samples,)
        Target values.

    Returns
    -------
    theta : ndarray of shape (n_params,)
        Least-squares coefficients. When ``X^T X`` is invertible this is the
        usual normal-equation solution; when it is singular (e.g. an all-zero
        or duplicated column) the pseudo-inverse still yields a least-squares
        solution instead of raising ``LinAlgError``.
    """
    gram = X.T @ X
    moment = X.T @ y
    # pinv coincides with inv for a non-singular Gram matrix but stays defined
    # when features are collinear or constant.
    return np.linalg.pinv(gram) @ moment


# Fit the model: theta[0] is the intercept, theta[1:] are the weights of the normalized features.
theta = normal_equation(X_bias, y)
print("Coefficients:", theta)


# In-sample predictions on the same rows the model was fitted on.
y_pred = X_bias @ theta


def mean_squared_error(y_true, y_pred):
    """Return the average squared difference between targets and predictions."""
    residuals = y_true - y_pred
    squared_residuals = residuals ** 2
    return np.mean(squared_residuals)

def r2_score(y_true, y_pred):
    """Return the coefficient of determination R² of the predictions.

    R² = 1 - SS_res / SS_tot, where SS_res is the residual sum of squares and
    SS_tot is the total sum of squares of ``y_true`` around its mean.

    Parameters
    ----------
    y_true : ndarray
        Observed target values.
    y_pred : ndarray
        Predicted target values, same shape as ``y_true``.

    Returns
    -------
    float
        The R² score. If ``y_true`` is constant (SS_tot == 0) the ratio is
        undefined; in that case 1.0 is returned for a perfect prediction and
        0.0 otherwise, rather than nan/-inf.
    """
    ss_res = np.sum((y_true - y_pred) ** 2)
    ss_tot = np.sum((y_true - np.mean(y_true)) ** 2)
    # Guard the degenerate case explicitly: dividing by a zero total sum of
    # squares would produce nan or -inf and a runtime warning.
    if ss_tot == 0:
        return 1.0 if ss_res == 0 else 0.0
    return 1 - (ss_res / ss_tot)

# Evaluate the fit on the same data the model was trained on (no held-out set is used here).
mse = mean_squared_error(y, y_pred)
r2 = r2_score(y, y_pred)

print(f"Mean Squared Error: {mse:.2f}")
print(f"R² Score: {r2:.4f}")


# Test: House with 1800 sq.ft and 3 rooms
new_input = np.array([[1800, 3]])
# Apply the training-set mean/std so the new sample is on the same scale as X_norm.
new_input_norm = (new_input - mean) / std
new_input_bias = add_bias(new_input_norm)

# The product is a length-1 array (one input row), hence the [0] when formatting.
predicted_price = new_input_bias @ theta
print(f"Predicted price: ${predicted_price[0]:,.2f}")


Coefficients: [286500.          42470.01616051   2830.47980941]
Mean Squared Error: 1359635415.37
R² Score: 0.5829
Predicted price: $295,606.35
