<a href="https://colab.research.google.com/github/inderpreetsingh01/ml_machine_coding/blob/main/Linear_regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Method            |  Time Complexity
# Closed-form       => O(n_samples × n_features² + n_features³): forming XᵀX, then inverting it
# Gradient Descent  => O(n_samples × n_features × n_iters)

In [1]:
import numpy as np

class LinearRegression:
    """
    Linear Regression Model supporting both:
    - Closed-form solution (Normal Equation, solved via least squares)
    - Gradient Descent Optimization (batch updates on the mean squared error)

    Parameters
    ----------
    method : str
        "closed_form" or "gradient_descent" (default).
    lr : float
        Learning rate; only used by gradient descent.
    n_iters : int
        Number of gradient-descent iterations; only used by gradient descent.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Per-feature coefficients; None until fit() has run.
    bias : float or None
        Intercept term; None until fit() has run.
    """

    def __init__(self, method="gradient_descent", lr=0.01, n_iters=1000):
        self.method = method
        self.lr = lr
        self.n_iters = n_iters
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """
        Fit the model using the selected method.

        Parameters
        ----------
        X : array-like, one row per sample and one column per feature.
        y : array-like of targets, one entry per sample.

        Raises
        ------
        ValueError
            If ``self.method`` is neither "closed_form" nor "gradient_descent".
        """
        # Accept plain lists / integer arrays as well as float ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if self.method == "closed_form":
            self._fit_closed_form(X, y)
        elif self.method == "gradient_descent":
            self._fit_gradient_descent(X, y)
        else:
            raise ValueError("Unknown method selected")

    def _fit_closed_form(self, X, y):
        """
        Closed-form solution of the Normal Equation:
        theta = (X^T X)^(-1) X^T y

        The system is solved with a least-squares routine instead of an
        explicit inverse, so collinear or duplicated features (singular
        X^T X) yield the minimum-norm solution rather than an error.
        """
        # Add bias term to features (intercept)
        X_bias = np.c_[np.ones((X.shape[0], 1)), X]  # shape (n_samples, n_features + 1)
        theta_best, *_ = np.linalg.lstsq(X_bias, y, rcond=None)
        self.bias = theta_best[0]
        self.weights = theta_best[1:]

    def _fit_gradient_descent(self, X, y):
        """
        Fit using Batch Gradient Descent on the mean squared error.

        Raises
        ------
        ValueError
            If the number of targets does not match the number of samples.
        """
        n_samples, n_features = X.shape

        # A column-vector target of shape (n, 1) would broadcast against the
        # (n,) predictions into an (n, n) matrix, so flatten it first.
        y = np.ravel(y)
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            error = np.dot(X, self.weights) + self.bias - y
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """
        Predict target values as ``X @ weights + bias``.

        Raises
        ------
        RuntimeError
            If called before the model has been fitted.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not fitted yet. Call fit() before predict().")
        return np.dot(X, self.weights) + self.bias

    def evaluate(self, X, y_true):
        """
        Evaluate using Mean Squared Error and R^2 Score.

        Returns
        -------
        dict
            ``{"mse": ..., "r2_score": ...}``.
        """
        # Flatten both sides so (n,) vs (n, 1) inputs are compared
        # element-wise instead of being broadcast into an (n, n) matrix.
        y_pred = np.ravel(self.predict(X))
        y_true = np.ravel(y_true)

        squared_residuals = (y_true - y_pred) ** 2
        mse = np.mean(squared_residuals)
        r2 = 1 - (np.sum(squared_residuals) / np.sum((y_true - np.mean(y_true)) ** 2))
        return {"mse": mse, "r2_score": r2}


# ==== Synthetic Test ====
# Reproducible toy problem: y = 3*x1 + 2*x2 + 5 plus Gaussian noise (std 0.5).
np.random.seed(42)

true_weights = np.array([3, 2])
X = np.random.rand(100, 2)  # 100 samples, 2 features
y = X @ true_weights + 5 + np.random.randn(100) * 0.5

# --- Fit via the Normal Equation ---
print("==== Testing Closed-Form Solution ====")
model_closed = LinearRegression(method="closed_form")
model_closed.fit(X, y)
metrics_closed = model_closed.evaluate(X, y)
for label, value in (
    ("Weights:", model_closed.weights),
    ("Bias:", model_closed.bias),
    ("Metrics:", metrics_closed),
):
    print(label, value)

# --- Fit via batch gradient descent on the same data ---
print("\n==== Testing Gradient Descent Solution ====")
model_gd = LinearRegression(
    method="gradient_descent",
    lr=0.1,
    n_iters=1000,
)
model_gd.fit(X, y)
metrics_gd = model_gd.evaluate(X, y)
for label, value in (
    ("Weights:", model_gd.weights),
    ("Bias:", model_gd.bias),
    ("Metrics:", metrics_gd),
):
    print(label, value)

==== Testing Closed-Form Solution ====
Weights: [3.16933339 2.17747302]
Bias: 4.886136132050842
Metrics: {'mse': np.float64(0.24534574806972081), 'r2_score': np.float64(0.8375977028740942)}

==== Testing Gradient Descent Solution ====
Weights: [3.16909262 2.17727327]
Bias: 4.8863618600794965
Metrics: {'mse': np.float64(0.24534575648891244), 'r2_score': np.float64(0.8375976973011586)}


In [None]:
# with L1 and L2 regularization
class LinearRegression:
    """
    Linear Regression supporting:
    - Closed-form (Normal Equation) [with L2 Regularization (Ridge) only]
    - Gradient Descent [with L1 (Lasso) and L2 (Ridge) Regularization]

    Parameters
    ----------
    method : str
        "closed_form" or "gradient_descent" (default).
    lr : float
        Learning rate; only used by gradient descent.
    n_iters : int
        Number of gradient-descent iterations.
    l1_lambda : float
        L1 penalty strength (gradient descent only).
    l2_lambda : float
        L2 penalty strength.

    Notes
    -----
    The two methods scale ``l2_lambda`` differently: the closed form adds
    ``l2_lambda * I`` to the un-normalised ``X^T X``, while gradient descent
    adds ``l2_lambda * weights`` to a gradient that is averaged over the
    samples. The same ``l2_lambda`` therefore does not produce the same
    model under both methods.
    """

    def __init__(self,
                 method="gradient_descent",
                 lr=0.01,
                 n_iters=1000,
                 l1_lambda=0.0,
                 l2_lambda=0.0):
        self.method = method
        self.lr = lr
        self.n_iters = n_iters
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.weights = None  # per-feature coefficients, set by fit()
        self.bias = None     # intercept, set by fit()

    def fit(self, X, y):
        """
        Fit the model with the configured method.

        Raises
        ------
        NotImplementedError
            If "closed_form" is combined with a non-zero ``l1_lambda``.
        ValueError
            If ``self.method`` is not recognised.
        """
        # Accept plain lists / integer arrays as well as float ndarrays.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if self.method == "closed_form":
            if self.l1_lambda != 0:
                raise NotImplementedError("Closed-form solution doesn't support L1 regularization.")
            self._fit_closed_form(X, y)
        elif self.method == "gradient_descent":
            self._fit_gradient_descent(X, y)
        else:
            raise ValueError("Unknown method selected")

    def _fit_closed_form(self, X, y):
        """
        Closed-form solution with L2 Regularization:
        theta = (X^T X + λ * I)^(-1) X^T y

        The linear system is solved with a least-squares routine rather than
        an explicit inverse, so a singular system (e.g. duplicated features
        with ``l2_lambda == 0``) yields a minimum-norm solution instead of
        raising.
        """
        n_samples, n_features = X.shape
        X_bias = np.c_[np.ones((n_samples, 1)), X]
        penalty = np.eye(n_features + 1)
        penalty[0, 0] = 0  # Don't regularize bias term

        normal_matrix = X_bias.T @ X_bias + self.l2_lambda * penalty
        theta_best, *_ = np.linalg.lstsq(normal_matrix, X_bias.T @ y, rcond=None)

        self.bias = theta_best[0]
        self.weights = theta_best[1:]

    def _fit_gradient_descent(self, X, y):
        """
        Batch gradient descent on the mean squared error with optional
        L2 (gradient) and L1 (sub-gradient) penalties on the weights.

        Raises
        ------
        ValueError
            If the number of targets does not match the number of samples.
        """
        n_samples, n_features = X.shape

        # A column-vector target of shape (n, 1) would broadcast against the
        # (n,) predictions into an (n, n) matrix, so flatten it first.
        y = np.ravel(y)
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0

        for _ in range(self.n_iters):
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y

            # Gradients for weights and bias (bias is never penalised)
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # L2 Regularization
            dw += self.l2_lambda * self.weights

            # L1 Regularization (Sub-gradient; np.sign is 0 at weight == 0)
            dw += self.l1_lambda * np.sign(self.weights)

            # Update parameters
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

    def predict(self, X):
        """
        Predict target values as ``X @ weights + bias``.

        Raises
        ------
        RuntimeError
            If called before the model has been fitted.
        """
        if self.weights is None or self.bias is None:
            raise RuntimeError("Model is not fitted yet. Call fit() before predict().")
        return np.dot(X, self.weights) + self.bias

    def evaluate(self, X, y_true):
        """
        Return ``{"mse": ..., "r2_score": ...}`` for predictions on ``X``.
        """
        # Flatten both sides so (n,) vs (n, 1) inputs are compared
        # element-wise instead of being broadcast into an (n, n) matrix.
        y_pred = np.ravel(self.predict(X))
        y_true = np.ravel(y_true)

        squared_residuals = (y_true - y_pred) ** 2
        mse = np.mean(squared_residuals)
        r2 = 1 - np.sum(squared_residuals) / np.sum((y_true - np.mean(y_true)) ** 2)
        return {"mse": mse, "r2_score": r2}


# ==== Synthetic Test ====
# Reproducible toy data: two features, linear target plus Gaussian noise (std 0.5).
np.random.seed(42)
true_weights = np.array([3, 2])
X = np.random.rand(100, 2)
y = X @ true_weights + 5 + np.random.randn(100) * 0.5

# --- Ridge via the regularised Normal Equation ---
print("==== Closed-Form Ridge Regression ====")
model_ridge = LinearRegression(
    method="closed_form",
    l2_lambda=0.1,
)
model_ridge.fit(X, y)
metrics_ridge = model_ridge.evaluate(X, y)
for label, value in (
    ("Weights:", model_ridge.weights),
    ("Bias:", model_ridge.bias),
    ("Metrics:", metrics_ridge),
):
    print(label, value)

# --- Combined L1 + L2 penalties via gradient descent ---
print("\n==== Gradient Descent with L1 + L2 Regularization ====")
model_l1_l2 = LinearRegression(
    method="gradient_descent",
    lr=0.1,
    n_iters=1000,
    l1_lambda=0.1,
    l2_lambda=0.1,
)
model_l1_l2.fit(X, y)
metrics_l1_l2 = model_l1_l2.evaluate(X, y)
for label, value in (
    ("Weights:", model_l1_l2.weights),
    ("Bias:", model_l1_l2.bias),
    ("Metrics:", metrics_l1_l2),
):
    print(label, value)

In [None]:
# Feature Scaling (Standardization):
# 1. Normalizes features to zero mean & unit variance.
# 2. Handled automatically during fit and predict.
# 3. Scaling params stored inside the model.

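# Early Stopping:
# 1. Stops gradient descent early once the change in training MSE between consecutive iterations is < tol.
# 2. Saves training time. Note: it monitors the training loss only (no validation split), so any protection against overfitting is incidental.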

class LinearRegression:
    """
    Linear Regression with:
    - Closed-form (with L2 Regularization)
    - Gradient Descent (with L1/L2 Regularization)
    - Feature Scaling (Standardization)
    - Early Stopping

    Parameters
    ----------
    method : str
        "closed_form" or "gradient_descent" (default).
    lr : float
        Learning rate for gradient descent.
    n_iters : int
        Maximum number of gradient-descent iterations.
    l1_lambda, l2_lambda : float
        L1 / L2 penalty strengths (L1 is gradient descent only).
    early_stopping : bool
        If True, gradient descent stops once the MSE change between two
        consecutive iterations falls below ``tol``.
    tol : float
        Threshold for the early-stopping check.

    Notes
    -----
    - Features are standardised inside ``fit`` and ``predict``; ``weights``
      and ``bias`` are therefore expressed in the standardised feature space,
      not in the units of the raw inputs.
    - The closed form adds ``l2_lambda * I`` to the un-normalised ``X^T X``,
      while gradient descent adds ``l2_lambda * weights`` to a gradient that
      is averaged over samples, so the same ``l2_lambda`` does not yield the
      same model under both methods.
    """

    def __init__(self,
                 method="gradient_descent",
                 lr=0.01,
                 n_iters=1000,
                 l1_lambda=0.0,
                 l2_lambda=0.0,
                 early_stopping=False,
                 tol=1e-4):
        self.method = method
        self.lr = lr
        self.n_iters = n_iters
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.early_stopping = early_stopping
        self.tol = tol
        self.weights = None      # coefficients in standardised space, set by fit()
        self.bias = None         # intercept in standardised space, set by fit()
        self.scaler_mean = None  # per-feature mean learned in fit()
        self.scaler_std = None   # per-feature std learned in fit()

    def _scale_features(self, X, fit=True):
        """
        Standardise ``X`` column-wise.

        With ``fit=True`` the per-feature mean and standard deviation are
        (re)computed from ``X`` and stored; with ``fit=False`` the stored
        statistics are reused.
        """
        X = np.asarray(X, dtype=float)
        if fit:
            self.scaler_mean = np.mean(X, axis=0)
            self.scaler_std = np.std(X, axis=0)
            self.scaler_std[self.scaler_std == 0] = 1  # Prevent division by zero
        return (X - self.scaler_mean) / self.scaler_std

    def fit(self, X, y):
        """
        Standardise ``X`` and fit with the configured method.

        Raises
        ------
        NotImplementedError
            If "closed_form" is combined with a non-zero ``l1_lambda``.
        ValueError
            If ``self.method`` is not recognised.
        """
        X_scaled = self._scale_features(X, fit=True)
        y = np.asarray(y, dtype=float)

        if self.method == "closed_form":
            if self.l1_lambda != 0:
                raise NotImplementedError("Closed-form does not support L1 regularization.")
            self._fit_closed_form(X_scaled, y)
        elif self.method == "gradient_descent":
            self._fit_gradient_descent(X_scaled, y)
        else:
            raise ValueError("Unknown method selected")

    def _fit_closed_form(self, X, y):
        """
        Ridge Normal Equation: theta = (X^T X + λ * I)^(-1) X^T y.

        Solved with a least-squares routine instead of an explicit inverse.
        A constant feature is scaled to an all-zero column, which makes
        X^T X exactly singular when ``l2_lambda == 0``; the least-squares
        solve then returns the minimum-norm solution instead of raising.
        """
        n_samples, n_features = X.shape
        X_bias = np.c_[np.ones((n_samples, 1)), X]
        penalty = np.eye(n_features + 1)
        penalty[0, 0] = 0  # Don't regularize bias term

        normal_matrix = X_bias.T @ X_bias + self.l2_lambda * penalty
        theta_best, *_ = np.linalg.lstsq(normal_matrix, X_bias.T @ y, rcond=None)

        self.bias = theta_best[0]
        self.weights = theta_best[1:]

    def _fit_gradient_descent(self, X, y):
        """
        Batch gradient descent with L1/L2 penalties and optional early stopping.

        Raises
        ------
        ValueError
            If the number of targets does not match the number of samples.
        """
        n_samples, n_features = X.shape

        # A column-vector target of shape (n, 1) would broadcast against the
        # (n,) predictions into an (n, n) matrix, so flatten it first.
        y = np.ravel(y)
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0
        prev_loss = float('inf')

        for i in range(self.n_iters):
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y

            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Regularization (weights only; the bias is never penalised)
            dw += self.l2_lambda * self.weights
            dw += self.l1_lambda * np.sign(self.weights)

            # Parameter update
            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            # Early stopping: compares the unpenalised MSE measured *before*
            # this iteration's update with the one from the previous iteration.
            current_loss = np.mean(error ** 2)
            if self.early_stopping and abs(prev_loss - current_loss) < self.tol:
                print(f"Early stopping at iteration {i+1}. Loss change: {abs(prev_loss - current_loss):.6f}")
                break
            prev_loss = current_loss

    def predict(self, X):
        """
        Standardise ``X`` with the statistics learned in ``fit`` and predict.

        Raises
        ------
        RuntimeError
            If called before the model has been fitted.
        """
        if self.weights is None or self.scaler_mean is None:
            raise RuntimeError("Model is not fitted yet. Call fit() before predict().")
        X_scaled = self._scale_features(X, fit=False)
        return np.dot(X_scaled, self.weights) + self.bias

    def evaluate(self, X, y_true):
        """
        Return ``{"mse": ..., "r2_score": ...}`` for predictions on ``X``.
        """
        # Flatten both sides so (n,) vs (n, 1) inputs are compared
        # element-wise instead of being broadcast into an (n, n) matrix.
        y_pred = np.ravel(self.predict(X))
        y_true = np.ravel(y_true)

        squared_residuals = (y_true - y_pred) ** 2
        mse = np.mean(squared_residuals)
        r2 = 1 - np.sum(squared_residuals) / np.sum((y_true - np.mean(y_true)) ** 2)
        return {"mse": mse, "r2_score": r2}


# ==== Synthetic Test ====
# Reproducible toy data: two features, linear target plus Gaussian noise (std 0.5).
np.random.seed(42)
true_weights = np.array([3, 2])
X = np.random.rand(100, 2)
y = X @ true_weights + 5 + np.random.randn(100) * 0.5

print("==== Gradient Descent with Feature Scaling, L1/L2 Regularization, Early Stopping ====")
# Optimiser settings first, then penalties, then the stopping rule.
model = LinearRegression(
    method="gradient_descent",
    n_iters=1000,
    lr=0.1,
    l2_lambda=0.1,
    l1_lambda=0.1,
    tol=1e-5,
    early_stopping=True,
)
model.fit(X, y)
metrics = model.evaluate(X, y)
for label, value in (
    ("Weights:", model.weights),
    ("Bias:", model.bias),
    ("Metrics:", metrics),
):
    print(label, value)

In [None]:
# Added learning rate decay inside gradient descent.
# Decays after every iteration based on: lr_t = lr / (1 + decay_rate * t), where t is the number of completed iterations.
class LinearRegression:
    """
    Linear Regression with:
    - Closed-form (with L2 Regularization)
    - Gradient Descent (with L1/L2 Regularization)
    - Feature Scaling (Standardization)
    - Early Stopping
    - Learning Rate Decay

    Parameters
    ----------
    method : str
        "closed_form" or "gradient_descent" (default).
    lr : float
        Initial learning rate for gradient descent.
    n_iters : int
        Maximum number of gradient-descent iterations.
    l1_lambda, l2_lambda : float
        L1 / L2 penalty strengths (L1 is gradient descent only).
    early_stopping : bool
        If True, gradient descent stops once the MSE change between two
        consecutive iterations falls below ``tol``.
    tol : float
        Threshold for the early-stopping check.
    decay_rate : float
        When > 0, the step size after ``t`` completed iterations is
        ``lr / (1 + decay_rate * t)``; 0 keeps the learning rate constant.

    Notes
    -----
    - Features are standardised inside ``fit`` and ``predict``; ``weights``
      and ``bias`` are therefore expressed in the standardised feature space,
      not in the units of the raw inputs.
    - The closed form adds ``l2_lambda * I`` to the un-normalised ``X^T X``,
      while gradient descent adds ``l2_lambda * weights`` to a gradient that
      is averaged over samples, so the same ``l2_lambda`` does not yield the
      same model under both methods.
    """

    def __init__(self,
                 method="gradient_descent",
                 lr=0.01,
                 n_iters=1000,
                 l1_lambda=0.0,
                 l2_lambda=0.0,
                 early_stopping=False,
                 tol=1e-4,
                 decay_rate=0.0):
        self.method = method
        self.lr = lr
        self.n_iters = n_iters
        self.l1_lambda = l1_lambda
        self.l2_lambda = l2_lambda
        self.early_stopping = early_stopping
        self.tol = tol
        self.decay_rate = decay_rate
        self.weights = None      # coefficients in standardised space, set by fit()
        self.bias = None         # intercept in standardised space, set by fit()
        self.scaler_mean = None  # per-feature mean learned in fit()
        self.scaler_std = None   # per-feature std learned in fit()

    def _scale_features(self, X, fit=True):
        """
        Standardise ``X`` column-wise.

        With ``fit=True`` the per-feature mean and standard deviation are
        (re)computed from ``X`` and stored; with ``fit=False`` the stored
        statistics are reused.
        """
        X = np.asarray(X, dtype=float)
        if fit:
            self.scaler_mean = np.mean(X, axis=0)
            self.scaler_std = np.std(X, axis=0)
            # Constant features have zero std; divide by 1 instead of 0.
            self.scaler_std[self.scaler_std == 0] = 1
        return (X - self.scaler_mean) / self.scaler_std

    def fit(self, X, y):
        """
        Standardise ``X`` and fit with the configured method.

        Raises
        ------
        NotImplementedError
            If "closed_form" is combined with a non-zero ``l1_lambda``.
        ValueError
            If ``self.method`` is not recognised.
        """
        X_scaled = self._scale_features(X, fit=True)
        y = np.asarray(y, dtype=float)

        if self.method == "closed_form":
            if self.l1_lambda != 0:
                raise NotImplementedError("Closed-form does not support L1 regularization.")
            self._fit_closed_form(X_scaled, y)
        elif self.method == "gradient_descent":
            self._fit_gradient_descent(X_scaled, y)
        else:
            raise ValueError("Unknown method selected")

    def _fit_closed_form(self, X, y):
        """
        Ridge Normal Equation: theta = (X^T X + λ * I)^(-1) X^T y.

        Solved with a least-squares routine instead of an explicit inverse.
        A constant feature is scaled to an all-zero column, which makes
        X^T X exactly singular when ``l2_lambda == 0``; the least-squares
        solve then returns the minimum-norm solution instead of raising.
        """
        n_samples, n_features = X.shape
        X_bias = np.c_[np.ones((n_samples, 1)), X]
        penalty = np.eye(n_features + 1)
        penalty[0, 0] = 0  # the intercept is not regularised

        normal_matrix = X_bias.T @ X_bias + self.l2_lambda * penalty
        theta_best, *_ = np.linalg.lstsq(normal_matrix, X_bias.T @ y, rcond=None)

        self.bias = theta_best[0]
        self.weights = theta_best[1:]

    def _fit_gradient_descent(self, X, y):
        """
        Batch gradient descent with L1/L2 penalties, optional early stopping
        and optional inverse-time learning-rate decay.

        Raises
        ------
        ValueError
            If the number of targets does not match the number of samples.
        """
        n_samples, n_features = X.shape

        # A column-vector target of shape (n, 1) would broadcast against the
        # (n,) predictions into an (n, n) matrix, so flatten it first.
        y = np.ravel(y)
        if y.shape[0] != n_samples:
            raise ValueError(
                f"X has {n_samples} samples but y has {y.shape[0]} targets"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0
        prev_loss = float('inf')
        current_lr = self.lr

        for i in range(self.n_iters):
            y_pred = np.dot(X, self.weights) + self.bias
            error = y_pred - y

            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            # Penalties apply to the weights only; the bias is never penalised.
            dw += self.l2_lambda * self.weights
            dw += self.l1_lambda * np.sign(self.weights)

            self.weights -= current_lr * dw
            self.bias -= current_lr * db

            # Early stopping: compares the unpenalised MSE measured *before*
            # this iteration's update with the one from the previous iteration.
            current_loss = np.mean(error ** 2)
            if self.early_stopping and abs(prev_loss - current_loss) < self.tol:
                print(f"Early stopping at iteration {i+1}. Loss change: {abs(prev_loss - current_loss):.6f}")
                break
            prev_loss = current_loss

            # Learning rate decay step (takes effect from the next iteration)
            if self.decay_rate > 0:
                current_lr = self.lr / (1 + self.decay_rate * (i + 1))

    def predict(self, X):
        """
        Standardise ``X`` with the statistics learned in ``fit`` and predict.

        Raises
        ------
        RuntimeError
            If called before the model has been fitted.
        """
        if self.weights is None or self.scaler_mean is None:
            raise RuntimeError("Model is not fitted yet. Call fit() before predict().")
        X_scaled = self._scale_features(X, fit=False)
        return np.dot(X_scaled, self.weights) + self.bias

    def evaluate(self, X, y_true):
        """
        Return ``{"mse": ..., "r2_score": ...}`` for predictions on ``X``.
        """
        # Flatten both sides so (n,) vs (n, 1) inputs are compared
        # element-wise instead of being broadcast into an (n, n) matrix.
        y_pred = np.ravel(self.predict(X))
        y_true = np.ravel(y_true)

        squared_residuals = (y_true - y_pred) ** 2
        mse = np.mean(squared_residuals)
        r2 = 1 - np.sum(squared_residuals) / np.sum((y_true - np.mean(y_true)) ** 2)
        return {"mse": mse, "r2_score": r2}


# ==== Synthetic Test ====
# Reproducible toy data: two features, linear target plus Gaussian noise (std 0.5).
np.random.seed(42)
true_weights = np.array([3, 2])
X = np.random.rand(100, 2)
y = X @ true_weights + 5 + np.random.randn(100) * 0.5

print("==== Gradient Descent with L1/L2 Regularization, Early Stopping, Learning Rate Decay ====")
# Optimiser settings first, then penalties, then the stopping rule.
model = LinearRegression(
    method="gradient_descent",
    n_iters=1000,
    lr=0.1,
    decay_rate=0.05,
    l2_lambda=0.1,
    l1_lambda=0.1,
    tol=1e-5,
    early_stopping=True,
)
model.fit(X, y)
metrics = model.evaluate(X, y)
for label, value in (
    ("Weights:", model.weights),
    ("Bias:", model.bias),
    ("Metrics:", metrics),
):
    print(label, value)

In [None]:
# Feature	Benefit
# Feature Scaling	Stable, fast convergence
# L1 & L2 Regularization	Sparsity + Generalization
# Early Stopping	Saves compute, prevents overfitting
# Learning Rate Decay	Smooth convergence, avoids overshooting
# Modular Class Design	Reusable for real-world systems and machine coding

In [None]:
def test_linear_regression():
    """
    Smoke-test the regularised gradient-descent model on synthetic data.

    The data follow y = 3*x1 + 2*x2 + 5 with Gaussian noise of std 0.5.
    The test passes when the training MSE is below 0.5 and R^2 is above 0.7.
    """
    np.random.seed(42)
    # Generate synthetic data: y = 3*x1 + 2*x2 + 5 + noise
    X = np.random.rand(100, 2)
    true_weights = np.array([3, 2])
    y = X @ true_weights + 5 + np.random.randn(100) * 0.5  # with noise

    model = LinearRegression(
        method="gradient_descent",
        lr=0.1,
        n_iters=1000,
        l1_lambda=0.1,
        l2_lambda=0.1,
        early_stopping=True,
        tol=1e-5,
        decay_rate=0.05
    )
    model.fit(X, y)
    metrics = model.evaluate(X, y)

    # ✅ Test assertions
    assert metrics['mse'] < 0.5, f"Expected low MSE, got {metrics['mse']}"
    # The noise caps the achievable fit: the unregularised closed-form
    # solution on this exact data reaches R^2 ≈ 0.84, so a 0.95 threshold
    # can never be met. 0.7 leaves headroom for the L1/L2 shrinkage.
    assert 0.7 < metrics['r2_score'] <= 1.0, f"Expected high R2 score, got {metrics['r2_score']}"
    print("✅ Linear Regression Test Passed:", metrics)