In [1]:
import numpy as np
import matplotlib.pyplot as plt

class LinearRegression:
    """Linear regression trained by gradient descent or solved in closed form.

    Parameters
    ----------
    learning_rate : float, default 0.001
        Step size applied to every gradient-descent update in :meth:`fit`.
    n_iters : int, default 1000
        Number of gradient-descent iterations performed by :meth:`fit`.

    Attributes
    ----------
    weights : numpy.ndarray or None
        Feature coefficients; ``None`` until the model has been trained.
    bias : numpy.ndarray, scalar or None
        Intercept term; ``None`` until the model has been trained.
    loss_history : list
        MSE recorded at each iteration of the most recent :meth:`fit` call.
    """

    def __init__(self, learning_rate=0.001, n_iters=1000):
        self.lr = learning_rate
        self.n_iters = n_iters
        self.weights = None
        self.bias = None
        self.loss_history = []

    def _mse(self, y, y_pred):
        """Return the mean squared error between ``y`` and ``y_pred``."""
        return np.mean((y - y_pred) ** 2)

    def fit(self, X, y):
        """Train the model with batch gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,) or (n_samples, 1)
            Training targets. A 1-D target is reshaped to a column vector.

        Raises
        ------
        ValueError
            If ``X`` is not 2-D or ``y`` does not hold exactly one value per
            sample.

        Notes
        -----
        After fitting, ``weights`` has shape ``(n_features, 1)`` and ``bias``
        has shape ``(1, 1)``, so :meth:`predict` returns a column vector.
        """
        X = np.asarray(X)
        n_samples, n_features = X.shape

        # Force the target into a column. Without this a 1-D ``y`` would
        # broadcast against the (n_samples, 1) predictions into an
        # (n_samples, n_samples) matrix and corrupt the gradients.
        y = np.asarray(y).reshape(n_samples, 1)

        # Every call starts from scratch, so the recorded loss curve must too;
        # otherwise a second fit would append to the previous run's history.
        self.weights = np.zeros((n_features, 1))
        self.bias = np.zeros((1, 1))
        self.loss_history = []

        for _ in range(self.n_iters):
            y_predicted = np.dot(X, self.weights) + self.bias
            error = y_predicted - y

            # Gradients of 0.5 * MSE with respect to the weights and the bias.
            dw = (1 / n_samples) * np.dot(X.T, error)
            db = (1 / n_samples) * np.sum(error)

            self.weights -= self.lr * dw
            self.bias -= self.lr * db

            # The stored loss is the one measured *before* this update.
            self.loss_history.append(self._mse(y, y_predicted))

    def closed_form_solution(self, X, y):
        """Fit the model by solving the least-squares problem directly.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training inputs.
        y : array-like of shape (n_samples,) or (n_samples, n_targets)
            Training targets.

        Notes
        -----
        The solution is obtained with ``np.linalg.lstsq`` rather than by
        explicitly inverting ``X^T X``. For a well-conditioned problem the
        result matches the normal equation; for collinear features, where
        ``X^T X`` is singular, it yields the minimum-norm solution instead of
        failing.

        ``weights`` and ``bias`` inherit their dimensionality from ``y``: a
        column-vector target gives ``weights`` of shape ``(n_features, 1)``
        and ``bias`` of shape ``(1,)``; a 1-D target gives ``weights`` of
        shape ``(n_features,)`` and a scalar ``bias``.
        """
        X = np.asarray(X)
        n_samples = X.shape[0]

        # Prepend a column of ones so the intercept is solved for jointly.
        X_b = np.c_[np.ones((n_samples, 1)), X]

        w_closed, *_ = np.linalg.lstsq(X_b, y, rcond=None)

        self.bias = w_closed[0]      # coefficient of the ones column
        self.weights = w_closed[1:]  # coefficients of the real features

    def predict(self, X):
        """Return predictions ``X @ weights + bias`` for the trained model."""
        return np.dot(X, self.weights) + self.bias

In [2]:
class RegressionUtils:
    """Utility class for regression models: evaluation metrics and visualization."""

    @staticmethod
    def _align(y, y_pred):
        """Return ``y`` and ``y_pred`` as arrays that subtract element-wise.

        When both hold the same number of values but differ in shape (for
        example ``(n,)`` targets against ``(n, 1)`` predictions) both are
        flattened, because NumPy would otherwise broadcast them into an
        ``(n, n)`` matrix. Inputs whose shapes already agree, or whose sizes
        differ, are returned unchanged apart from the array conversion.
        """
        y_true = np.asarray(y)
        y_hat = np.asarray(y_pred)
        if y_true.shape != y_hat.shape and y_true.size == y_hat.size:
            y_true, y_hat = y_true.ravel(), y_hat.ravel()
        return y_true, y_hat

    @staticmethod
    def mean_squared_error(y, y_pred):
        """Compute Mean Squared Error (MSE).

        Parameters
        ----------
        y : array-like
            Actual values.
        y_pred : array-like
            Predicted values holding one prediction per actual value.

        Returns
        -------
        float
            Mean of the squared element-wise differences.
        """
        y_true, y_hat = RegressionUtils._align(y, y_pred)
        return np.mean((y_true - y_hat) ** 2)

    @staticmethod
    def r2_score(y, y_pred):
        """Compute the R² score (coefficient of determination).

        Parameters
        ----------
        y : array-like
            Actual values.
        y_pred : array-like
            Predicted values.

        Returns
        -------
        float
            ``1 - SS_res / SS_tot``. When ``y`` is constant, ``SS_tot`` is
            zero and the ratio is undefined; in that case 1.0 is returned for
            a perfect fit and 0.0 otherwise.
        """
        y_true = np.asarray(y).ravel()
        y_hat = np.asarray(y_pred).ravel()
        ss_total = np.sum((y_true - np.mean(y_true)) ** 2)
        ss_residual = np.sum((y_true - y_hat) ** 2)
        if ss_total == 0:
            # Constant target: avoid 0/0 (nan) or x/0 (-inf).
            return 1.0 if ss_residual == 0 else 0.0
        return 1 - (ss_residual / ss_total)

    @staticmethod
    def plot_loss(loss_history):
        """Plot the loss over iterations.

        Parameters
        ----------
        loss_history : sequence of float
            One loss value per iteration, in order.
        """
        plt.plot(loss_history)
        plt.title('Loss over Gradient Descent')
        plt.xlabel('Iterations')
        plt.ylabel('Mean Squared Error')
        plt.grid(True)
        plt.show()

    @staticmethod
    def plot_predictions(model, X, y):
        """Scatter plot of actual vs. predicted values, with a perfect prediction line.

        Also prints the correlation between predictions and actual values;
        the correlation is reported as 0 when either side has zero variance.

        Parameters
        ----------
        model : object
            Anything exposing ``predict(X)``.
        X : array-like
            Inputs passed to ``model.predict``.
        y : array-like
            Actual values to compare against.
        """
        y_true, y_pred = RegressionUtils._align(y, model.predict(X))

        # Create a perfect prediction line (y = x) spanning the actual values.
        x_line = np.linspace(y_true.min(), y_true.max(), 100)

        # Scatter plot of predicted vs. actual values
        plt.scatter(y_pred, y_true, color='blue', marker='x', s=20, label='Predicted vs Actual')
        plt.plot(x_line, x_line, linestyle='-', color='red', label='Perfect Prediction')

        plt.xlabel('Predicted Values')
        plt.ylabel('Actual Values')
        plt.title('Actual vs. Predicted Values')
        plt.legend()
        plt.grid(True)
        plt.show()

        # np.corrcoef yields nan when either input is constant, so guard it.
        if np.std(y_pred) == 0 or np.std(y_true) == 0:
            correlation = 0
        else:
            correlation = np.corrcoef(y_pred.ravel(), y_true.ravel())[0, 1]

        print(f"Prediction Correlation: {correlation:.4f}")

    @staticmethod
    def plot_residuals(model, X, y):
        """Plot residuals (actual minus predicted) vs. predicted values.

        Parameters
        ----------
        model : object
            Anything exposing ``predict(X)``.
        X : array-like
            Inputs passed to ``model.predict``.
        y : array-like
            Actual values.
        """
        # Align shapes first so (n,) targets minus (n, 1) predictions does not
        # broadcast into an (n, n) residual matrix.
        y_true, y_pred = RegressionUtils._align(y, model.predict(X))
        residuals = y_true - y_pred

        # Scatter plot of residuals vs. predicted values
        plt.scatter(y_pred, residuals, color='blue', marker='x', s=20, label='Residuals')
        plt.axhline(y=0, color='red', linestyle='-', label='Zero Residual Line')

        plt.xlabel("Predicted Values")
        plt.ylabel("Residuals")
        plt.title("Residuals vs. Predicted Values")
        plt.legend()
        plt.grid(True)
        plt.show()