# Part 2: Linear Regression with Multi Variable

In [5]:
import numpy as np
import matplotlib.pyplot as plt

class LinearRegressionMV:
    def __init__(self):
        self.coefficients = None
        self.losses = None
        self.b_values = None

    def fit(self, X, y, method='gradient_descent', learn_rate=0.01, batch_size=32, n_iter=10000, tolerance=1e-6, random_state=None):
        if method == 'gradient_descent':
            self.coefficients, self.losses, self.b_values = self.gradient_descent(X, y, learn_rate, n_iter, tolerance)
        elif method == 'stochastic_gradient_descent':
            self.coefficients, self.losses, self.b_values = self.stochastic_gradient_descent(X, y, learn_rate, n_iter, tolerance, random_state)
        elif method == 'mini_batch_gradient_descent':
            self.coefficients, self.losses, self.b_values = self.mini_batch_gradient_descent(X, y, learn_rate, batch_size, n_iter, tolerance, random_state)
        else:
            raise ValueError("Invalid method. Choose from 'gradient_descent', 'stochastic_gradient_descent', or 'mini_batch_gradient_descent'.")

    def gradient_descent(self, X, y, learn_rate, n_iter, tolerance):
        n_obs, m = X.shape
        vector = np.ones(m + 1) * 0.5  # Initialize coefficients
        losses = []
        b_values = [[] for _ in range(m + 1)]

        for _ in range(n_iter):
            gradient_mean, gradient_b = self.gradient(X, y, vector)
            diff = -learn_rate * np.concatenate((gradient_mean[None, :], gradient_b))
            losses.append(self.mse(X, y, vector))

            for j in range(m + 1):
                b_values[j].append(vector[j])

            if np.all(np.abs(diff) <= tolerance):
                break

            vector += diff

        return vector, losses, b_values

    def stochastic_gradient_descent(self, X, y, learn_rate, n_iter, tolerance, random_state):
        n_obs, m = X.shape
        vector = np.ones(m + 1) * 0.5  # Initialize coefficients
        losses = []
        b_values = [[] for _ in range(m + 1)]

        rng = np.random.default_rng(seed=random_state)

        for _ in range(n_iter):
            index = rng.choice(n_obs)
            x_batch, y_batch = X[index:index + 1], y[index:index + 1]

            gradient_mean, gradient_b = self.gradient(x_batch, y_batch, vector)
            diff = -learn_rate * np.concatenate((gradient_mean[None, :], gradient_b))
            losses.append(self.mse(x_batch, y_batch, vector))

            for j in range(m + 1):
                b_values[j].append(vector[j])

            if np.all(np.abs(diff) <= tolerance):
                break

            vector += diff

        return vector, losses, b_values

    def mini_batch_gradient_descent(self, X, y, learn_rate, batch_size, n_iter, tolerance, random_state):
        n_obs, m = X.shape
        vector = np.ones(m + 1) * 0.5  # Initialize coefficients
        losses = []
        b_values = [[] for _ in range(m + 1)]

        rng = np.random.default_rng(seed=random_state)
        n_batches = int(np.ceil(n_obs / batch_size))

        for _ in range(n_iter):
            rng.shuffle(np.c_[X, y])

            for i in range(n_batches):
                start_idx = i * batch_size
                end_idx = min((i + 1) * batch_size, n_obs)

                x_batch, y_batch = X[start_idx:end_idx], y[start_idx:end_idx]

                gradient_mean, gradient_b = self.gradient(x_batch, y_batch, vector)
                diff = -learn_rate * np.concatenate((gradient_mean[None, :], gradient_b))
                losses.append(self.mse(x_batch, y_batch, vector))

                for j in range(m + 1):
                    b_values[j].append(vector[j])

                if np.all(np.abs(diff) <= tolerance):
                    return vector, losses, b_values

                vector += diff

        return vector, losses, b_values

    def gradient(self, X, y, coefficients):
        coefficients = coefficients.reshape(-1, 1)
        residuals = X.dot(coefficients) - y.reshape(-1, 1)
        gradient_mean = residuals.mean(axis=0)
        gradient_b = (residuals.T.dot(X) / len(X)).flatten()
        return gradient_mean, gradient_b

    def mse(self, X, y, coefficients):
        return np.mean(np.square(coefficients.dot(X.T) - y)) / 2

    def plot_loss_function(self):
        plt.figure(figsize=(10, 6))

        iterations = np.arange(len(self.losses))

        plt.subplot(211)
        plt.plot(iterations, self.losses, label='Loss')
        plt.xlabel('Iteration')
        plt.ylabel('Mean Squared Error')
        plt.title('Loss Function')
        plt.grid(True)

        plt.subplot(223)
        for i, coef in enumerate(self.b_values):
            plt.plot(iterations, coef, label=f'b{i}')

        plt.xlabel('Iteration')
        plt.ylabel('Coefficient Value')
        plt.title('Coefficients')
        plt.grid(True)

        plt.legend()
        plt.tight_layout()
        plt.show()

def get_input_mv():
    n = int(input("Enter number of samples (n): "))
    m = int(input("Enter number of features (m): "))

    print("Enter X as a matrix (each row represents a data point):")
    X = np.array([[float(x) for x in input().split()] for _ in range(n)])
    y = np.array([[float(input(f"Enter y for sample {i + 1}: "))] for i in range(n)])

    return X, y

def main():
    X, y = get_input_mv()

    model = LinearRegressionMV()
    model.fit(X, y, method='gradient_descent')
    print("Gradient Descent Coefficients:", model.coefficients)
    model.plot_loss_function()

    model.fit(X, y, method='stochastic_gradient_descent', random_state=42)
    print("Stochastic Gradient Descent Coefficients:", model.coefficients)
    model.plot_loss_function()

    model.fit(X, y, method='mini_batch_gradient_descent', random_state=42)
    print("Mini Batch Gradient Descent Coefficients:", model.coefficients)
    model.plot_loss_function()

main()

Enter number of samples (n): 2
Enter number of features (m): 2
Enter X as a matrix (each row represents a data point):
1 2
3 4
Enter y for sample 1: 3
Enter y for sample 2: 4 


ValueError: shapes (2,2) and (3,1) not aligned: 2 (dim 1) != 3 (dim 0)