# Multiple Linear regression

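* The equation for multiple linear regression with $n$ features is given by:
  - $$ \hat{y} = \theta_{0} + \theta_{1}x_{1} + \theta_{2}x_{2} + \dots + \theta_{n-1}x_{n-1} + \theta_{n}x_{n}$$
  - In vector form, with $x = (1, x_{1}, \dots, x_{n})^{T}$ and $\theta = (\theta_{0}, \theta_{1}, \dots, \theta_{n})^{T}$: $$ \hat{y} = x^{T}\theta$$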

* The gradient descent update for multiple linear regression, where $N$ is the number of training samples and $x_j^{(i)}$ is the value of feature $j$ in sample $i$, is given by:
  - $$ \theta_{0} \leftarrow \theta_{0} - \alpha\nabla\theta_0, \quad \nabla\theta_0 = \frac{1}{N} \sum_{i = 1}^{N} (\hat{y}_i - y_i)$$
  - $$ \theta_{j} \leftarrow \theta_{j} - \alpha\nabla\theta_j, \quad \nabla\theta_j = \frac{1}{N} \sum_{i = 1}^{N} \left[(\hat{y}_i - y_i)\,x_j^{(i)}\right] $$

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
class MultipleLinearRegression:
    """Multiple linear regression trained with batch gradient descent.

    The model predicts ``y_hat = X @ weights + bias`` and minimises the
    half mean squared error ``(1 / (2 * N)) * sum((y_hat - y) ** 2)``.

    Attributes are populated by :meth:`fit`; ``training_costs`` holds the cost
    measured after every gradient step of the most recent call to ``fit``.
    """

    def __init__(self, learning_rate: float = 0.01, num_iterations: int = 1000):
        """Store the hyper-parameters and initialise an untrained model.

        Args:
            learning_rate: Step size applied to each gradient update.
            num_iterations: Number of gradient descent steps run by ``fit``.
        """
        self.learning_rate = learning_rate        ## Step size for gradient descent.
        self.num_iterations = num_iterations      ## Number of gradient descent steps.
        self.X = None                             ## Feature array, set by fit().
        self.y = None                             ## Target array, set by fit().
        self.N = 0                                ## Number of samples.
        self.D = 0                                ## Number of features.
        self.weights = None                       ## One weight per feature.
        self.bias = None                          ## Intercept term.
        self.training_costs = []                  ## Cost after each training iteration.

    def fit(self, X, y) -> None:
        """Train the model on ``X`` and ``y`` from zero-initialised parameters.

        Args:
            X: Array-like of shape ``(N, D)`` holding the features.
            y: Array-like of shape ``(N,)`` or ``(N, 1)`` holding the targets.

        Raises:
            ValueError: If ``X`` is not 2-D, has no samples, or ``y`` does not
                contain exactly one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float)

        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional (samples, features), got shape {X.shape}")
        if X.shape[0] == 0:
            raise ValueError("X must contain at least one sample")
        ## A column vector would broadcast against the (N,) predictions into an
        ## (N, N) matrix, so flatten it before use.
        if y.ndim == 2 and y.shape[1] == 1:
            y = y.ravel()
        if y.ndim != 1 or y.shape[0] != X.shape[0]:
            raise ValueError(
                f"y must hold one target per sample: X has shape {X.shape}, y has shape {y.shape}"
            )

        self.X = X
        self.y = y
        self.N, self.D = X.shape
        self.weights = np.zeros(self.D)
        self.bias = 0.0
        self.training_costs = []                  ## Start a fresh history on every fit.

        for i in range(self.num_iterations):
            self.update_weights()                 ## One gradient descent step.
            current_cost = self.get_current_cost()
            self.training_costs.append(current_cost)
            if i % 100 == 0:
                print(f'Iteration {i}, Cost: {current_cost}')

    def update_weights(self) -> None:
        """Apply one batch gradient descent step to the weights and the bias."""
        residuals = self.predict(self.X) - self.y             ## y_hat - y for every sample.
        dw = (1 / self.N) * np.dot(self.X.T, residuals)       ## Gradient w.r.t. the weights.
        db = (1 / self.N) * np.sum(residuals)                 ## Gradient w.r.t. the bias.
        self.weights -= self.learning_rate * dw
        self.bias -= self.learning_rate * db

    def get_current_cost(self):
        """Return the half mean squared error on the training data."""
        residuals = self.predict(self.X) - self.y
        return (1 / (2 * self.N)) * np.sum(residuals ** 2)

    def predict(self, X):
        """Return ``X @ weights + bias`` using the current parameters."""
        return np.dot(X, self.weights) + self.bias

    def plot_cost_progression(self) -> None:
        """Plot the recorded training cost against the iteration number."""
        ## The style must be selected before the figure is created for it to
        ## apply to the figure as well as the axes.
        plt.style.use("ggplot")
        plt.figure(figsize=(8, 8))
        iterations = list(range(1, len(self.training_costs) + 1))
        sns.lineplot(x=iterations, y=self.training_costs)
        plt.xlabel("Number of iterations")
        plt.ylabel("Training cost or error")
        plt.title("Progression of training cost / error v/s iterations")
        plt.grid(True)
        plt.show()

In [None]:
from sklearn.datasets import make_regression
from sklearn.preprocessing import StandardScaler
## Generate a synthetic regression problem: 5000 samples, 3 features, fixed seed.
X, y = make_regression(n_samples=5000, n_features=3, noise=10, random_state=42)

## Rescale the features with StandardScaler before running gradient descent.
scaler = StandardScaler()
X = scaler.fit_transform(X)

## Train with a learning rate of 0.01 for 2000 iterations, then predict on the training data.
model = MultipleLinearRegression(learning_rate=0.01, num_iterations=2000)
model.fit(X, y)
y_pred = model.predict(X)


In [None]:
## Plot the training cost that the model recorded at each iteration of fit().
model.plot_cost_progression()