In [1]:
%matplotlib notebook

import numpy as np
import matplotlib.pyplot as plt

# Multiple linear regression using the normal equation

The multiple linear regression model is as follows:
    $$h(x) = \theta_0 + \theta_1 x_1 + \theta_2 x_2 + \ldots +\theta_n x_n$$
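Once again, the best parameters can be solved analytically from the normal equation
    $$\theta = (X^T X)^{-1} X^T y$$
where $X$ is the $m \times (n+1)$ design matrix (a leading column of ones for the intercept $\theta_0$, followed by the $n$ feature columns) and $y$ is the vector of $m$ targets.
Explanation: this is the least-squares solution from linear algebra. It minimizes $\lVert X\theta - y \rVert^2$, whose gradient vanishes exactly when $X^T X \theta = X^T y$.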

In [64]:
class MultipleLinearRegressionMyClass:
    '''
    Multiple linear regression fitted by ordinary least squares via the
    normal equation.

    Attributes set by the constructor:
        X = m x (n + 1) design matrix: a column of ones followed by the features.
        y = 1-D array of the m target values.
        m = number of training samples.

    Attributes set by fit():
        coef_ = 1-D array [theta_0, theta_1, ..., theta_n] (intercept first).
        R2    = coefficient of determination on the training data.
    '''

    def __init__(self, X, y):
        '''
        Store the training data and prepend the intercept column.

        Input:
            X = m x n array. The features matrix. A 1-D array of length m is
                treated as m samples of a single feature.
            y = m x 1 array (or 1-D array of length m). The target variable.

        Raises:
            ValueError if X and y do not contain the same number of samples.
        '''
        X = np.asarray(X)
        if X.ndim == 1:
            X = X.reshape(-1, 1)

        # Flatten the target so an (m, 1) column and an (m,) vector behave the
        # same; a 2-D y would make coef_ 2-D and break the dot products below.
        y = np.asarray(y).ravel()

        if len(X) != len(y):
            raise ValueError(
                "X and y must have the same number of samples "
                "(got %d and %d)" % (len(X), len(y))
            )

        self.m = len(X)
        # Leading column of ones lets theta_0 act as the intercept.
        self.X = np.column_stack((np.ones(self.m), X))
        self.y = y

    # Class Methods
    def fit(self):
        '''
        Solve the normal equation for coef_ and compute the training R2.

        Raises:
            numpy.linalg.LinAlgError if X^T X is singular (e.g. perfectly
            collinear features or fewer samples than parameters).
        '''
        # Normal equation, written as the linear system (X^T X) theta = X^T y.
        # Solving the system avoids forming the explicit inverse.
        self.coef_ = np.linalg.solve(self.X.T @ self.X, self.X.T @ self.y)

        # Compute the linear regression model's score: R2 = 1 - SS_res / SS_tot
        residuals = self.y - self.X @ self.coef_
        SS_res = np.sum(residuals ** 2)
        SS_tot = np.sum((self.y - self.y.mean()) ** 2)

        # NOTE: a constant target gives SS_tot == 0, so R2 is nan/inf.
        self.R2 = 1 - (SS_res / SS_tot)

    def predict(self, x_test):
        '''
        Predict targets for new samples using the fitted coefficients.

        Input:
            x_test = k x n array-like, one row per sample, WITHOUT the
                     intercept column. A 1-D input of length k is treated as
                     k samples of a single feature.

        Returns:
            1-D array of the k predicted values (empty for empty input).
        '''
        x_test = np.asarray(x_test)
        if x_test.size == 0:
            return np.empty(0)
        if x_test.ndim == 1:
            x_test = x_test.reshape(-1, 1)

        # Prepend the intercept column, then apply the model to all rows at once.
        design = np.column_stack((np.ones(len(x_test)), x_test))
        return design @ self.coef_

# Test the class

### Make regression data

In [60]:
from sklearn.datasets import make_regression

# Fixed seed so that Restart & Run All regenerates the same synthetic data set
# (20 samples, 2 features, Gaussian noise with standard deviation 25).
X, y = make_regression(n_samples=20, n_features=2, noise=25, random_state=42)

### Fit the data

In [61]:
# Build the model on the synthetic data, solve for the coefficients, then
# report them (intercept first) together with the training R^2.
regtest = MultipleLinearRegressionMyClass(X=X, y=y)
regtest.fit()
print(regtest.coef_, regtest.R2)

[-6.5242594  12.88949055 81.82963379] 0.950160069993895


### Make predictions

In [62]:
# Predict for two new samples; each row is one sample (x1, x2).
regtest.predict(
    np.array([
        [1, 3],
        [2, 3],
    ])
)

array([251.85413252, 264.74362306])

# Scikit-learn

In [63]:
from sklearn.linear_model import LinearRegression

# Reference fit with scikit-learn on the same data; print its intercept,
# coefficients and R^2 for comparison with the hand-written class.
reg = LinearRegression()
reg.fit(X, y)
print(reg.intercept_, reg.coef_, reg.score(X, y))

-6.5242593984490185 [12.88949055 81.82963379] 0.950160069993895
