In linear regression, you work with a dataset represented as a matrix $X \in \mathbb{R}^{n \times p}$, where
+ $n$ is the number of data points in the dataset (rows)
+ $p$ is the number of features, or covariates, in the dataset (columns)



In [31]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression

In [96]:
import numpy as np

class LinearRegression_GD:
    """Linear regression fitted with batch gradient descent on the MSE loss.

    The intercept is modelled by prepending a column of ones to the design
    matrix, so after fitting ``beta`` has shape ``(n_features + 1, 1)`` with
    the intercept in row 0 followed by one coefficient per feature.
    """

    def __init__(self, learning_rate=0.01):
        """Store the step size used for every gradient-descent update."""
        self.learning_rate = learning_rate
        self.beta = None  # Trained coefficients; populated by fit()

    def fit(self, X, y, epochs=100, verbose=True):
        """Estimate ``beta`` by running ``epochs`` full-batch gradient steps.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.
            y: Array-like holding ``n_samples`` targets (1-D or a column).
            epochs: Number of gradient-descent iterations.
            verbose: When true, print the coefficients and loss each epoch.

        Raises:
            ValueError: If ``X`` is not a non-empty 2-D array or ``y`` does
                not contain exactly one target per row of ``X``.
        """
        # Accept lists and other array-likes, not only ndarrays
        X = np.asarray(X, dtype=float)
        # Reshape y to ensure it is a column vector (n_samples, 1)
        y = np.asarray(y, dtype=float).reshape(-1, 1)

        if X.ndim != 2:
            raise ValueError(
                f"X must be 2-D (n_samples, n_features); got shape {X.shape}"
            )
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X must contain at least one sample")
        # Without this check a length-1 y would silently broadcast against
        # every prediction instead of failing.
        if y.shape[0] != n_samples:
            raise ValueError(
                f"y has {y.shape[0]} values but X has {n_samples} samples"
            )

        # Initialize beta with random values in [0, 1) (features + bias)
        self.beta = np.random.rand(n_features + 1, 1)

        # Add bias term (column of ones) to the input features
        X = np.c_[np.ones(n_samples), X]

        # Gradient descent loop
        for i in range(epochs):
            # Gradient of the MSE: (2 / n) * X^T (X beta - y)
            error = X @ self.beta - y  # Prediction error
            gradient = (2 / n_samples) * (X.T @ error)

            # Update beta using gradient descent
            self.beta -= self.learning_rate * gradient

            if verbose:
                # NOTE: the loss is computed from the residuals taken before
                # this epoch's update, while the printed beta is the updated one.
                loss = np.mean(error ** 2)
                print(f"Epoch: {i+1}, Beta: {self.beta.flatten()}, Loss: {loss:.6f}")

    def predict(self, X):
        """Return predictions of shape ``(n_samples, 1)`` for ``X``.

        Args:
            X: Array-like of shape ``(n_samples, n_features)``.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.beta is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predict()")

        X = np.asarray(X, dtype=float)
        # Add bias term (column of ones) to the input features
        X = np.c_[np.ones(X.shape[0]), X]
        return X @ self.beta


In [97]:
# Synthetic design matrix: 100 samples, 3 features, each uniform on [0, 10)
X = 10 * np.random.rand(100, 3)
# Targets follow weights (3, 2, 1) on the three features plus N(0, 1) noise
y = (
    3 * X[:, 0]
    + 2 * X[:, 1]
    + X[:, 2]
    + np.random.normal(loc=0, scale=1, size=100)
)

def derivative_1(X, y, beta=None):
    """Return the gradient of the squared-error sum ``||X @ beta - y||^2``.

    The result is ``2 * X.T @ (X @ beta - y)``; it is not divided by the
    number of samples.

    Args:
        X: Array-like of shape ``(n_samples, n_features)``.
        y: Array-like of ``n_samples`` targets.
        beta: Coefficient vector at which to evaluate the gradient. Defaults
            to a vector of ones with one entry per column of ``X``.

    Returns:
        Array with one gradient entry per feature.
    """
    X = np.asarray(X)
    y = np.asarray(y)
    if beta is None:
        # Size the default from X instead of hard-coding three features;
        # integer ones leave the result dtype determined by X and y.
        beta = np.ones(X.shape[1], dtype=int)
    return 2 * X.T @ ((X @ beta) - y)

# Evaluate the gradient helper on the synthetic data; the notebook echoes the result below
derivative_1(X,y)

array([-16914.26306559, -16588.45208308, -14382.16455375])

In [98]:
# Train the gradient-descent model with its defaults (learning_rate=0.01, epochs=100);
# fit() prints the coefficients and loss for every epoch
model_GD = LinearRegression_GD()
model_GD.fit(X,y)

Epoch: 1, Beta: [0.47231359 2.78278079 3.1682895  2.53872797], Loss: 467.653009
Epoch: 2, Beta: [0.2037124  1.56534149 1.53283948 0.95250821], Loss: 214.964668
Epoch: 3, Beta: [0.3838114  2.65164976 2.53992877 1.8536545 ], Loss: 100.077393
Epoch: 4, Beta: [0.26293366 2.14450285 1.79598442 1.11465066], Loss: 47.530494
Epoch: 5, Beta: [0.34344768 2.66436386 2.24076158 1.49664098], Loss: 23.289031
Epoch: 6, Beta: [0.28880329 2.46296054 1.90129022 1.14591595], Loss: 11.968168
Epoch: 7, Beta: [0.32454064 2.71780827 2.09692618 1.30147224], Loss: 6.590776
Epoch: 8, Beta: [0.29957521 2.64492579 1.94159841 1.13061649], Loss: 3.977566
Epoch: 9, Beta: [0.31516563 2.77366825 2.02735877 1.18917303], Loss: 2.669789
Epoch: 10, Beta: [0.30348812 2.75266504 1.95621227 1.10297129], Loss: 1.991483
Epoch: 11, Beta: [0.31000482 2.82004479 1.99379493 1.12125665], Loss: 1.625050
Epoch: 12, Beta: [0.30426962 2.81845536 1.96130705 1.0758103 ], Loss: 1.418407
Epoch: 13, Beta: [0.30669437 2.85512159 1.97790645 1