In [23]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression

# Synthetic regression problem: 1000 rows, 14 features (10 of them informative),
# Gaussian noise with std 15, fixed seed so the notebook is reproducible.
X, y = make_regression(
    n_samples=1000,
    n_features=14,
    n_informative=10,
    noise=15,
    random_state=42,
)
# Wrap the raw arrays in pandas objects; feature columns are named col_0 ... col_13.
X = pd.DataFrame(X).add_prefix('col_')
y = pd.Series(y)

In [24]:
class MyLineReg():
    """Linear regression trained with full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    n_iter : int
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float
        Step size multiplied with the gradient at every step.
    weights : array-like, optional
        Initial weights, bias term first. When ``None``, ``fit`` starts from a
        vector of ones. After ``fit`` this attribute holds the learned weights.
    """

    def __init__(self, n_iter, learning_rate, weights=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights

    def mse(self, X, y):
        """Return the mean squared error of ``X @ self.weights`` against ``y``.

        ``X`` is used as given: it must already contain the bias column,
        because no column is added here.
        """
        y_pred = X @ self.weights
        error = y_pred - y
        return np.sum((error) ** 2) / len(y_pred)

    def gr_mse(self, X, y):
        """Return the gradient of the MSE loss with respect to the weights.

        ``X`` is used as given: it must already contain the bias column.
        """
        y_pred = X @ self.weights
        error = y_pred - y
        return (2 / len(y_pred)) * X.T @ (error)

    def fit(self, X, y, verbose=False):
        """Fit the weights by running ``n_iter`` gradient-descent steps.

        Parameters
        ----------
        X : pandas.DataFrame
            Feature matrix without a bias column. It is not modified.
        y : pandas.Series or array-like
            Target values, one per row of ``X``.
        verbose : int or bool, default False
            When truthy, the loss is printed after every ``verbose``-th step
            (``True`` behaves like ``1``). Nothing is printed when falsy.

        Raises
        ------
        ValueError
            If ``X`` already has a column named ``"bias"``, or if the initial
            weights do not have one entry per feature plus one for the bias.
        """
        # DataFrame.insert works in place, so build the design matrix on a copy.
        # Inserting into the caller's frame would pollute it and make a second
        # fit() fail because the "bias" column would already exist.
        design = X.copy()
        design.insert(0, "bias", 1)

        if self.weights is None:
            start = np.ones(design.shape[1])
        else:
            # Float copy: the caller's initial array is never written to, and
            # integer initial weights are accepted.
            start = np.array(self.weights, dtype=float)
        # Label the weights with the design-matrix columns so the fitted
        # coefficients always come back as a named Series.
        self.weights = pd.Series(start, index=design.columns)

        for i in range(self.n_iter):
            # Rebind instead of "-=" so no array shared with the caller is
            # ever updated in place.
            self.weights = self.weights - self.learning_rate * self.gr_mse(design, y)

            if verbose and (i % verbose == 0):
                print(f"{i} | loss: {self.mse(design, y)}")

    def get_coef(self):
        """Return the feature weights, i.e. ``weights`` without the leading bias.

        Raises ``TypeError`` if no weights are available yet (``weights`` is
        still ``None``).
        """
        return self.weights[1:]

    def __repr__(self) -> str:
        """Return the class name followed by every instance attribute as ``name=value``."""
        params = ", ".join(f"{name!s}={value!s}" for name, value in vars(self).items())
        return f"{self.__class__.__name__} class: {params}"

    def __str__(self) -> str:
        """Return the same text as ``__repr__``."""
        return self.__repr__()

In [25]:
# 50 gradient-descent steps with a step size of 0.1.
reg = MyLineReg(n_iter=50, learning_rate=0.1)

In [26]:

# Train and print the loss on every 5th iteration.
reg.fit(X, y, verbose=5)

0 | loss: 13360.32856215382
5 | loss: 1748.2271233241013
10 | loss: 414.22268268298916
15 | loss: 249.15004244116764
20 | loss: 227.2803332136292
25 | loss: 224.20382776707575
30 | loss: 223.74856551174267
35 | loss: 223.67835234805875
40 | loss: 223.66716200136182
45 | loss: 223.66533232352947


In [27]:
# Learned feature weights; get_coef() drops the leading bias term.
reg.get_coef()

col_0     42.918088
col_1     16.596470
col_2      0.497913
col_3     65.371968
col_4     47.591063
col_5     61.776303
col_6      0.246265
col_7     -0.107810
col_8     60.012217
col_9     53.889591
col_10    -0.472909
col_11    14.426826
col_12    17.682723
col_13    47.315290
dtype: float64

In [28]:
# Mean of the learned feature weights (bias excluded).
np.mean(reg.get_coef())

30.55314269483333