In [64]:
import numpy as np
import pandas as pd
from sklearn.datasets import make_regression

# Reproducible synthetic regression problem: 1000 samples, 14 features
# (10 of them informative), noise=15, fixed random_state.
X, y = make_regression(
    n_samples=1000,
    n_features=14,
    n_informative=10,
    noise=15,
    random_state=42,
)
# Wrap the raw arrays in pandas containers; feature columns are labelled
# col_0 ... col_<n_features - 1>.
X = pd.DataFrame(X, columns=[f'col_{idx}' for idx in range(X.shape[1])])
y = pd.Series(y)

In [65]:
class MyLineReg():
    """Linear regression trained by full-batch gradient descent on the MSE loss.

    Parameters
    ----------
    n_iter : int
        Number of gradient-descent steps performed by ``fit``.
    learning_rate : float
        Step size applied to the gradient at every step.
    weights : array-like, optional
        Initial weight vector, bias weight first. When ``None``, ``fit``
        starts from a vector of ones sized to the training data.
    """

    def __init__(self, n_iter, learning_rate, weights=None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = weights

    def mse(self, X, y):
        """Return the mean squared error of ``X @ self.weights`` against ``y``.

        ``X`` is used as given: no bias column is added here, so it must
        already have one column per entry of ``self.weights``.
        """
        y_pred = X @ self.weights
        error = y_pred - y
        return np.sum((error) ** 2) / len(y_pred)

    def gr_mse(self, X, y):
        """Return the gradient of the MSE with respect to the weights.

        Computes ``(2 / n) * X.T @ (X @ w - y)`` where ``n`` is the number of
        predictions. As in ``mse``, ``X`` is used as given.
        """
        y_pred = X @ self.weights
        error = y_pred - y
        return (2 / len(y_pred)) * X.T @ (error)

    def fit(self, X, y, verbose=False):
        """Run ``n_iter`` gradient-descent steps on ``X`` / ``y``.

        Parameters
        ----------
        X : pandas.DataFrame
            Feature matrix. A copy is taken and a constant ``"bias"`` column is
            inserted at position 0; the caller's frame is left untouched.
        y : array-like
            Target values.
        verbose : int or bool, default False
            When truthy, the loss is printed every ``verbose`` iterations
            (``True`` behaves like 1, i.e. every iteration). The loss shown is
            computed after that iteration's weight update.

        Raises
        ------
        ValueError
            If the initial weights do not contain exactly one value per
            feature plus one for the bias.
        """
        X = X.copy()
        y = y.copy()
        X.insert(0, "bias", 1)

        n_weights = X.shape[1]
        if self.weights is None:
            start = np.ones(n_weights)
        else:
            # Work on a private float copy: the update below is in place, so
            # without the copy a caller-supplied array would be overwritten,
            # and an integer array could not hold the fractional updates.
            start = np.array(self.weights, dtype=float)
            if start.shape != (n_weights,):
                raise ValueError(
                    f"weights must have shape ({n_weights},) (bias + features), "
                    f"got {start.shape}"
                )
        self.weights = start

        for i in range(self.n_iter):
            # Strip the pandas index from the gradient so the in-place update
            # is a plain positional ndarray operation.
            self.weights -= self.learning_rate * np.asarray(self.gr_mse(X, y))

            if verbose and (i % verbose == 0):
                print(f"{i} | loss: {self.mse(X, y)}")

    def predict(self, X):
        """Return ``X`` (with a leading bias column added to a copy) times the weights.

        ``self.weights`` must already be set, either by ``fit`` or through the
        constructor.
        """
        X = X.copy()
        X.insert(0, "bias", 1)
        return X @ self.weights

    def get_coef(self):
        """Return the feature weights, i.e. all weights except the leading bias."""
        return self.weights[1:]

    def __repr__(self) -> str:
        """Return the class name followed by every instance attribute as ``name=value``."""
        params = ", ".join("%s=%s" % item for item in vars(self).items())
        return f"{self.__class__.__name__} class: " + params

    def __str__(self) -> str:
        """Return the same text as ``__repr__``."""
        return repr(self)

In [66]:
# 300 gradient-descent steps with a step size of 0.01; weights start at the default.
reg = MyLineReg(n_iter=300, learning_rate=0.01)

In [67]:
# Train on the full data set, logging the loss every 30 iterations,
# then show the first five in-sample predictions.
reg.fit(X, y, verbose=30)
reg.predict(X).head(n=5)

0 | loss: 19822.27161599988
30 | loss: 6193.199038100217
60 | loss: 2076.2727271462527
90 | loss: 809.2923486943216
120 | loss: 412.12742964839595
150 | loss: 285.3638179233295
180 | loss: 244.19486951513113
210 | loss: 230.60090822132693
240 | loss: 226.0415004391157
270 | loss: 224.48984875614096


0    -61.499540
1    131.311422
2    -51.865347
3     23.276149
4   -131.046757
dtype: float64