In [89]:
import pandas as pd
import numpy as np
from sklearn.datasets import make_regression

X, y = make_regression(
    n_samples=1000, n_features=14, n_informative=10, noise=15, random_state=42
)
X = pd.DataFrame(X)
y = pd.Series(y)

X.columns = [f"col_{col}" for col in X.columns]

print(X)

        col_0     col_1     col_2     col_3     col_4     col_5     col_6  \
0    1.191261 -0.308960  0.675137 -1.460501  0.053059 -1.886129  2.710794   
1    1.071476 -1.424766 -1.109750 -0.457677  0.399997  1.587401 -1.547570   
2   -0.045929  1.868603 -0.016568 -0.484258  1.089905 -1.147160  0.590744   
3   -1.487154  2.220322  0.718332  1.682888 -0.420986 -0.054746  1.900832   
4    0.344054  0.657763  0.348342 -0.417430 -0.589112  1.057814 -0.487705   
..        ...       ...       ...       ...       ...       ...       ...   
995 -1.422254  0.576557 -0.646573 -0.756351 -0.127918  1.119575  1.687142   
996  0.190500 -0.132634  0.709452  0.331980 -2.172670 -0.120381  0.513106   
997 -0.326648 -0.062894  2.002427 -0.650657  1.592964 -0.395284  0.360226   
998 -1.574342 -1.610263  0.407690  1.149487  1.466442 -0.338669 -2.059160   
999  0.331964 -0.159202  0.510099 -0.586971  0.133761  2.344054  0.166339   

        col_7     col_8     col_9    col_10    col_11    col_12    col_13  

In [93]:
class MyLineReg:
    def __init__(self, n_iter, learning_rate, metric: str = None):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.metric = metric

    def __str__(self):
        return (
            f"MyLineReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"
        )

    def get_metric(self, metric, y, y_hat):
        metrics = {'mae': np.sum(abs(y - y_hat)) / len(y),
                   'mse': np.sum((y_hat - y) ** 2) / len(y),
                   'rmse': np.sqrt(np.sum((y_hat - y) ** 2) / len(y)),
                   'mape': np.sum(np.abs((y - y_hat) / y)) * (100 / len(y)),
                   'r2': 1 - (np.sum(((y - y_hat)**2)) / np.sum(((y - np.mean(y))**2)))
                   }
        return metrics[metric]

    def fit(self, X: pd.DataFrame, y: pd.Series, verbose: int = False):
        #X.insert(0, 'ones', 1) #дополнение вектора фичей единичным столбцом
        W = np.ones(X.shape[1]) #инициализация вектора весов соответствующей длины

        for i in range(self.n_iter):
            y_hat = np.dot(X, W) #вычисление предсказаний
            
            MSE = self.get_metric('mse', y, y_hat) #вычисление метрики MSE (в данном случае MSE)
            
            if verbose:
                if i == 0:
                    if self.metric:
                        print(f'start | loss: {MSE} | {self.metric}: {self.get_metric(self.metric, y, y_hat)}')
                    else:
                        print(f'start | loss: {MSE}')
                elif (i + 1) % verbose == 0:
                    if self.metric:
                        print(f'{i} | loss: {MSE} | {self.metric}: {self.get_metric(self.metric, y, y_hat)}')
                    else:
                        print(f'{i} | loss: {MSE}')

            grad = 2/len(y) * np.dot((y_hat - y), X)
            W -= self.learning_rate * grad
            self.weights = W


    def get_coef(self):
        return self.weights[1:]

    def predict(self, X: pd.DataFrame):
        #X.insert(0, 'ones', 1)
        return np.dot(X, self.weights)
    
    def get_best_score(self):
        last_metric = self.get_metric(self.metric, y, self.predict(X))
        return last_metric

object = MyLineReg(50, 0.1, metric='mae')
object.fit(X, y, 10)
object.get_best_score()

start | loss: 20621.089638778492 | mae: 113.7842420167003
9 | loss: 657.7379735877852 | mae: 20.292259499795573
19 | loss: 231.51002421258787 | mae: 12.215005097465703
29 | loss: 223.84038255545846 | mae: 12.005051555730645
39 | loss: 223.66948016014283 | mae: 11.994186476564654
49 | loss: 223.66509145066422 | mae: 11.992771013940239


11.992727057657051

In [86]:
X.iloc[:, 1:]

Unnamed: 0,col_0,col_1,col_2,col_3,col_4,col_5,col_6,col_7,col_8,col_9,col_10,col_11,col_12,col_13
0,1.191261,-0.308960,0.675137,-1.460501,0.053059,-1.886129,2.710794,-1.716033,0.865290,0.138078,-0.063745,-2.104583,-0.476876,1.677116
1,1.071476,-1.424766,-1.109750,-0.457677,0.399997,1.587401,-1.547570,0.323247,0.165859,-0.302097,0.203944,-0.212452,0.836991,0.368498
2,-0.045929,1.868603,-0.016568,-0.484258,1.089905,-1.147160,0.590744,0.683325,-0.571184,-0.802199,-0.220114,0.034808,0.043829,0.955803
3,-1.487154,2.220322,0.718332,1.682888,-0.420986,-0.054746,1.900832,-0.101198,0.090042,-0.202924,0.340865,0.606237,-0.037008,-0.841048
4,0.344054,0.657763,0.348342,-0.417430,-0.589112,1.057814,-0.487705,-0.897830,-0.935596,-1.186993,1.074333,-0.069532,-0.177918,-0.912811
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,-1.422254,0.576557,-0.646573,-0.756351,-0.127918,1.119575,1.687142,-1.081548,-0.955540,3.078881,0.881640,0.311250,-1.606446,0.203464
996,0.190500,-0.132634,0.709452,0.331980,-2.172670,-0.120381,0.513106,-0.435486,0.847422,1.107081,-0.259547,-0.974529,-0.535328,-0.090533
997,-0.326648,-0.062894,2.002427,-0.650657,1.592964,-0.395284,0.360226,-0.307571,1.465211,0.658143,0.541321,-0.447878,-0.891543,0.069704
998,-1.574342,-1.610263,0.407690,1.149487,1.466442,-0.338669,-2.059160,0.581000,-1.409216,-1.082018,0.798501,0.753190,-1.532598,0.269306
