In [207]:
import numpy as np
import pandas as pd


class MyLineReg():
    def __init__(self, n_iter, learning_rate, metric=None, reg=None, l1_coef=0, l2_coef=0):
        self.n_iter = n_iter
        self.learning_rate = learning_rate
        self.weights = None
        self.metric = metric
        self.metrics()
        self.reg = reg
        self.l1_coef = l1_coef
        self.l2_coef = l2_coef

    def calculate_gradient(self, X, y, y_pred):
        if self.reg == 'l1':
            return (2/X.shape[0]) * np.dot((y_pred - y.values.ravel()), X) + self.l1_coef*np.sign(self.weights)
        if self.reg == 'l2':
            return (2/X.shape[0]) * np.dot((y_pred - y.values.ravel()), X) + self.l2_coef*2*(self.weights)
        if self.reg == 'elasticnet':
            assert self.l1_coef != 0 and self.l2_coef != 0 
            return (2/X.shape[0]) * np.dot((y_pred - y.values.ravel()), X) + self.l1_coef*np.sign(self.weights) + self.l2_coef*2*(self.weights)
        else:
            return (2/X.shape[0]) * np.dot((y_pred - y.values.ravel()), X)

    def metrics(self) -> None:
        if self.metric:
            if self.metric == 'mae':
                self.metric = ['mae', lambda y,
                               y_pred: np.mean(np.abs(y.values - y_pred))]

            if self.metric == 'mse':
                self.metric = ['mse', lambda y,
                               y_pred: np.mean((y.values - y_pred)**2)]

            if self.metric == 'rmse':
                self.metric = ['rmse', lambda y, y_pred: (
                    np.mean((y.values - y_pred)**2))**(0.5)]

            if self.metric == 'mape':
                self.metric = ['mape', lambda y, y_pred: 100 *
                               np.mean(np.abs((y.values - y_pred)/y))]

            if self.metric == 'r2':
                self.metric = ['r2', lambda y, y_pred: (1 -
                               (np.sum((y.values - y_pred)**2))/(np.sum((y.values - np.mean(y.values))**2)))]

    def __str__(self):
        return f"MyLineReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def __repr__(self):
        return f"MyLineReg class: n_iter={self.n_iter}, learning_rate={self.learning_rate}"

    def fit(self, samples: pd.DataFrame, y: pd.Series, verbose=False) -> None:

        X = samples.copy()
        X.insert(0, 'bias', pd.Series(1, index=range(X.shape[0])))

        self.weights = np.ones(X.shape[1])
        for i in range(self.n_iter):
            y_pred = np.dot(X, self.weights)
            loss = np.mean((y_pred - y.values)**2)


            grad = self.calculate_gradient(X, y, y_pred)


            self.weights = self.weights - grad * self.learning_rate

            if verbose and (i % verbose) == 0 and self.metric is not None:
                print(
                    f'iter = {i+1} ||| Loss = {loss} ||| {self.metric[0]} = {self.metric[1](y, y_pred)}')
            elif verbose and (i % verbose) == 0:
                print(f'iter = {i+1} ||| Loss = {loss}')
            if self.metric:
                self.final_metric = self.metric[1](y, np.dot(X, self.weights))

    def predict(self, samples: pd.DataFrame) -> int:
        """
        Принимает на вход  матрицу фичей в виде датафрейма пандаса.
        Дополняет матрицу фичей единичным вектором (первый столбец).
        Возвращает вектор предсказаний.
        """

        X = samples.copy()
        X.insert(0, 'bias', pd.Series(1, index=range(X.shape[0])))

        return np.dot(X, self.weights)

    def get_coef(self) -> list():
        try:
            assert self.weights is not None
            return np.array(self.weights[1:])
        except:
            return 'fit before!'

    def get_best_score(self) -> int:
        return self.final_metric

In [211]:
X = pd.DataFrame(range(1000))
y = (pd.DataFrame(list(range(1000)))*150)

model = MyLineReg(50, 0.1, 'r2', reg='l2', l2_coef=1)

In [212]:
model.fit(X, y, 10)
model.get_coef()

iter = 1 ||| Loss = 7414087658.5 ||| r2 = -3953.184038717372
iter = 11 ||| Loss = 2.1559759931369004e+106 ||| r2 = -1.1498550128613597e+100
iter = 21 ||| Loss = 6.290709761906351e+202 ||| r2 = -3.3550485613986155e+196
iter = 31 ||| Loss = 1.835504172333837e+299 ||| r2 = -9.789365375145838e+292
iter = 41 ||| Loss = inf ||| r2 = -inf


  (np.sum((y.values - y_pred)**2))/(np.sum((y.values - np.mean(y.values))**2)))]
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
  loss = np.mean((y_pred - y.values)**2)
  ret = umr_sum(arr, axis, dtype, out, keepdims, where=where)


array([-2.16680047e+243])

In [213]:
model.predict(X)

array([-3.25182907e+240, -2.17005230e+243, -4.33685277e+243,
       -6.50365325e+243, -8.67045372e+243, -1.08372542e+244,
       -1.30040547e+244, -1.51708551e+244, -1.73376556e+244,
       -1.95044561e+244, -2.16712566e+244, -2.38380570e+244,
       -2.60048575e+244, -2.81716580e+244, -3.03384584e+244,
       -3.25052589e+244, -3.46720594e+244, -3.68388599e+244,
       -3.90056603e+244, -4.11724608e+244, -4.33392613e+244,
       -4.55060617e+244, -4.76728622e+244, -4.98396627e+244,
       -5.20064632e+244, -5.41732636e+244, -5.63400641e+244,
       -5.85068646e+244, -6.06736651e+244, -6.28404655e+244,
       -6.50072660e+244, -6.71740665e+244, -6.93408669e+244,
       -7.15076674e+244, -7.36744679e+244, -7.58412684e+244,
       -7.80080688e+244, -8.01748693e+244, -8.23416698e+244,
       -8.45084703e+244, -8.66752707e+244, -8.88420712e+244,
       -9.10088717e+244, -9.31756721e+244, -9.53424726e+244,
       -9.75092731e+244, -9.96760736e+244, -1.01842874e+245,
       -1.04009675e+245,