In [75]:
import pandas as pd
import numpy as np

### Собственная реализация градиентного спуска для задачи линейной регрессии

In [92]:
from sklearn.base import BaseEstimator, RegressorMixin

class MyLinearRegression(BaseEstimator, RegressorMixin):
    """Linear regression trained with full-batch gradient descent on the MSE.

    Parameters
    ----------
    fit_intercept : bool, default=True
        Whether to learn a bias term. When False the intercept stays at 0.
    random_state : int or None, default=None
        Seed for the random initial weights. With ``None`` the weights are
        drawn from NumPy's global random state via ``np.random.rand``.

    Attributes
    ----------
    coef : ndarray of shape (n_features,)
        Learned weights (set by ``fit``).
    intercept : float
        Learned bias (set by ``fit``).
    n_features : int
        Number of columns of the training matrix.
    loss, last_loss : float
        ``loss_func`` on the training data after the final update and after
        the update before it (``last_loss`` is ``inf`` if no update ran).
    """

    def __init__(self, fit_intercept=True, random_state=None):
        self.fit_intercept = fit_intercept
        self.random_state = random_state

    @staticmethod
    def mse(y_true, y_predictions):
        """Return the mean squared error between targets and predictions."""
        return np.mean((y_true - y_predictions) ** 2)

    @staticmethod
    def _relative_change(curr_loss, last_loss):
        """Return |last_loss - curr_loss| scaled by max(1, last_loss)."""
        return abs(last_loss - curr_loss) / max(1, last_loss)

    def fit(self, X, y, learning_rate=1e-1, loss_func=None, tol=1e-5, max_iter=10000):
        """Fit the weights by gradient descent.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
        y : array-like of shape (n_samples,) or (n_samples, 1)
        learning_rate : float
            Step size of each gradient update.
        loss_func : callable(y_true, y_pred) -> float, optional
            Used only for the stopping criterion; the updates always follow
            the gradient of the MSE. Defaults to ``self.mse``.
        tol : float
            Stop once the relative change of the loss between two consecutive
            updates is no longer greater than ``tol``.
        max_iter : int
            Upper bound on the number of gradient updates.

        Returns
        -------
        self

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` has a different length.
        """
        # Work on plain float arrays so DataFrame / Series / column-vector
        # inputs cannot trigger index alignment or (n, n) broadcasting.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y, dtype=float).ravel()
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got {X.ndim} dimension(s)")
        n_objects, self.n_features = X.shape
        if n_objects == 0:
            raise ValueError("X must contain at least one sample")
        if y.shape[0] != n_objects:
            raise ValueError(
                f"X and y have inconsistent lengths: {n_objects} != {y.shape[0]}"
            )

        # Resolve the default at call time: a staticmethod object captured in
        # the signature is not callable on Python < 3.10.
        if loss_func is None:
            loss_func = self.mse

        if self.random_state is None:
            self.coef = np.random.rand(self.n_features)
        else:
            rng = np.random.default_rng(self.random_state)
            self.coef = rng.random(self.n_features)
        self.intercept = 0.0

        predictions = self.predict(X)
        self.loss = loss_func(y, predictions)
        self.last_loss = np.inf

        grad_scale = 2.0 / n_objects
        for _ in range(max_iter):
            residuals = predictions - y
            self.coef -= learning_rate * grad_scale * (X.T @ residuals)
            if self.fit_intercept:
                self.intercept -= learning_rate * grad_scale * np.sum(residuals)

            self.last_loss = self.loss
            predictions = self.predict(X)
            self.loss = loss_func(y, predictions)

            # `not (... > tol)` also stops when the loss has become NaN.
            if not self._relative_change(self.loss, self.last_loss) > tol:
                break
        return self

    def predict(self, X):
        """Return ``X @ coef + intercept`` for the fitted weights."""
        return X @ self.coef + self.intercept

### Сравнение по качеству

### Моя реализация

In [93]:
from sklearn.datasets import load_diabetes
# Load the diabetes dataset and pull out its feature matrix and target vector.
diabetes = load_diabetes()
X = diabetes.data
y = diabetes.target

In [94]:
# Custom gradient-descent estimator with its default setting spelled out.
my_lr = MyLinearRegression(fit_intercept=True)

In [95]:
from sklearn.model_selection import cross_val_score

# The scorer returns negated MSE per fold; flip the sign and take the root to get RMSE.
# The scores are computed outside the f-string because reusing the same quote
# character inside an f-string is a SyntaxError before Python 3.12.
my_lr_neg_mse = cross_val_score(my_lr, X, y, cv=7, scoring="neg_mean_squared_error")
print(f"Mean RMSE: {np.mean(np.sqrt(-my_lr_neg_mse))}")

Mean RMSE: 54.83340078875301


### Встроенная линейная регрессия

In [96]:
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LinearRegression
# Single-step pipeline wrapping scikit-learn's LinearRegression.
pipe = Pipeline(steps=[('LR', LinearRegression())])

In [97]:
# The scorer returns negated MSE per fold; flip the sign and take the root to get RMSE.
# The scores are computed outside the f-string because reusing the same quote
# character inside an f-string is a SyntaxError before Python 3.12.
builtin_neg_mse = cross_val_score(pipe, X, y, cv=7, scoring="neg_mean_squared_error")
print(f"Mean RMSE: {np.mean(np.sqrt(-builtin_neg_mse))}")

Mean RMSE: 54.760886101733306
