Памятка для себя:
- обучить модель - найти функцию, которая бы хорошо приближала исходную функцию
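- ищем в семействе линейных функций $$a(x) = \langle w, x \rangle + w_0$$
- "хорошо приближала" - в смысле квадратичной функции потерь $$L(y, a(x)) = (a(x) - y)^2$$, усреднённой по обучающей выборке: $$\frac{1}{n} \sum_{i=1}^{n} (a(x_i) - y_i)^2 \to \min_{w, w_0}$$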
- будем искать такие $w$ и $w_0$, которые бы минимизировали это выражение
- будем искать градиентным спуском


Сгенерируем синтетические данные для регрессии

In [None]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [None]:
# Synthetic linear-regression dataset whose generation parameters are random.
# NOTE: only make_regression itself is seeded (random_state=42). The four
# parameters below come from NumPy's global RNG, so they change between runs
# unless np.random.seed(...) is called beforehand.

features = np.random.randint(5, 51)  # total number of columns, 5..50 inclusive
# Keep at least one informative feature: int(features * u) truncates to 0
# whenever u < 1 / features, which would leave the target with no dependence
# on X at all (nothing for the regression to learn).
informative = max(1, int(features * np.random.random_sample()))
noise = np.random.random_sample()  # passed as make_regression's noise, in [0, 1)
bias = np.random.randint(0, 50)  # passed as make_regression's bias, 0..49

X, y = make_regression(
    n_samples=100,
    n_features=features,
    n_informative=informative,
    bias=bias,
    noise=noise,
    random_state=42,
)

print(f'features = {features}, informative = {informative}, noise = {noise}, bias = {bias}')
print(X.shape, y.shape)

features = 23, informative = 20, noise = 0.948870958500644, bias = 25
(100, 23) (100,)


In [None]:
class NaiveGradientDescent:
    """Linear regression fitted by full-batch gradient descent on the MSE loss.

    The model is ``a(x) = w_0 + <w, x>``.

    Attributes:
        weights: ``None`` until ``fit`` is called; afterwards a 1-D array
            where index 0 is the intercept ``w_0`` and the remaining entries
            are the per-feature coefficients.
        losses: Loss recorded at the start of every epoch of the most recent
            ``fit`` call (empty before the first fit).
    """

    def __init__(self, learning_rate=0.01, epochs=1500, threshold=1e-6):
        """Store the optimisation hyper-parameters.

        Args:
            learning_rate: Step size that multiplies the gradient.
            epochs: Maximum number of gradient steps.
            threshold: Training stops early once the absolute difference
                between the two most recently recorded losses drops below
                this value.
        """
        self.learning_rate = learning_rate
        self.epochs = epochs
        self.threshold = threshold
        # Exposed as plain attributes; `weights` stays None until fit() runs
        # so that predict() can detect an unfitted model.
        self.weights = None
        self.losses = []

    def calc_loss(self, X_train, y_train):
        """Return the mean squared error of the current weights.

        Args:
            X_train: Design matrix that already includes the leading column
                of ones (one column per entry of ``weights``).
            y_train: Target values, one per row of ``X_train``.

        Returns:
            The sum of squared residuals divided by ``len(y_train)``.
        """
        residual = y_train - np.dot(X_train, self.weights)
        return np.sum(residual ** 2) / len(y_train)

    def fit(self, X_train, y_train):
        """Fit intercept and coefficients by gradient descent.

        Weights are initialised from N(0, 0.1) using NumPy's global RNG, and
        the loss history is restarted on every call.

        Args:
            X_train: Feature matrix of shape (n_samples, n_features), without
                a bias column; array-likes such as nested lists are accepted.
            y_train: Targets of shape (n_samples,).

        Returns:
            ``self``, so calls can be chained.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        n = len(X_train)
        # Prepend a column of ones so the intercept is learned as weights[0].
        X_design = np.concatenate([np.ones((n, 1)), X_train], axis=1)
        self.weights = np.random.normal(0, 0.1, size=X_design.shape[1])
        # Start a fresh history: otherwise a refit would append to the old
        # one and its first convergence check would compare across runs.
        self.losses = []

        for _ in range(self.epochs):
            # One residual serves both the loss record and the gradient.
            residual = np.dot(X_design, self.weights) - y_train
            self.losses.append(np.sum(residual ** 2) / n)

            # Gradient of mean((X w - y)^2) with respect to w.
            grad = (2 / n) * np.dot(X_design.T, residual)
            self.weights -= self.learning_rate * grad

            if (
                len(self.losses) >= 2
                and abs(self.losses[-1] - self.losses[-2]) < self.threshold
            ):
                break

        return self

    def predict(self, X):
        """Return predictions for the rows of ``X``.

        Args:
            X: Feature matrix of shape (n_samples, n_features), without a
                bias column.

        Returns:
            1-D array with one prediction per row.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError(
                'NaiveGradientDescent is not fitted yet; call fit() first.'
            )
        X = np.asarray(X, dtype=float)
        X_design = np.concatenate([np.ones((len(X), 1)), X], axis=1)
        return np.dot(X_design, self.weights)

In [None]:
# Train the hand-written gradient-descent model (fit returns the model itself)
# and predict on the same data it was trained on.
naive_gd = NaiveGradientDescent().fit(X, y)
y_pred_gd = naive_gd.predict(X)

In [None]:
# Training-set errors of the gradient-descent model.
print(
    f'mse = {mean_squared_error(y, y_pred_gd)}, mae = {mean_absolute_error(y, y_pred_gd)}',
    end='\n\n',
)
# weights[0] is the intercept (it multiplies the prepended column of ones);
# weights[1:] are the per-feature coefficients.
print(
    f'naive_grad.intercept_ = {naive_gd.weights[0]}',
    end='\n\n',
)
print(naive_gd.weights[1:])

mse = 0.6076780602357938, mae = 0.6399446517442745

naive_grad.intercept_ = 24.931864051505993

[ 2.94944984e+01  3.70050488e+01  9.19365546e+01  7.59817511e+01
 -1.11098524e-01  8.60913762e+01  3.45051447e+01  2.74278465e+01
  8.26341935e+01  6.96972803e+01  1.94243349e+01 -8.50777762e-02
  6.94510167e+01  7.13947117e+01  4.62033712e+01  7.28456554e+01
  5.14171887e+01 -5.44063063e-02  8.08199802e+01  1.02569225e+01
  9.34393880e+01  5.08380841e+01  7.14697583e+01]


**Для сравнения**

In [None]:
# Reference model: scikit-learn's ordinary least squares on the same data
# (fit returns the estimator, so construction and fitting are chained).
linreg = LinearRegression().fit(X, y)
y_pred = linreg.predict(X)

In [None]:
# Same report for the scikit-learn reference model, for side-by-side comparison.
print(
    f'mse = {mean_squared_error(y, y_pred)}, mae = {mean_absolute_error(y, y_pred)}',
    end='\n\n',
)
print(
    f'linreg.intercept_ = {linreg.intercept_}',
    end='\n\n',
)
print(linreg.coef_)

mse = 0.6076043810795444, mae = 0.6407862267751581

linreg.intercept_ = 24.92542864656881

[ 2.94962111e+01  3.70096838e+01  9.19413583e+01  7.59814180e+01
 -1.10129126e-01  8.60888544e+01  3.45057742e+01  2.74263445e+01
  8.26412300e+01  6.96964957e+01  1.94223302e+01 -8.53262323e-02
  6.94565976e+01  7.13939872e+01  4.62029179e+01  7.28484714e+01
  5.14205778e+01 -5.62830965e-02  8.08212852e+01  1.02572049e+01
  9.34406120e+01  5.08346842e+01  7.14712893e+01]
