In [6]:
import numpy as np
from sklearn import datasets
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.pipeline import make_pipeline
import pandas as pd

%matplotlib inline

In [7]:
X = np.array([[   1,    1,  500,    1],
              [   1,    1,  700,    1],
              [   1,    2,  750,    2],
              [   1,    5,  600,    1],
              [   1,    3, 1450,    2],
              [   1,    0,  800,    1],
              [   1,    5, 1500,    3],
              [   1,   10, 2000,    3],
              [   1,    1,  450,    1],
              [   1,    2, 1000,    2]])
y = np.array([45, 55, 50, 55, 60, 35, 75, 80, 50, 60])

In [19]:
X_st = X.copy().astype(np.float64)
X_st[:, 1] = standard_scale(X_st[:, 1])
X_st[:, 2] = standard_scale(X_st[:, 2])
X_st[:, 3] = standard_scale(X_st[:, 3])

In [10]:
def calc_mae(y, y_pred):
    err = np.mean(np.abs(y - y_pred))
    return err

def calc_mse(y, y_pred):
    err = np.mean((y - y_pred)**2)
    return err

#нормализация [0, 1]
def min_max_scale(X):
    return (X - X.min()) / (X.max() - X.min())

#стандартизация
def standard_scale(X):
    mean = X.mean()
    std = X.std()
    return (X - mean) / std

In [18]:
def gradient_descent_reg_l2(X, y, iterations, eta=1e-4, reg=1e-8):
    W = np.random.randn(X.shape[1])
    n = X.shape[0]
    
    for i in range(0, iterations):
        y_pred = np.dot(X, W)
        err = calc_mse(y, y_pred)
        
        dQ = 2/n * X.T @ (y_pred - y) # градиент функции ошибки
        dReg = reg * W # градиент регуляризации
        
        W -= eta * (dQ + dReg)
        
        if i % (iterations / 10) == 0:
            print(f'Iter: {i}, weights: {W}, error {err}')
    
    print(f'Final MSE: {calc_mse(y, np.dot(X, W))}')
    return W

In [36]:
gradient_descent_reg_l2(X_st, y, iterations=5000, eta=1e-1, reg=1e-4)

Iter: 0, weights: [12.46803547  1.93469512  2.39123772  2.77897771], error 3160.2367213419175
Iter: 500, weights: [56.49717514  6.30231899 -0.35424805  6.56979014], error 24.904627743647744
Iter: 1000, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646691917
Iter: 1500, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688844
Iter: 2000, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688827
Iter: 2500, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688827
Iter: 3000, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688827
Iter: 3500, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688827
Iter: 4000, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688827
Iter: 4500, weights: [56.49717514  6.30234588 -0.35437787  6.5698973 ], error 24.904627646688827
Final MSE: 24.904627646688827


array([56.49717514,  6.30234588, -0.35437787,  6.5698973 ])

__Задание 1__

Постройте график зависимости весов всех признаков от lambda в самописной L2-регуляризации (на данных про ЕГЭ). Сделайте вывод.

__Задание 2__

Напишите функцию наподобие gradient_descent_reg_l2, но для применения L1-регуляризации.
Опираясь на [видео](https://www.youtube.com/watch?v=dHhYHGI9E6I) и [гайд](https://towardsdatascience.com/ml-from-scratch-linear-polynomial-and-regularized-regression-models-725672336076)

In [41]:
def gradient_descent_reg_l1(X, y, iterations, eta=1e-4, reg=1e-8):
    W = np.random.randn(X.shape[1])
    n = X.shape[0]
    
    for i in range(0, iterations):
        y_pred = np.dot(X, W)
        err = calc_mse(y, y_pred)
        
        dQ = 1/n * np.sign(X.T) @ (y_pred - y) # градиент функции ошибки
        dReg = reg * W # градиент регуляризации
        
        W -= eta * (dQ + dReg)
        
        if i % (iterations / 10) == 0:
            print(f'Iter: {i}, weights: {W}, error {err}')
    
    print(f'Final MSE: {calc_mse(y, np.dot(X, W))}')
    return W

In [50]:
gradient_descent_reg_l1(X_st, y, iterations=1000, eta=1e-2, reg=1e-6)

Iter: 0, weights: [ 0.42545696 -1.2331766  -0.29030876 -1.95775277], error 3455.6434385253706
Iter: 100, weights: [35.97488864 -2.93076827  1.53360049  5.3695294 ], error 530.5523869527552
Iter: 200, weights: [48.9871172  -1.39967895  2.70510418  7.37685672], error 110.52015810313878
Iter: 300, weights: [53.75000888  0.37943825  3.23572544  7.234352  ], error 46.33745448793593
Iter: 400, weights: [55.49337951  1.83869626  3.54341346  6.45823608], error 33.781381722257095
Iter: 500, weights: [56.1315089   2.92130037  3.80803635  5.5983183 ], error 30.252376690576178
Iter: 600, weights: [56.36508466  3.68712559  4.08298763  4.8210878 ], error 29.121078926431693
Iter: 700, weights: [56.45058085  4.21199771  4.37557442  4.15886586], error 28.939468544097075
Iter: 800, weights: [56.48187519  4.56139856  4.67971701  3.60247814], error 29.20241068253539
Iter: 900, weights: [56.49332992  4.78589495  4.98727875  3.13336571], error 29.678037716731808
Final MSE: 30.250678677769105


array([56.49749827,  4.92172424,  5.28840135,  2.73741659])