In [56]:
import pandas as pd
import numpy as np

# ok, here we go again
df = pd.read_csv('Advertising.csv', index_col=0)
df.head()

Unnamed: 0,TV,Radio,Newspaper,Sales
1,230.1,37.8,69.2,22.1
2,44.5,39.3,45.1,10.4
3,17.2,45.9,69.3,9.3
4,151.5,41.3,58.5,18.5
5,180.8,10.8,58.4,12.9


In [57]:
y = df.Sales.values
X = df[['TV', 'Radio', 'Newspaper']].values

In [58]:
y[:10], X[:10]

(array([22.1, 10.4,  9.3, 18.5, 12.9,  7.2, 11.8, 13.2,  4.8, 10.6]),
 array([[230.1,  37.8,  69.2],
        [ 44.5,  39.3,  45.1],
        [ 17.2,  45.9,  69.3],
        [151.5,  41.3,  58.5],
        [180.8,  10.8,  58.4],
        [  8.7,  48.9,  75. ],
        [ 57.5,  32.8,  23.5],
        [120.2,  19.6,  11.6],
        [  8.6,   2.1,   1. ],
        [199.8,   2.6,  21.2]]))

In [59]:
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=42)

In [60]:
X_train.shape, X_test.shape

((160, 3), (40, 3))

In [61]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

model = LinearRegression( )
model.fit(X_train, y_train)
model.coef_

array([0.04472952, 0.18919505, 0.00276111])

In [62]:
y_pred = model.predict(X_train)

print(r2_score(y_pred, y_train)) # плохая метрика! 
print(mean_absolute_error(y_pred, y_train))

0.883555792578981
1.198467896150013


In [63]:
y_pred = model.predict(X_test)

print(r2_score(y_pred, y_test)) # плохая метрика! 
print(mean_absolute_error(y_pred, y_test))

0.8791069137120033
1.46075671681176


Добиваемся переобучения! 

In [74]:
from sklearn.preprocessing import PolynomialFeatures

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=42)

poly = PolynomialFeatures(degree=5)
X_train = poly.fit_transform(X_train)
X_test = poly.transform(X_test)

In [75]:
X_train.shape

(160, 56)

In [76]:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_absolute_error

model = LinearRegression( )
model.fit(X_train, y_train)

y_pred = model.predict(X_train)

print('Качествао модели на трэйне:')
print(r2_score(y_pred, y_train)) # плохая метрика! 
print(mean_absolute_error(y_pred, y_train))
print('-'*50)


y_pred = model.predict(X_test)

print('Качествао модели на тест:')
print(r2_score(y_pred, y_test)) # плохая метрика! 
print(mean_absolute_error(y_pred, y_test))

Качествао модели на трэйне:
0.9947393422433753
0.29746500856749547
--------------------------------------------------
Качествао модели на тест:
0.30069471308709783
2.469040156634764


Какой минус есть у процедуры с разбиением на трейн и тест? 

## Кросс-валидация 

![](https://long-short.pro/wp-content/uploads/sites/3/2013/06/crossvalidation.png)

In [82]:
from sklearn.model_selection import cross_val_score

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=42)

model = LinearRegression( )

cv_results = cross_val_score(model, X_train, y_train, cv=5, scoring='r2')
cv_results

array([0.71981527, 0.92992247, 0.92652848, 0.91883369, 0.80234225])

In [83]:
cv_results.mean()

0.8594884313276513

In [85]:
model = LinearRegression( )
model.fit(X_train, y_train)

y_pred = model.predict(X_test)
r2_score(y_pred, y_test) # плохая метрика! 

0.8791069137120033

## Регуляризация 

$$
MSE = L(\beta) = \frac{1}{n} \cdot \sum_{i=1}^n (y_i - \beta \cdot x_i)^2 \to \min_{\beta} 
$$

### Ridge - регрессия (l-2 регуляризация)

$$
L(\beta) = \frac{1}{n} \cdot \sum_{i=1}^n (y_i - (\beta_0 + \beta_1 \cdot x_{1i} + \ldots + \beta_k \cdot x_{ki} )^2 + \lambda \cdot \sum \beta^2_k \to \min_{\beta} 
$$

### Lasso - регрессия (l-1 регуляризация)

$$
L(\beta) = \frac{1}{n} \cdot \sum_{i=1}^n (y_i - (\beta_0 + \beta_1 \cdot x_{1i} + \ldots + \beta_k \cdot x_{ki} )^2 + \lambda \cdot \sum |\beta_k| \to \min_{\beta} 
$$

In [103]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, 
                                                    random_state=42)

poly = PolynomialFeatures(degree=7)
X_train = poly.fit_transform(X_train)
X_test = poly.transform(X_test)

In [104]:
X_train.shape

(160, 120)

In [107]:
from sklearn.linear_model import Lasso

model = Lasso(alpha=100)

model.fit(X_train, y_train)
model.coef_

  model = cd_fast.enet_coordinate_descent(


array([ 0.00000000e+00,  0.00000000e+00,  0.00000000e+00,  0.00000000e+00,
        3.25792170e-04,  0.00000000e+00,  1.08746032e-04,  0.00000000e+00,
        0.00000000e+00,  0.00000000e+00, -8.10497571e-07,  6.35355338e-06,
       -1.15432498e-06,  3.61525890e-05, -0.00000000e+00, -2.60865239e-07,
        0.00000000e+00,  0.00000000e+00, -0.00000000e+00, -3.80959208e-06,
       -2.93878668e-09, -8.21214346e-09, -1.13627710e-09, -1.37390553e-07,
        3.93215736e-08, -2.95913802e-09,  2.24549359e-07, -1.58278280e-07,
        6.90588943e-09,  4.92546862e-08, -3.63122134e-07,  5.43210783e-07,
       -2.13992061e-07,  6.78334615e-08, -3.62776915e-09,  6.78468630e-12,
       -3.16967528e-12, -9.57630179e-12, -1.88008087e-10,  2.23293609e-11,
       -1.78837143e-11, -1.97622817e-09,  7.83794681e-10, -3.84525753e-11,
        1.61300768e-11,  5.41447907e-09, -4.04070386e-09, -8.60486223e-10,
        5.09793748e-10,  1.39880955e-10, -5.88626985e-09,  1.33678098e-08,
       -4.47140125e-09,  

In [108]:
y_pred = model.predict(X_train)

print('Качествао модели на трэйне:')
print(r2_score(y_pred, y_train)) # плохая метрика! 
print(mean_absolute_error(y_pred, y_train))
print('-'*50)


y_pred = model.predict(X_test)

print('Качествао модели на тест:')
print(r2_score(y_pred, y_test)) # плохая метрика! 
print(mean_absolute_error(y_pred, y_test))

Качествао модели на трэйне:
0.978537379572133
0.48156002927166375
--------------------------------------------------
Качествао модели на тест:
0.9684672161578011
0.773789493586923


__Модель:__

$$
y_i = \beta x_i 
$$

__Функция потерь без регуляризации:__

$$
 L(\beta) = \frac{1}{n} \cdot \sum_{i=1}^n (y_i - \beta \cdot x_i)^2 \to \min_{\beta} 
$$

$$
\hat{\beta} = \frac{\sum{x_i y_i}}{\sum x_i^2}
$$

__Функция потерь c регуляризацией:__

$$
L(\beta) = \frac{1}{n} \cdot \sum_{i=1}^n (y_i - \beta \cdot x_i)^2  + \lambda \cdot \beta^2 \to \min_{\beta} 
$$

$$
L' = \frac{1}{n} \cdot \sum_{i=1}^n -2 x_i (y_i - \beta x_i) + 2 \lambda \beta = 0
$$

$$
\sum x_i y_i - \beta \sum x_i^2 - n\lambda\beta = 0
$$

$$
\hat{\beta}^{Ridge} = \frac{ \sum x_i y_i}{n \lambda + \sum x_i^2}
$$

* Если взять $\lambda = 0$, получается оценка такая, как будто никакого штрафа в виде регуляризатора нет 
* Если взять $\lambda$ очень большим, то итоговая оценка будет очень маленькой 