In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.preprocessing import PolynomialFeatures
from sklearn.model_selection import train_test_split

df = pd.read_csv("../data/Advertising.csv", index_col=0)

X, y = df.drop('Sales', axis="columns"), df['Sales']

model_polynomial = PolynomialFeatures(3, include_bias=False)
poly_feature = model_polynomial.fit_transform(X)
# poly_feature #innehåller våra features

X_train, X_test, y_train, y_test = train_test_split(poly_feature, y, test_size=0.33, random_state=1337)
X_train.shape

# eftersom vi gör regularisering, måste vi göra standardisering, nästa steg

(134, 19)

In [9]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaled_X_train = scaler.fit_transform(X_train) # på train datan, gör både fit och transform
scaled_X_test = scaler.transform(X_test) # på test datan, gör bara transform

print(f"{scaled_X_train.mean():.3f}, {scaled_X_test.mean():.3f}")

-0.000, 0.031


In [13]:
# nu ska vi göra ridge regression, "L2 regularization"
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_squared_error, mean_absolute_error

model_ridge = Ridge(alpha=0.1)
model_ridge.fit(scaled_X_train, y_train) # tränar modellen
y_hat = model_ridge.predict(scaled_X_test) # gör prediction på den scaled test datan
print(model_ridge.coef_)

MSE = mean_squared_error(y_test, y_hat)
RMSE = np.sqrt(MSE)

MSE, RMSE


[ 5.60232255e+00  7.76137111e-01  4.08021864e-01 -6.57597265e+00
  4.34278763e+00 -9.82498935e-01 -7.30657923e-01  1.63143459e-01
 -4.82682624e-01  2.84024955e+00 -1.33057802e+00  6.19651985e-01
  8.33426629e-01 -3.84522734e-01  4.20534037e-01 -1.29689124e-01
  1.92356595e-01  1.66660560e-01 -4.67542299e-05]


(0.24959538036150153, 0.49959521651182925)

Ridge regressionen ger ett mycket bättre resultat, än den vi gjorde i 00!

In [12]:
from sklearn.linear_model import Lasso

model_lasso = Lasso(alpha = 0.1)
model_lasso.fit(scaled_X_train, y_train)
y_hat = model_lasso.predict(scaled_X_test)

print(model_lasso.coef_)
np.sqrt(mean_squared_error(y_test, y_hat))

[ 1.77053674  0.16177077  0.         -0.          3.77413423  0.
  0.          0.04720898  0.         -0.37585383 -0.         -0.
  0.          0.          0.          0.          0.          0.
  0.        ]


0.7738198161795438

Cross Validation och hyperparametrisering

In [18]:
from sklearn.linear_model import RidgeCV

model_ridgeCV = RidgeCV(alphas=[.0001, .001, .01, .1, 1, 5, 10], scoring="neg_mean_squared_error")
model_ridgeCV.fit(scaled_X_train, y_train)
print(model_ridgeCV.coef_,  "\n",  model_ridgeCV.score(scaled_X_train, y_train)) # score: hög sådan här betyder att variansen i y kan förklaras...
model_ridgeCV.alpha_ # ger oss svaret på crossvalidation

[  7.34985134   0.6331339    0.4575767  -10.6664558    4.69979729
  -1.21795087  -0.33002651  -0.2049405   -0.18235904   5.19374784
  -1.37857412   1.00487749   0.51536908  -0.39391912   0.23265959
  -0.28009281   0.38741237   0.14013473  -0.09899025] 
 0.9912715899638569


0.01

In [21]:
from sklearn.linear_model import LassoCV

model_lassoCV = LassoCV(eps=0.001, n_alphas=100, max_iter=10000, cv=5)
model_lassoCV.fit(scaled_X_train, y_train)
print(f"alpha (lambda) = {model_lassoCV.alpha_}")
y_hat = model_lassoCV.predict(scaled_X_test)

np.sqrt(mean_squared_error(y_test, y_hat))

alpha (lambda) = 0.004956246150210799


0.4529065286091838

Elastic
- metoden testar alla alternativen i l1_ratio. Svaret 1.0 innebär att det är bara Lasso (tror jag)

In [26]:
from sklearn.linear_model import ElasticNetCV

model_elastic = ElasticNetCV(l1_ratio=[.1, .5, .7, .9, .95, .99, 1], eps=0.001, n_alphas=100, max_iter=10000, cv=5) #l1_ratio är ration mellan de två metoderna, eps är typ steglängden i iterationen
model_elastic.fit(scaled_X_train, y_train) # tränar modellen
print(f"{model_elastic.l1_ratio_}, {model_elastic.alpha_}")

1.0, 0.004956246150210799


In [27]:
y_hat = model_elastic.predict(scaled_X_test)
np.sqrt(mean_absolute_error(y_test, y_hat))

0.558480162209294