<a href="https://colab.research.google.com/github/aluqbnle/ml-sandbox/blob/master/20_Regularization_and_Linear_Regression.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# 実験


# 正則化手法を線形回帰に導入する

### データの読み込み

In [0]:
import numpy
from pandas import DataFrame
from sklearn.datasets import load_boston
from sklearn.model_selection import train_test_split

In [0]:

# Load the Boston housing dataset that ships with scikit-learn.
# NOTE(review): load_boston was deprecated in scikit-learn 1.0 and removed in
# 1.2, so this cell only runs on an older scikit-learn release.
boston = load_boston()
X, y = boston.data, boston.target  # feature matrix, target variable
feature_names = boston.feature_names  # column labels of the features

# One table: every feature column followed by the target column 'MEDV'.
df = DataFrame(data=X, columns=feature_names).assign(MEDV=y)

In [0]:
# Keep only the RM and LSTAT columns as predictors; MEDV is the regression target.
X = df.loc[:, ['RM', 'LSTAT']].values
y = df.loc[:, 'MEDV'].values

In [0]:
# Hold out 30% of the rows for testing; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.30,
    random_state=42,
)

In [0]:
# Pull each feature out as its own (n_samples, 1) column so that the two
# features can be scaled independently.
X_train0 = X_train[:, 0:1]  # RM, training rows
X_train1 = X_train[:, 1:2]  # LSTAT, training rows
X_test0 = X_test[:, 0:1]  # RM, test rows
X_test1 = X_test[:, 1:2]  # LSTAT, test rows

In [0]:
from sklearn.preprocessing import MinMaxScaler, StandardScaler

### 前処理

In [0]:
# Standardize the RM column. The scaler is fitted on the training rows only and
# then applied unchanged to the test rows.
# NOTE(review): 'standerd_scaler' is misspelled, but a later cell refers to this
# exact name, so it is kept as-is here.
standerd_scaler = StandardScaler().fit(X_train0)
X_train_scaled0 = standerd_scaler.transform(X_train0)
X_test_scaled0 = standerd_scaler.transform(X_test0)

In [0]:
# Min-max scale the LSTAT column. As with RM, the scaler is fitted on the
# training rows only and reused for the test rows.
min_max_scaler = MinMaxScaler().fit(X_train1)
X_train_scaled1 = min_max_scaler.transform(X_train1)
X_test_scaled1 = min_max_scaler.transform(X_test1)


In [0]:
# Put the two scaled (n_samples, 1) columns back side by side:
# column 0 = standardized RM, column 1 = min-max scaled LSTAT.
X_train_scaled = numpy.hstack((X_train_scaled0, X_train_scaled1))

In [0]:
# Assemble the scaled test matrix the same way as the training matrix:
# column 0 = standardized RM, column 1 = min-max scaled LSTAT.
# The scaled test columns were already computed when the scalers were fitted,
# so they are reused here instead of running both transforms a second time.
X_test_scaled = numpy.zeros(X_test.shape)
X_test_scaled[:, 0] = X_test_scaled0.reshape(-1)
X_test_scaled[:, 1] = X_test_scaled1.reshape(-1)

### k-fold cross validation

In [0]:
from sklearn.model_selection import KFold

In [0]:
# Splitter shared by all the *CV estimators below: 5 folds, rows shuffled with
# a fixed seed before splitting.
kf = KFold(
    n_splits=5,
    shuffle=True,
    random_state=42,
)

### Lasso

In [0]:
from sklearn.linear_model import LassoCV

In [0]:
# L1-regularized linear regression; the regularization strength is picked by
# cross-validation over the folds produced by kf.
lasso = LassoCV(cv=kf)

In [39]:
# Fit on the scaled training data; the cell output is the fitted estimator's repr.
lasso.fit(X_train_scaled, y_train)

LassoCV(alphas=None, copy_X=True,
    cv=KFold(n_splits=5, random_state=42, shuffle=True), eps=0.001,
    fit_intercept=True, max_iter=1000, n_alphas=100, n_jobs=None,
    normalize=False, positive=False, precompute='auto', random_state=None,
    selection='cyclic', tol=0.0001, verbose=False)

In [40]:
# Score on the training set (score() of a scikit-learn regressor reports R^2).
lasso.score(X_train_scaled, y_train)

0.6486214999775376

In [41]:
# Score on the held-out test set, for comparison with the training score.
lasso.score(X_test_scaled, y_test)

0.6004647942968495

In [42]:
# Regularization strength selected by cross-validation.
lasso.alpha_

0.006548570448456534

### Ridge Regression

In [0]:
import numpy
from sklearn.linear_model import RidgeCV

In [0]:
# L2-regularized linear regression with cross-validated regularization strength.
# NOTE(review): no alphas are passed, and the recorded run shows the candidates
# were 0.1, 1.0 and 10.0 with 0.1 selected, i.e. the smallest value offered.
# A wider or finer alpha grid may be worth trying.
ridge = RidgeCV(cv=kf)

In [45]:
# Fit on the scaled training data; the cell output is the fitted estimator's repr.
ridge.fit(X_train_scaled, y_train)



RidgeCV(alphas=array([ 0.1,  1. , 10. ]),
    cv=KFold(n_splits=5, random_state=42, shuffle=True),
    fit_intercept=True, gcv_mode=None, normalize=False, scoring=None,
    store_cv_values=False)

In [46]:
# Score on the training set (score() of a scikit-learn regressor reports R^2).
ridge.score(X_train_scaled, y_train)

0.648616441243237

In [47]:
# Score on the held-out test set, for comparison with the training score.
ridge.score(X_test_scaled, y_test)

0.6005255323711268

In [48]:
# Regularization strength selected by cross-validation.
ridge.alpha_

0.1

### Elastic Net

In [0]:
from sklearn.linear_model import ElasticNetCV

In [0]:
# Combined L1/L2 penalty. l1_ratio is given as the single value 0.5, so only
# alpha is searched by cross-validation; the L1/L2 mix itself is not tuned.
elasticnet = ElasticNetCV(cv=kf, l1_ratio=0.5)

In [51]:
# Fit on the scaled training data; the cell output is the fitted estimator's repr.
elasticnet.fit(X_train_scaled, y_train)

ElasticNetCV(alphas=None, copy_X=True,
       cv=KFold(n_splits=5, random_state=42, shuffle=True), eps=0.001,
       fit_intercept=True, l1_ratio=0.5, max_iter=1000, n_alphas=100,
       n_jobs=None, normalize=False, positive=False, precompute='auto',
       random_state=None, selection='cyclic', tol=0.0001, verbose=0)

In [52]:
# Score on the training set (score() of a scikit-learn regressor reports R^2).
elasticnet.score(X_train_scaled, y_train)

0.6408644755644152

In [53]:
# Score on the held-out test set, for comparison with the training score.
elasticnet.score(X_test_scaled, y_test)

0.6034664891361318

In [54]:
# Regularization strength selected by cross-validation.
elasticnet.alpha_

0.01309714089691307

In [55]:
# L1/L2 mixing ratio of the fitted model; matches the single value passed in.
elasticnet.l1_ratio_

0.5