## 練習時間
試著使用 sklearn datasets 的其他資料集 (boston, ...)，來訓練自己的線性迴歸模型，並加上適當的正則話來觀察訓練情形。

In [1]:
import numpy as np
import pandas as pd

from sklearn.preprocessing import StandardScaler
from sklearn.datasets import load_boston, load_wine
from sklearn.linear_model import Lasso, RidgeClassifier
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import accuracy_score, r2_score, mean_squared_error

### Wine Dataset

In [2]:
wine = load_wine()

X, y = wine.data, wine.target

std_scaler = StandardScaler()
trans_x = std_scaler.fit_transform(X)

x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=.3)

RidgeCls = RidgeClassifier()

alpha_range = np.linspace(0, 1.5, 15)
tol_range = np.linspace(0, 0.001, 5)
grid_para = {'alpha': alpha_range, 'tol': tol_range}

clf = GridSearchCV(estimator=RidgeCls, param_grid=grid_para, cv=5)
clf.fit(x_train, y_train)

best_par = clf.best_params_
print('The best paramenter: {}'.format(best_par))
print('-'*50)

coefficient = clf.best_estimator_.coef_
print('Coefficient:{}'.format(coefficient))
print('-'*50)

y_pred = clf.predict(x_test)
acc = accuracy_score(y_test, y_pred)
print('Accuracy: {:.3f}'.format(acc))

The best paramenter: {'alpha': 0.3214285714285714, 'tol': 0.0}
--------------------------------------------------
Coefficient:[[ 3.06365608e-01 -3.14056119e-03  4.34690487e-01 -6.19398029e-02
  -2.05712866e-03 -2.28862182e-01  2.22295654e-01 -3.04608878e-01
  -5.56637465e-02 -3.75346768e-02 -3.00835585e-01  2.67975401e-01
   1.45201336e-03]
 [-3.49122487e-01 -1.16297848e-01 -6.31927747e-01  4.26816960e-02
   2.10989978e-03  1.72617446e-01  1.22248706e-01  6.55646353e-01
   9.62999248e-02 -1.07995008e-01  7.03353860e-01 -2.65306902e-02
  -1.24503318e-03]
 [ 4.27568791e-02  1.19438409e-01  1.97237260e-01  1.92581069e-02
  -5.27711197e-05  5.62447361e-02 -3.44544360e-01 -3.51037475e-01
  -4.06361783e-02  1.45529684e-01 -4.02518276e-01 -2.41444711e-01
  -2.06980177e-04]]
--------------------------------------------------
Accuracy: 0.981


### Boston Dataset

In [3]:
boston = load_boston()

X, y = boston.data, boston.target

categorical_feature = ['CHAS', 'RAD']

boston_df = pd.DataFrame(data=X, columns=boston.feature_names)
numerical_dat = boston_df.loc[:, ~boston_df.columns.isin(categorical_feature)].values
categorical_dat = boston_df[categorical_feature].values

std_scaler = StandardScaler()
std_x = std_scaler.fit_transform(numerical_dat)

trans_x = np.hstack((std_x, categorical_dat))
x_train, x_test, y_train, y_test = train_test_split(trans_x, y)

alpha_range = np.linspace(0.001, 0.1, 20)
tol_range = np.linspace(0.01, 0.1, 10)
grid_param = {'alpha': alpha_range, 'tol': tol_range}

lasso_reg = Lasso()
cv_lasso = GridSearchCV(estimator=lasso_reg, param_grid=grid_param, cv=5)
cv_lasso.fit(x_train, y_train)

best_par = cv_lasso.best_params_
print('The best paramenter: {}'.format(best_par))
print('-'*50)

coefficient = cv_lasso.best_estimator_.coef_
print('Coefficient:{}'.format(coefficient))
print('-'*50)

y_pred = cv_lasso.predict(x_test)
r_square = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
print('R sqruare: {:.3f}'.format(r_square))
print('MSE: {:.3f}'.format(mse))

The best paramenter: {'alpha': 0.021842105263157895, 'tol': 0.030000000000000006}
--------------------------------------------------
Coefficient:[-0.65761948  1.11377378  0.47376679 -2.02745416  2.66307321 -0.23415512
 -2.70444143 -2.46661237 -2.20049258  0.94127134 -3.20681326  2.08610663
  0.32313856]
--------------------------------------------------
R sqruare: 0.724
MSE: 27.290


