## [作業重點]
使用 Sklearn 中的 Lasso, Ridge 模型，來訓練各種資料集，務必了解送進去模型訓練的**資料型態**為何，也請了解模型中各項參數的意義。

機器學習的模型非常多種，但要訓練的資料多半有固定的格式，確保你了解訓練資料的格式為何，這樣在應用新模型時，就能夠最快的上手開始訓練！

## 練習時間
試著使用 sklearn datasets 的其他資料集 (boston, ...)，來訓練自己的線性迴歸模型，並加上適當的正則話來觀察訓練情形。

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

In [2]:
from sklearn import datasets, linear_model
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from sklearn import metrics


In [3]:
boston = datasets.load_boston()
boston.keys()

dict_keys(['data', 'target', 'feature_names', 'DESCR', 'filename'])

In [4]:
print(boston['DESCR'])

.. _boston_dataset:

Boston house prices dataset
---------------------------

**Data Set Characteristics:**  

    :Number of Instances: 506 

    :Number of Attributes: 13 numeric/categorical predictive. Median Value (attribute 14) is usually the target.

    :Attribute Information (in order):
        - CRIM     per capita crime rate by town
        - ZN       proportion of residential land zoned for lots over 25,000 sq.ft.
        - INDUS    proportion of non-retail business acres per town
        - CHAS     Charles River dummy variable (= 1 if tract bounds river; 0 otherwise)
        - NOX      nitric oxides concentration (parts per 10 million)
        - RM       average number of rooms per dwelling
        - AGE      proportion of owner-occupied units built prior to 1940
        - DIS      weighted distances to five Boston employment centres
        - RAD      index of accessibility to radial highways
        - TAX      full-value property-tax rate per $10,000
        - PTRATIO  pu

In [5]:
boston_df = pd.DataFrame(boston.data, columns=boston.feature_names)
boston_df = pd.concat([boston_df, pd.DataFrame(boston.target, columns=['target_price'])], axis=1)
boston_df.head()

Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target_price
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.09,1.0,296.0,15.3,396.9,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.9,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.9,5.33,36.2


In [6]:
# split_dataset
x_train, x_test, y_train, y_test = train_test_split(boston_df[boston.feature_names], boston_df.target_price, random_state=13)
print('train/test:{}/{}'.format(x_train.shape, x_test.shape))

train/test:(379, 13)/(127, 13)


In [7]:
ss_scaler = StandardScaler()
linear_reg = linear_model.LinearRegression()
Lasso_reg = linear_model.Lasso()
Ridge_reg = linear_model.Ridge()

In [9]:
# regression_metric:
def r2_score_(y_true, y_pred):
    return 1 - (np.sum(np.square(y_pred - y_true)) / np.sum(np.square(y_true - y_true.mean())))

In [10]:
#Base data preprossing >> feature scaling
ss_scaler.fit(x_train, y_train)
x_train_ss = ss_scaler.transform(x_train)
x_test_ss = ss_scaler.transform(x_test)
# linear_reg
result_linear = cross_val_score(linear_reg, x_train_ss, y_train, cv=5)
linear_reg.fit(x_train_ss, y_train)
y_train_predict = linear_reg.predict(x_train_ss)
y_test_predict = linear_reg.predict(x_test_ss)
r2_score_train_diy = r2_score_(y_train, y_train_predict)
r2_score_test_diy = r2_score_(y_test, y_test_predict)
print('linear_cv_mean:{}/{}'.format(result_linear.mean(), result_linear))
print('linear_explained_variance_score:', metrics.explained_variance_score(y_train, y_train_predict))
print('linear_train_r2_score_diy:', r2_score_train_diy)
print('linear_train_r2_score:{}'.format(linear_reg.score(x_train_ss, y_train)))
print('linear_train_r2_score:{}'.format(metrics.r2_score(y_train,y_train_predict, multioutput='raw_values')))
print('linear_test_r2_score:{}'.format(metrics.r2_score(y_test, y_test_predict, multioutput='raw_values')))
print('linear_test_r2_score_diy:', r2_score_test_diy)
print('\ntrain_mse_loss_diy:', np.mean(np.square(linear_reg.predict(x_train_ss) - y_train)))
print('train_mse_loss:', metrics.mean_squared_error(y_train, y_train_predict))
print('test_mse_loss_diy:', np.mean(np.square(linear_reg.predict(x_test_ss) - y_test)))
print('test_mse_loss:', metrics.mean_squared_error(y_test, y_test_predict))


linear_cv_mean:0.701615716669169/[0.73134096 0.67586937 0.68795073 0.77447125 0.63844627]
linear_explained_variance_score: 0.7445495716932606
linear_train_r2_score_diy: 0.7445495716932605
linear_train_r2_score:0.7445495716932605
linear_train_r2_score:[0.74454957]
linear_test_r2_score:[0.71219288]
linear_test_r2_score_diy: 0.7121928806832926

train_mse_loss_diy: 21.069982302970864
train_mse_loss: 21.069982302970864
test_mse_loss_diy: 25.806326618670038
test_mse_loss: 25.806326618670038


In [11]:
#Lasso regressor
result_lasso = cross_val_score(Lasso_reg, x_train_ss, y_train, cv=5)
Lasso_reg.fit(x_train_ss, y_train)
y_train_predict = Lasso_reg.predict(x_train_ss)
y_test_predict = Lasso_reg.predict(x_test_ss)
r2_score_train_diy = r2_score_(y_train, y_train_predict)
r2_score_test_diy = r2_score_(y_test, y_test_predict)
print('lasso_cv_mean:{}/{}'.format(result_lasso.mean(), result_lasso))
print('lasso_explained_variance_score:', metrics.explained_variance_score(y_train, y_train_predict))
print('lasso_train_r2_score_diy:', r2_score_train_diy)
print('lasso_train_r2_score:{}'.format(Lasso_reg.score(x_train_ss, y_train)))
print('lasso_train_r2_score:{}'.format(metrics.r2_score(y_train,y_train_predict, multioutput='raw_values')))
print('lasso_test_r2_score:{}'.format(metrics.r2_score(y_test, y_test_predict, multioutput='raw_values')))
print('lasso_test_r2_score_diy:', r2_score_test_diy)
print('\ntrain_mse_loss_diy:', np.mean(np.square(Lasso_reg.predict(x_train_ss) - y_train)))
print('train_mse_loss:', metrics.mean_squared_error(y_train, y_train_predict))
print('test_mse_loss_diy:', np.mean(np.square(Lasso_reg.predict(x_test_ss) - y_test)))
print('test_mse_loss:', metrics.mean_squared_error(y_test, y_test_predict))

lasso_cv_mean:0.6502321382996942/[0.77451061 0.62386538 0.59219736 0.6949521  0.56563525]
lasso_explained_variance_score: 0.6609020175881553
lasso_train_r2_score_diy: 0.6609020175881553
lasso_train_r2_score:0.6609020175881553
lasso_train_r2_score:[0.66090202]
lasso_test_r2_score:[0.68087432]
lasso_test_r2_score_diy: 0.6808743208504415

train_mse_loss_diy: 27.969373689252084
train_mse_loss: 27.969373689252084
test_mse_loss_diy: 28.61451630553995
test_mse_loss: 28.61451630553995


In [12]:
#Lasso_CV
Lasso_reg_cv = linear_model.LassoCV(cv=5, random_state=0).fit(x_train_ss, y_train)
#print('all_alphas:', Lasso_reg_cv.alphas_)
print('best_alpha:', Lasso_reg_cv.alpha_)
print('train_score:', Lasso_reg_cv.score(x_train_ss, y_train))
print('test_score:', Lasso_reg_cv.score(x_test_ss, y_test))
print('Lasso_coef:', Lasso_reg_cv.coef_)
Lasso_reg_LarsIC = linear_model.LassoLarsIC(criterion='aic').fit(x_train_ss, y_train)
print('best_alpha:', Lasso_reg_LarsIC.alpha_)
print('train_score:', Lasso_reg_LarsIC.score(x_train_ss, y_train))
print('test_score:', Lasso_reg_LarsIC.score(x_test_ss, y_test))
print('Lasso_coef:', Lasso_reg_LarsIC.coef_)
Lasso_reg_LarsIC = linear_model.LassoLarsIC(criterion='bic').fit(x_train_ss, y_train)
print('best_alpha:', Lasso_reg_LarsIC.alpha_)
print('train_score:', Lasso_reg_LarsIC.score(x_train_ss, y_train))
print('test_score:', Lasso_reg_LarsIC.score(x_test_ss, y_test))
print('Lasso_coef:', Lasso_reg_LarsIC.coef_)

best_alpha: 0.03826943096892057
train_score: 0.7437747764846194
test_score: 0.7145664788674476
Lasso_coef: [-1.20713214  1.25187964 -0.11348253  0.8487891  -1.18583137  2.47284803
 -0.         -2.78899528  2.42661236 -1.88021963 -1.75953682  0.66358063
 -3.94111898]
best_alpha: 8.796285730941115e-06
train_score: 0.7445495543246335
test_score: 0.7122063971911516
Lasso_coef: [-1.31848811  1.37387076 -0.07199789  0.84541371 -1.36624796  2.41345912
  0.         -3.04480124  2.97203584 -2.33473871 -1.81925436  0.70050975
 -3.93731775]
best_alpha: 0.05550872485120724
train_score: 0.6554348804796464
test_score: 0.6780890386459045
Lasso_coef: [ 0.          0.          0.          0.20466294  0.          2.55665571
  0.          0.          0.          0.         -1.13949049  0.
 -3.64815522]


In [13]:
#GridSearch
score_for_Lasso =['explained_variance', 'r2', 'neg_mean_squared_error']

for score in score_for_Lasso:
    Lasso_reg_GSCV = GridSearchCV(Lasso_reg, param_grid=[{'alpha':[10**(-i) for i in range(0, 8)]},
                                                        {'alpha':[0.01*(i) for i in range(1, 10)]},
                                                        {'alpha':[0.0001*(i) for i in range(1, 1000)]}
                                                        ],
                                              scoring=score, cv=5)
    Lasso_reg_GSCV.fit(x_train_ss, y_train)
    print('**:', Lasso_reg_GSCV.scorer_)
    print('Lasso_reg_GSCV.best_params_:', Lasso_reg_GSCV.best_params_)
    print('Lasso_reg_GSCV.best_score_(train):', Lasso_reg_GSCV.best_score_)
    print('Lasso_reg_GSCV.test_score:', metrics.r2_score(y_test, Lasso_reg_GSCV.best_estimator_.predict(x_test_ss)))
    print('Lasso_reg_GSCV.best_estimator_.coef_:', Lasso_reg_GSCV.best_estimator_.coef_)



**: make_scorer(explained_variance_score)
Lasso_reg_GSCV.best_params_: {'alpha': 0.0656}
Lasso_reg_GSCV.best_score_(train): 0.7141497188793662
Lasso_reg_GSCV.test_score: 0.7153532528603115
Lasso_reg_GSCV.best_estimator_.coef_: [-1.12751972  1.16444237 -0.1428565   0.85111535 -1.0568483   2.5153603
 -0.         -2.60559717  2.03684452 -1.55534622 -1.71688793  0.63713131
 -3.94377061]




**: make_scorer(r2_score)
Lasso_reg_GSCV.best_params_: {'alpha': 0.0609}
Lasso_reg_GSCV.best_score_(train): 0.705534892878949
Lasso_reg_GSCV.test_score: 0.7152718357760801
Lasso_reg_GSCV.best_estimator_.coef_: [-1.14119711  1.1794703  -0.13780747  0.85071871 -1.07901591  2.50805807
 -0.         -2.63712275  2.10380751 -1.61116586 -1.72421696  0.64167861
 -3.94331649]
**: make_scorer(mean_squared_error, greater_is_better=False)
Lasso_reg_GSCV.best_params_: {'alpha': 0.0386}
Lasso_reg_GSCV.best_score_(train): -24.293861536325807
Lasso_reg_GSCV.test_score: 0.7145804949159187
Lasso_reg_GSCV.best_estimator_.coef_: [-1.20617002  1.25082261 -0.11383787  0.84881705 -1.18427206  2.47336168
 -0.         -2.78677793  2.42190179 -1.87629296 -1.75902125  0.66326078
 -3.94115093]




In [14]:
#Ridge regressor
result_ridge = cross_val_score(Ridge_reg, x_train_ss, y_train, cv=5)
Ridge_reg.fit(x_train_ss, y_train)
y_train_predict = Ridge_reg.predict(x_train_ss)
y_test_predict = Ridge_reg.predict(x_test_ss)
r2_score_train_diy = r2_score_(y_train, y_train_predict)
r2_score_test_diy = r2_score_(y_test, y_test_predict)
print('ridge_cv_mean:{}/{}'.format(result_lasso.mean(), result_lasso))
print('ridge_explained_variance_score:', metrics.explained_variance_score(y_train, y_train_predict))
print('ridge_train_r2_score_diy:', r2_score_train_diy)
print('ridge_train_r2_score:{}'.format(Ridge_reg.score(x_train_ss, y_train)))
print('ridge_train_r2_score:{}'.format(metrics.r2_score(y_train,y_train_predict, multioutput='raw_values')))
print('ridge_test_r2_score:{}'.format(metrics.r2_score(y_test, y_test_predict, multioutput='raw_values')))
print('ridge_test_r2_score_diy:', r2_score_test_diy)
print('\ntrain_mse_loss_diy:', np.mean(np.square(y_train_predict - y_train)))
print('train_mse_loss:', metrics.mean_squared_error(y_train, y_train_predict))
print('test_mse_loss_diy:', np.mean(np.square(y_test_predict - y_test)))
print('test_mse_loss:', metrics.mean_squared_error(y_test, y_test_predict))

ridge_cv_mean:0.6502321382996942/[0.77451061 0.62386538 0.59219736 0.6949521  0.56563525]
ridge_explained_variance_score: 0.7445261917282574
ridge_train_r2_score_diy: 0.7445261917282574
ridge_train_r2_score:0.7445261917282574
ridge_train_r2_score:[0.74452619]
ridge_test_r2_score:[0.7126988]
ridge_test_r2_score_diy: 0.7126988036813631

train_mse_loss_diy: 21.071910721929193
train_mse_loss: 21.071910721929193
test_mse_loss_diy: 25.76096285503869
test_mse_loss: 25.76096285503869


In [15]:
#Ridge_CV
Ridge_reg_cv = linear_model.RidgeCV(cv=5).fit(x_train_ss, y_train)
print('best_alpha:', Ridge_reg_cv.alpha_)
print('train_score:', Ridge_reg_cv.score(x_train_ss, y_train))
print('test_score:', Ridge_reg_cv.score(x_test_ss, y_test))
print('Lasso_coef:', Ridge_reg_cv.coef_)


best_alpha: 10.0
train_score: 0.7430629361398937
test_score: 0.7154138257715341
Lasso_coef: [-1.20840394  1.21513735 -0.27106532  0.88846807 -1.13520968  2.51450315
 -0.05664465 -2.74400275  2.22270171 -1.63854362 -1.7391258   0.69746321
 -3.77269455]




In [16]:
#GridSearch
score_for_Lasso =['explained_variance', 'r2', 'neg_mean_squared_error']

for score in score_for_Lasso:
    Ridge_reg_GSCV = GridSearchCV(Ridge_reg, param_grid=[{'alpha':[10**(i) for i in range(0, 8)]},
                                                        #{'alpha':[0.01*(i) for i in range(1, 10)]},
                                                        {'alpha':[0.0001*(i) for i in range(1, 1000)]}],
                                              scoring=score, cv=5)
    Ridge_reg_GSCV.fit(x_train_ss, y_train)
    print('**:', Ridge_reg_GSCV.scorer_)
    print('Ridge_reg_GSCV.best_params_:', Ridge_reg_GSCV.best_params_)
    print('Ridge_reg_GSCV.best_score_(train):', Ridge_reg_GSCV.best_score_)
    print('Ridge_reg_GSCV.test_score:', metrics.r2_score(y_test, Ridge_reg_GSCV.best_estimator_.predict(x_test_ss)))
    print('Ridge_reg_GSCV.best_estimator_.coef_:', Ridge_reg_GSCV.best_estimator_.coef_)



**: make_scorer(explained_variance_score)
Ridge_reg_GSCV.best_params_: {'alpha': 10}
Ridge_reg_GSCV.best_score_(train): 0.7172761819619052
Ridge_reg_GSCV.test_score: 0.7154138257715341
Ridge_reg_GSCV.best_estimator_.coef_: [-1.20840394  1.21513735 -0.27106532  0.88846807 -1.13520968  2.51450315
 -0.05664465 -2.74400275  2.22270171 -1.63854362 -1.7391258   0.69746321
 -3.77269455]




**: make_scorer(r2_score)
Ridge_reg_GSCV.best_params_: {'alpha': 10}
Ridge_reg_GSCV.best_score_(train): 0.7083596803914537
Ridge_reg_GSCV.test_score: 0.7154138257715341
Ridge_reg_GSCV.best_estimator_.coef_: [-1.20840394  1.21513735 -0.27106532  0.88846807 -1.13520968  2.51450315
 -0.05664465 -2.74400275  2.22270171 -1.63854362 -1.7391258   0.69746321
 -3.77269455]
**: make_scorer(mean_squared_error, greater_is_better=False)
Ridge_reg_GSCV.best_params_: {'alpha': 10}
Ridge_reg_GSCV.best_score_(train): -24.102523776382274
Ridge_reg_GSCV.test_score: 0.7154138257715341
Ridge_reg_GSCV.best_estimator_.coef_: [-1.20840394  1.21513735 -0.27106532  0.88846807 -1.13520968  2.51450315
 -0.05664465 -2.74400275  2.22270171 -1.63854362 -1.7391258   0.69746321
 -3.77269455]


