### 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [2]:
from sklearn import datasets, metrics
from sklearn.model_selection import train_test_split, KFold, GridSearchCV

from sklearn.ensemble import GradientBoostingRegressor


In [8]:
# Load scikit-learn's bundled iris dataset; features are in `iris.data`, labels in `iris.target`.
iris = datasets.load_iris()
# Display the feature-matrix shape as (n_samples, n_features).
iris.data.shape

(150, 4)

In [5]:
# Hold out 25% of the samples for testing; the fixed seed keeps the split reproducible.
x_train, x_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    test_size=0.25,
    random_state=4,
)

# Baseline: gradient boosting with default hyper-parameters, fitted on the training split.
clf = GradientBoostingRegressor(random_state=7).fit(x_train, y_train)

# Score the baseline on the held-out split.
y_pred = clf.predict(x_test)
baseline_mse = metrics.mean_squared_error(y_test, y_pred)
print(y_pred)
print(baseline_mse)


[2.00771455e+00 4.57901818e-04 2.00180990e+00 2.00428691e+00
 2.02537071e+00 1.02253701e+00 1.79912017e+00 4.57901818e-04
 4.57901818e-04 1.68000207e+00 2.94893942e-03 4.13896404e-04
 2.53943941e-03 1.01286880e+00 2.01696157e+00 6.27887053e-04
 1.10755570e+00 6.27887053e-04 4.13896404e-04 1.95869142e+00
 6.71892466e-04 1.99402673e+00 1.19207320e+00 4.57901818e-04
 6.27887053e-04 4.57901818e-04 2.25439703e-02 4.57901818e-04
 4.57901818e-04 2.01216457e+00 1.00407307e+00 6.27887053e-04
 1.97580112e+00 2.53943941e-03 1.00493484e+00 1.99884720e+00
 2.02727308e+00 9.96535239e-01]
0.020919619043415945


In [10]:
# Candidate values for the two hyper-parameters being tuned (4 x 5 = 20 combinations).
n_estimators = [20, 40, 60, 80]
max_depth = [1, 3, 5, 7, 9]
param_grid = {
    "n_estimators": n_estimators,
    "max_depth": max_depth,
}

# Exhaustive cross-validated search over the grid, scored by negated MSE
# (higher is better), using all available cores.
grid_search = GridSearchCV(
    estimator=clf,
    param_grid=param_grid,
    scoring="neg_mean_squared_error",
    n_jobs=-1,
    verbose=1,
)
grid_result = grid_search.fit(x_train, y_train)


Fitting 5 folds for each of 20 candidates, totalling 100 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:    9.7s
[Parallel(n_jobs=-1)]: Done 100 out of 100 | elapsed:   10.3s finished


In [11]:
# The search was scored with "neg_mean_squared_error", so best_score_ is a negated
# cross-validated MSE (closer to 0 is better) -- it is not an accuracy.
print("Best neg-MSE (CV): %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Best Accuracy: -0.035951 using {'max_depth': 3, 'n_estimators': 40}


In [12]:
# Display the hyper-parameter combination with the best cross-validated score.
grid_result.best_params_

{'max_depth': 3, 'n_estimators': 40}

In [14]:
# Rebuild the model with the best hyper-parameters found by the grid search.
# random_state=7 matches the baseline `clf` (and therefore the estimator that was
# searched), so the refit is reproducible and the baseline-vs-tuned comparison
# differs only in the hyper-parameters, not in the seed.
clf_bestparam = GradientBoostingRegressor(random_state=7, **grid_result.best_params_)
clf_bestparam.fit(x_train, y_train)

# Predictions of the tuned model on the held-out split (replaces the baseline y_pred).
y_pred = clf_bestparam.predict(x_test)
y_pred

array([1.99650101, 0.01922073, 1.99650101, 1.99722349, 1.99722349,
       1.0132917 , 1.72038349, 0.01922073, 0.01922073, 1.65987324,
       0.0228515 , 0.01922073, 0.0228515 , 1.01112486, 1.94013452,
       0.01922073, 1.0972971 , 0.01922073, 0.01922073, 1.88153699,
       0.01922073, 1.98099196, 1.14343746, 0.01922073, 0.01922073,
       0.01922073, 0.02522306, 0.01922073, 0.01922073, 1.9867855 ,
       1.00725313, 0.01922073, 1.99650101, 0.0228515 , 1.01231616,
       1.98099196, 1.98099196, 0.99619793])

In [15]:
# Test-set MSE for the predictions currently held in y_pred (the tuned model's).
tuned_mse = metrics.mean_squared_error(y_test, y_pred)
print(tuned_mse)

0.018197414174284852
