# 作業
請使用不同的資料集，並使用 hyper-parameter search 的方式，看能不能找出最佳的超參數組合

In [1]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.datasets import load_wine
from sklearn.metrics import mean_squared_error
from sklearn.ensemble import GradientBoostingClassifier

In [2]:
wine = load_wine()

X_train, X_test, y_train, y_test = train_test_split(wine.data, wine.target, test_size=0.25, random_state=42)

In [3]:
gb = GradientBoostingClassifier()
gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("MSE with defaults:", mse)

MSE with defaults: 0.044444444444444446


In [4]:
n_estimators = [100, 200, 300]
max_depth = [1, 3, 5]
param_grid = dict(n_estimators=n_estimators, max_depth=max_depth)

grid_search = GridSearchCV(gb, param_grid, scoring='neg_mean_squared_error', n_jobs=-1, verbose=1)
grid_result = grid_search.fit(X_train, y_train)

print("Best Accuracy: %f using %s" % (grid_result.best_score_, grid_result.best_params_))

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:    8.8s finished


Best Accuracy: -0.030075 using {'max_depth': 1, 'n_estimators': 200}


In [5]:
gb = GradientBoostingClassifier(max_depth=5, n_estimators=300)
gb.fit(X_train, y_train)
y_pred = gb.predict(X_test)

mse = mean_squared_error(y_test, y_pred)
print("MSE with best parameters:", mse)

MSE with best parameters: 0.15555555555555556
