#### 參考資料
完整調參數攻略-如何使用 Python 調整梯度提升機的超參數     
[complete-guide-parameter-tuning-gradient-boosting-gbm](https://www.analyticsvidhya.com/blog/2016/02/complete-guide-parameter-tuning-gradient-boosting-gbm-python/)

In [1]:
from sklearn import datasets, metrics
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import train_test_split

In [2]:
# Load the iris dataset
iris = datasets.load_iris()

# Split into training and test sets (25% held out; seeded so the split is repeatable)
x_train, x_test, y_train, y_test = train_test_split(iris.data, iris.target, test_size=0.25, random_state=4)

# Build the model. random_state is pinned because gradient boosting permutes
# the features at every split, so an unseeded fit can differ from run to run.
clf = GradientBoostingClassifier(random_state=4)

# Fit on the training set
clf.fit(x_train, y_train)

# Predict labels for the test set
y_pred = clf.predict(x_test)

In [3]:
# Fraction of test samples whose predicted label matches the true label
acc = metrics.accuracy_score(y_test, y_pred)
# Label spelling fixed ("Acuuracy" -> "Accuracy") to match the later cells
print("Accuracy: ", acc)

Acuuracy:  0.9736842105263158


### 作業
目前已經學過許多的模型，相信大家對整體流程應該已經比較熟悉了。這次作業請改用**手寫辨識資料集**，步驟流程都是一樣的，請試著自己撰寫程式碼來完成所有步驟。

In [4]:
# Load the handwritten digits dataset.
# `datasets` and `metrics` come from the import cell at the top of the notebook,
# so they are not re-imported here.
digits = datasets.load_digits()

In [5]:
# Quick look at the dataset: class labels, flattened feature matrix, raw image stack
for label, value in (
    ('Target:', digits.target_names),
    ('Data shape:', digits.data.shape),
    ('Image shape:', digits.images.shape),
):
    print(label, value)

Target: [0 1 2 3 4 5 6 7 8 9]
Data shape: (1797, 64)
Image shape: (1797, 8, 8)


In [6]:
# Hold out 25% of the digits samples as a test set (seeded so the split is repeatable)
x_train, x_test, y_train, y_test = train_test_split(
    digits.data,
    digits.target,
    test_size=0.25,
    random_state=4,
)

In [7]:
# Build the model with default hyperparameters. random_state is pinned because
# gradient boosting permutes the features at every split, so an unseeded fit
# can give a different accuracy on each run.
clf = GradientBoostingClassifier(random_state=4)

# Fit on the training set
clf.fit(x_train, y_train)

# Predict labels for the test set
y_pred = clf.predict(x_test)

acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)

# Last expression of the cell, so the matrix is displayed (rows: true class, columns: predicted class)
metrics.confusion_matrix(y_test, y_pred)

Accuracy:  0.9666666666666667


array([[47,  0,  0,  0,  0,  0,  0,  0,  1,  0],
       [ 1, 37,  0,  1,  0,  0,  0,  0,  0,  0],
       [ 1,  0, 47,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  1,  0, 40,  0,  0,  0,  0,  2,  0],
       [ 0,  0,  0,  0, 40,  2,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 47,  0,  0,  0,  2],
       [ 0,  0,  0,  0,  0,  0, 38,  0,  1,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 49,  0,  0],
       [ 0,  1,  0,  0,  0,  0,  0,  0, 42,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  1,  1, 48]], dtype=int64)

In [8]:
# Build the model with stochastic gradient boosting: each tree is fit on a
# random 80% of the training rows. Because the row sampling is random, the
# seed is pinned so this run can be compared fairly with the other settings.
clf = GradientBoostingClassifier(subsample=0.8, random_state=4)

# Fit on the training set
clf.fit(x_train, y_train)

# Predict labels for the test set
y_pred = clf.predict(x_test)

acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)

# Last expression of the cell, so the matrix is displayed (rows: true class, columns: predicted class)
metrics.confusion_matrix(y_test, y_pred)

Accuracy:  0.9777777777777777


array([[47,  0,  0,  0,  0,  0,  0,  0,  1,  0],
       [ 1, 37,  0,  1,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 48,  0,  0,  0,  0,  0,  0,  0],
       [ 0,  1,  0, 41,  0,  0,  0,  0,  0,  1],
       [ 0,  0,  0,  0, 41,  1,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 48,  0,  0,  0,  1],
       [ 0,  0,  0,  0,  0,  0, 38,  0,  1,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 49,  0,  0],
       [ 0,  1,  0,  0,  0,  0,  0,  0, 42,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  1, 49]], dtype=int64)

In [9]:
# Build the model with more boosting stages (200) and a larger learning rate (0.5).
# The seed is pinned so that differences from the other runs come from these
# hyperparameters and not from run-to-run randomness in the fit.
clf = GradientBoostingClassifier(n_estimators=200, learning_rate=0.5, random_state=4)

# Fit on the training set
clf.fit(x_train, y_train)

# Predict labels for the test set
y_pred = clf.predict(x_test)

acc = metrics.accuracy_score(y_test, y_pred)
print("Accuracy: ", acc)

# Last expression of the cell, so the matrix is displayed (rows: true class, columns: predicted class)
metrics.confusion_matrix(y_test, y_pred)

Accuracy:  0.9711111111111111


array([[48,  0,  0,  0,  0,  0,  0,  0,  0,  0],
       [ 1, 36,  1,  1,  0,  0,  0,  0,  0,  0],
       [ 0,  0, 47,  1,  0,  0,  0,  0,  0,  0],
       [ 0,  0,  0, 40,  0,  0,  1,  0,  1,  1],
       [ 0,  0,  0,  0, 41,  1,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0, 48,  0,  0,  0,  1],
       [ 0,  1,  0,  0,  0,  0, 37,  0,  1,  0],
       [ 0,  0,  0,  0,  0,  0,  0, 49,  0,  0],
       [ 0,  0,  1,  0,  0,  0,  0,  0, 42,  0],
       [ 0,  0,  0,  0,  0,  0,  0,  0,  1, 49]], dtype=int64)