In [None]:
# --- Part 1 ---
# Load the libraries and the dataset
from copy import deepcopy
from sklearn.datasets import load_breast_cancer
from sklearn.tree import DecisionTreeClassifier
from sklearn import metrics
import numpy as np
bc = load_breast_cancer()
train_size = 400
train_x, train_y = bc.data[:train_size], bc.target[:train_size]
test_x, test_y = bc.data[train_size:], bc.target[train_size:]
np.random.seed(123456)


In [None]:
# --- 第 2 部分 ---
ensemble_size = 100
base_classifier = DecisionTreeClassifier(max_depth=1)

# 建立訓練資料集的索引串列
indices = [x for x in range(train_size)]

# 建立弱學習器串列
base_learners = []

# 設定初始權重與誤差
data_weights = np.zeros(train_size) + 1/train_size
learners_errors = np.zeros(ensemble_size)
learners_weights = np.zeros(ensemble_size)


In [None]:
# --- 第 3 部分 ---
# 訓練弱學習器
for i in range(ensemble_size):
    # 複製弱學習器
    weak_learner = deepcopy(base_classifier)

    # 加權抽樣
    # 每筆資料抽到的機率即其權重
    data_indices = np.random.choice(indices, train_size, p=data_weights)
    sample_x, sample_y = train_x[data_indices], train_y[data_indices]

    # 訓練、評估弱學習器
    weak_learner.fit(sample_x, sample_y)
    predictions = weak_learner.predict(train_x)
    errors = predictions != train_y
    corrects = predictions == train_y

    # 儲存學習器
    base_learners.append(weak_learner)

    # 計算加權錯誤
    weighted_errors = data_weights*errors
    # 計算平均加權錯誤
    learner_error = np.mean(weighted_errors)
    # 計算弱學習器權重
    learner_weight = np.log((1-learner_error)/learner_error)/2
    # 儲存計算結果
    learners_errors[i] = learner_error
    learners_weights[i] = learner_weight

    # 更新資料權重
    data_weights[errors] = np.exp(data_weights[errors] * 
                                  learner_weight)
    data_weights[corrects] = np.exp(-data_weights[corrects] * 
                                    learner_weight)
    # 權重正規化
    data_weights = data_weights / sum(data_weights)


In [None]:
# --- 第 4 部分 ---
# 集成
ensemble_predictions = []
for learner, weight in zip(base_learners, learners_weights):
    # 計算加權後的預測值
    prediction = learner.predict(test_x)
    ensemble_predictions.append(prediction * weight)

# 輸出預測分類
ensemble_predictions = np.mean(ensemble_predictions, 
                               axis = 0) >= 0.5

ensemble_acc = metrics.accuracy_score(test_y, 
                                      ensemble_predictions)

# 顯示準確率
print('Boosting: %.2f' % ensemble_acc)
