In [None]:
# --- 第 1 部分 ---
# 載入函式庫與資料集
from sklearn.datasets import load_breast_cancer
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold
from sklearn import metrics
import numpy as np
# Load the breast cancer dataset that ships with scikit-learn.
bc = load_breast_cancer()

# Positional hold-out split: the first 400 rows are used for training,
# every remaining row is used for testing. Features and labels are sliced
# at the same boundary so they stay aligned.
train_x, test_x = bc.data[:400], bc.data[400:]
train_y, test_y = bc.target[:400], bc.target[400:]


In [None]:
# --- Part 2 ---
# Create the base learners and the meta-learner.
knn = KNeighborsClassifier(n_neighbors=2)
dtr = DecisionTreeClassifier(max_depth=4, random_state=2)
mlpc = MLPClassifier(hidden_layer_sizes=(100,), random_state=2)

# The base learners live in a list so that later cells can iterate over them
# in a fixed order (the order also determines the meta-feature columns).
base_learners = [knn, dtr, mlpc]

# The meta-learner is trained later on the base learners' outputs.
meta_learner = LogisticRegression()


In [None]:
# --- Part 3 ---
# Generate the meta-data used to train the meta-learner.

# Storage for the meta-data and its labels: one row per base learner and one
# column per training sample (transposed at the end of this cell).
n_train_samples = len(train_x)
meta_data = np.zeros((len(base_learners), n_train_samples))
meta_targets = np.zeros(n_train_samples)

# Cross-validation: each base learner is fitted on K-1 folds and predicts the
# held-out fold, so every training sample gets an out-of-fold prediction.
KF = KFold(n_splits=5)
index = 0  # write cursor: first unfilled column of meta_data / meta_targets
for train_indices, test_indices in KF.split(train_x):
    fold_end = index + len(test_indices)
    fold_fit_x, fold_fit_y = train_x[train_indices], train_y[train_indices]
    fold_val_x = train_x[test_indices]

    for row, learner in enumerate(base_learners):
        learner.fit(fold_fit_x, fold_fit_y)
        # Only column 0 of predict_proba is kept as this learner's meta-feature.
        meta_data[row, index:fold_end] = learner.predict_proba(fold_val_x)[:, 0]

    # Store the true labels of the held-out fold at the same positions.
    meta_targets[index:fold_end] = train_y[test_indices]
    index = fold_end

# Transpose to (samples, learners), the layout the meta-learner is fitted on.
meta_data = meta_data.T


In [None]:
# --- Part 4 ---
# Generate the meta-learner's test data and record each base learner's own
# accuracy on the test split.
test_meta_data = np.zeros((len(base_learners), len(test_x)))
base_acc = []
for i, b in enumerate(base_learners):
    # Refit on the whole training split: the cross-validation cell left each
    # learner fitted on only a subset of the training rows.
    b.fit(train_x, train_y)

    # Same meta-feature as in the cross-validation cell: column 0 of predict_proba.
    test_meta_data[i] = b.predict_proba(test_x)[:, 0]

    # Append the score directly instead of binding it to a global named `acc`.
    # `acc` is the name the results cell prints as the ensemble accuracy, so
    # assigning it here would make that cell report the last base learner's
    # score if this cell were re-run after the meta-learner cell.
    base_acc.append(metrics.accuracy_score(test_y, b.predict(test_x)))

# Transpose to (samples, learners) to match the layout of the training meta-data.
test_meta_data = test_meta_data.transpose()


In [None]:
# --- Part 5 ---
# Train the meta-learner on the cross-validated meta-data, then let it predict
# from the base learners' outputs on the test split.
# fit() returns the fitted estimator, so predict() can be chained onto it.
ensemble_predictions = meta_learner.fit(meta_data, meta_targets).predict(test_meta_data)

# Accuracy of the stacked ensemble on the test split.
acc = metrics.accuracy_score(y_true=test_y, y_pred=ensemble_predictions)


In [None]:
# --- Part 6 ---
# Show the results: one line per base learner, followed by the ensemble.
print('Acc  Name')
print('-' * 20)
for position, model in enumerate(base_learners):
    # base_acc holds the scores in the same order as base_learners.
    print(f'{base_acc[position]:.2f} {type(model).__name__}')
print(f'{acc:.2f} Ensemble')
