In [2]:
import numpy as np

from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

cancer_data = load_breast_cancer()

X_data = cancer_data.data
y_label = cancer_data.target

X_train, X_test, y_train, y_test = train_test_split(X_data, y_label, test_size=0.2, random_state=0)

In [3]:
# Create individual ML models.
knn_clf = KNeighborsClassifier(n_neighbors=4)
rf_clf = RandomForestClassifier(n_estimators=100, random_state=0)
dt_clf = DecisionTreeClassifier()
ada_clf = AdaBoostClassifier(n_estimators=100)

# Train from the dataset created from the individual ML models above. (final model)
lr_final = LogisticRegression(C=10)

In [4]:
# Train individual models
knn_clf.fit(X_train, y_train)
rf_clf.fit(X_train, y_train)
dt_clf.fit(X_train, y_train)
ada_clf.fit(X_train, y_train)

knn_pred = knn_clf.predict(X_test)
rf_pred = rf_clf.predict(X_test)
dt_pred = dt_clf.predict(X_test)
ada_pred = ada_clf.predict(X_test)

In [5]:
print("KNN 정확도: {0:.4f}".format(accuracy_score(y_test, knn_pred)))
print("랜덤포레스트 정확도: {0:.4f}".format(accuracy_score(y_test, rf_pred)))
print("결정 트리 정확도: {0:.4f}".format(accuracy_score(y_test, dt_pred)))
print("에이다부스트 정확도: {0:.4f}".format(accuracy_score(y_test, ada_pred)))

KNN 정확도: 0.9211
랜덤포레스트 정확도: 0.9649
결정 트리 정확도: 0.9035
에이다부스트 정확도: 0.9561


In [6]:
# Now, create new dataset from the predictions
pred = np.array([knn_pred, rf_pred, dt_pred, ada_pred])
print(pred.shape)

# Use transpose to change the positions of the columns. Make each algorithm's prediction result as the features.
pred = np.transpose(pred)
print(pred.shape)

(4, 114)
(114, 4)


In [8]:
lr_final.fit(pred, y_test)
final = lr_final.predict(pred)

print('최종 메타 모델의 예측 정확도: {0:.4f}'.format(accuracy_score(y_test, final)))

최종 메타 모델의 예측 정확도: 0.9737


The stacking has improved the prediction accuracy to 97.37%. However, this kind of method does not guarantee an improved prediction. Also, it can cause overfitting of the model.