In [1]:
from sklearn import datasets
data_breast_cancer = datasets.load_breast_cancer(as_frame=True)

In [4]:
X = data_breast_cancer["data"]
y = data_breast_cancer["target"]

In [10]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

In [7]:
from sklearn.tree import DecisionTreeClassifier
tree_clf = DecisionTreeClassifier()

In [8]:
from sklearn.linear_model import LogisticRegression
log_clf = LogisticRegression()

In [9]:
from sklearn.neighbors import KNeighborsClassifier
knn_clf = KNeighborsClassifier()

In [12]:
from sklearn.ensemble import VotingClassifier
voting_clf_hard = VotingClassifier(estimators=[('tr', tree_clf), ('lr', log_clf), ('knn', knn_clf)], voting='hard')
voting_clf_soft = VotingClassifier(estimators=[('tr', tree_clf), ('lr', log_clf), ('knn', knn_clf)], voting='soft')

In [13]:
X_train_bc = X_train[["mean texture", "mean symmetry"]]
X_test_bc = X_test[["mean texture", "mean symmetry"]]

In [25]:
acc_vote = []

In [26]:
from sklearn.metrics import accuracy_score
for clf in (tree_clf, log_clf, knn_clf, voting_clf_hard, voting_clf_soft):
    clf.fit(X_train_bc, y_train)
    y_pred_train = clf.predict(X_train_bc)
    y_pred_test = clf.predict(X_test_bc)
    acc_train = accuracy_score(y_train, y_pred_train)
    acc_test = accuracy_score(y_test, y_pred_test)
    acc_vote.append((acc_train, acc_test))

In [23]:
clfs_vote = [tree_clf, log_clf, knn_clf, voting_clf_hard, voting_clf_soft]
clfs_vote

[DecisionTreeClassifier(),
 LogisticRegression(),
 KNeighborsClassifier(),
 VotingClassifier(estimators=[('tr', DecisionTreeClassifier()),
                              ('lr', LogisticRegression()),
                              ('knn', KNeighborsClassifier())]),
 VotingClassifier(estimators=[('tr', DecisionTreeClassifier()),
                              ('lr', LogisticRegression()),
                              ('knn', KNeighborsClassifier())],
                  voting='soft')]

In [27]:
acc_vote

[(1.0, 0.7456140350877193),
 (0.6989010989010989, 0.7543859649122807),
 (0.7582417582417582, 0.7105263157894737),
 (0.8285714285714286, 0.7368421052631579),
 (0.9626373626373627, 0.7543859649122807)]

In [28]:
import pickle
with open('acc_vote.pkl', 'wb') as fp:
    pickle.dump(acc_vote, fp)

In [29]:
with open('vote.pkl', 'wb') as fp:
    pickle.dump(clfs_vote, fp)

In [31]:
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier

bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=30, bootstrap=True)
bag_clf_05 = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, n_estimators=30, bootstrap=True)
past_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=30, bootstrap=False)
past_clf_05 = BaggingClassifier(DecisionTreeClassifier(), max_samples=0.5, n_estimators=30, bootstrap=False)

In [32]:
from sklearn.ensemble import RandomForestClassifier
rf_clf = RandomForestClassifier(n_estimators=30)

In [33]:
from sklearn.ensemble import AdaBoostClassifier
ab_clf = AdaBoostClassifier(n_estimators=30)

In [35]:
from sklearn.ensemble import GradientBoostingClassifier
gb_clf = GradientBoostingClassifier(n_estimators=30)

In [37]:
acc_bag = []

In [38]:
for clf in (bag_clf, bag_clf_05, past_clf, past_clf_05, rf_clf, ab_clf, gb_clf):
    clf.fit(X_train_bc, y_train)
    y_pred_train = clf.predict(X_train_bc)
    y_pred_test = clf.predict(X_test_bc)
    acc_train = accuracy_score(y_train, y_pred_train)
    acc_test = accuracy_score(y_test, y_pred_test)
    acc_bag.append((acc_train, acc_test))

In [39]:
clfs_bag = (bag_clf, bag_clf_05, past_clf, past_clf_05, rf_clf, ab_clf, gb_clf)

In [40]:
acc_bag

[(0.9956043956043956, 0.7982456140350878),
 (0.9186813186813186, 0.8333333333333334),
 (1.0, 0.7456140350877193),
 (0.9736263736263736, 0.8157894736842105),
 (1.0, 0.7631578947368421),
 (0.7868131868131868, 0.8245614035087719),
 (0.7978021978021979, 0.8070175438596491)]

In [41]:
clfs_bag

(BaggingClassifier(base_estimator=DecisionTreeClassifier(), n_estimators=30),
 BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=0.5,
                   n_estimators=30),
 BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                   n_estimators=30),
 BaggingClassifier(base_estimator=DecisionTreeClassifier(), bootstrap=False,
                   max_samples=0.5, n_estimators=30),
 RandomForestClassifier(n_estimators=30),
 AdaBoostClassifier(n_estimators=30),
 GradientBoostingClassifier(n_estimators=30))

In [42]:
with open('acc_bag.pkl', 'wb') as fp:
    pickle.dump(acc_bag, fp)

In [43]:
with open('bag.pkl', 'wb') as fp:
    pickle.dump(clfs_bag, fp)

In [44]:
fea_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=30, max_samples=0.5, max_features=2, bootstrap_features=False, bootstrap=True)

In [45]:
fea_clf.fit(X_train, y_train)
y_pred_train = fea_clf.predict(X_train)
y_pred_test = fea_clf.predict(X_test)
acc_train = accuracy_score(y_train, y_pred_train)
acc_test = accuracy_score(y_test, y_pred_test)

In [None]:
acc_fea = 