In [48]:
from sklearn.datasets import make_moons
from sklearn.ensemble import RandomForestClassifier, VotingClassifier, BaggingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier

In [34]:
# Toy binary-classification data: 500 noisy "two moons" points.
# Both the generator and the split are seeded so the partition is repeatable.
X, y = make_moons(random_state=42, n_samples=500, noise=0.30)
splits = train_test_split(X, y, random_state=42)
X_train, X_test, y_train, y_test = splits

In [35]:
# ensemble learning can be used to get better accuracy than a single model
# hard voting - aggregate the predictions of multiple models by majority vote (the class predicted most often wins)
# soft voting - predict the class with the highest probability averaged over all classifiers

In [43]:
# Three different base learners that the voting ensembles combine.
# The random forest and the probability-calibrated SVC both involve randomness,
# so they are seeded (like make_moons / train_test_split) to make re-runs repeatable.
# NOTE: scores recorded from earlier unseeded runs may differ slightly.
log_clf = LogisticRegression()
rnd_clf = RandomForestClassifier(random_state=42)
# probability=True enables predict_proba, which soft voting requires.
svm_clf = SVC(probability=True, random_state=42)

In [44]:
# The same three named base learners feed both ensembles; only the voting rule differs.
base_estimators = [('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)]

# Hard voting: majority vote over the predicted class labels.
voting_clf = VotingClassifier(estimators=list(base_estimators), voting='hard')

# Soft voting: pick the class with the highest averaged predicted probability.
voting_clf_s = VotingClassifier(estimators=list(base_estimators), voting='soft')

In [46]:
# Fit every model on the training split and report its test-set accuracy.
# The two VotingClassifier rows print in this order: hard voting, then soft voting.
models_to_compare = (log_clf, rnd_clf, svm_clf, voting_clf, voting_clf_s)
for clf in models_to_compare:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    test_accuracy = accuracy_score(y_test, y_pred)
    print(type(clf).__name__, test_accuracy)

LogisticRegression 0.864
RandomForestClassifier 0.888
SVC 0.896
VotingClassifier 0.896
VotingClassifier 0.912


In [47]:
# bagging and pasting are also ensemble methods
# both use the same algorithm for every predictor, but train each predictor on a different random subset of the data
# bagging - subsets are sampled with replacement
# pasting - subsets are sampled without replacement

In [59]:
# Bagging ensemble: 500 decision trees, each trained on 100 sampled instances.
# Seeded so that the resampling (and therefore the test / out-of-bag scores)
# is repeatable across kernel restarts.
bag_clf = BaggingClassifier(
    DecisionTreeClassifier(),
    n_estimators=500,
    max_samples=100,   # an int means an absolute number of samples per predictor
    bootstrap=True,    # True = bagging (with replacement), False = pasting
    n_jobs=-1,         # train the predictors in parallel on all available cores
    oob_score=True,    # out-of-bag instances: data a predictor never trained on, usable for evaluation
    random_state=42,   # fixes the random sampling of training subsets
)

In [60]:
# Train the bagging ensemble on the training split; as the cell's last
# expression, the fitted estimator is also displayed below.
bag_clf.fit(X_train, y_train)

BaggingClassifier(base_estimator=DecisionTreeClassifier(), max_samples=100,
                  n_estimators=500, n_jobs=-1, oob_score=True)

In [61]:
# Predict labels for the held-out test split with the fitted bagging ensemble.
y_pred = bag_clf.predict(X_test)

In [62]:
# Test-set accuracy of the bagging ensemble (fraction of correct predictions).
accuracy_score(y_test, y_pred)

0.912

In [65]:
# The out-of-bag score evaluates each training instance using only the
# predictors that never saw it, so it can be used instead of a separate
# validation set. It is available because the ensemble was built with oob_score=True.
bag_clf.oob_score_

0.9226666666666666