# Ensemble
A group of predictors is called an Ensemble.

In [1]:
from sklearn.model_selection import train_test_split
from sklearn.datasets import make_moons

# Build a noisy two-moons toy dataset and hold out part of it for testing.
# One shared seed drives both the data generation and the split.
RANDOM_SEED = 42

X, y = make_moons(n_samples=500, noise=0.30, random_state=RANDOM_SEED)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=RANDOM_SEED
)

In [2]:
# Creates and trains a voting classifier composed of three diverse classifiers
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Three deliberately different model families: the more independent their
# errors are, the more a majority vote can correct individual mistakes.
log_clf = LogisticRegression()
# Random forests are randomized (bootstrap sampling, random feature subsets).
# Seed the forest, like the data and the split, so the reported accuracies
# are reproducible after Restart Kernel -> Run All.
rnd_clf = RandomForestClassifier(random_state=42)
svm_clf = SVC()

# Hard voting: the ensemble predicts the class that receives the most votes
# from the individual classifiers.
voting_clf = VotingClassifier(
    estimators=[('lr', log_clf), ('rf', rnd_clf), ('svc', svm_clf)],
    voting='hard'
)
voting_clf.fit(X_train, y_train)


VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('rf', RandomForestClassifier()), ('svc', SVC())])

In [4]:
# Obtain the accuracy 
from sklearn.metrics import accuracy_score
for clf in (log_clf, rnd_clf, svm_clf, voting_clf):
    # Train on the training split, then predict the held-out test split.
    y_pred = clf.fit(X_train, y_train).predict(X_test)
    # Report the estimator's class name next to its test-set accuracy.
    test_accuracy = accuracy_score(y_test, y_pred)
    print(type(clf).__name__, test_accuracy)

LogisticRegression 0.864
RandomForestClassifier 0.904
SVC 0.896
VotingClassifier 0.904


### Soft Voting
If all classifiers are able to estimate class probabilities (i.e., they have a `predict_proba()` method), then you can tell scikit-learn to predict the class with the highest probability, averaged over all the individual classifiers. <br>
This is called Soft Voting.<br>
Soft Voting often achieves higher performance than hard voting because it gives more weight to highly confident votes. <br>
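Simply change `voting='hard'` to `voting='soft'`. The SVC class does not estimate class probabilities by default, so with a default `SVC()` we must use hard voting; to use soft voting, create it with `SVC(probability=True)`, which adds a `predict_proba()` method at the cost of slower training.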

# Bagging and Pasting

