### Ensemble classifier - soft voting

In [46]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
import numpy as np

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

logistic = LogisticRegression()
rndforest = RandomForestClassifier()
svc = SVC(probability=True) # probability=True will add predict_proba() method needed for soft voting

classifiers = [('LogisticRegression', logistic), ('RandomForest', rndforest), ('SVC', svc)]

# Individual classifiers
for name, clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("{} score: {}".format(name, accuracy_score(y_test, y_pred)))

# Ensemble classifier - voting soft
voting_clf_soft = VotingClassifier(
    estimators=classifiers,
    voting = 'soft',
    n_jobs = -1)
voting_clf_soft.fit(X_train, y_train)
y_pred = voting_clf_soft.predict(X_test)
print("Ensemble Classifier (soft voting) score: {}".format(accuracy_score(y_test, y_pred)))



LogisticRegression score: 0.9166666666666666
RandomForest score: 0.9333333333333333
SVC score: 0.95
Ensemble Classifier (soft voting) score: 0.9333333333333333


### Ensemble classifier - hard voting

In [47]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn import datasets
from sklearn.metrics import accuracy_score
import numpy as np

iris = datasets.load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.4, random_state=0)

logistic = LogisticRegression()
rndforest = RandomForestClassifier()
svc = SVC()

classifiers = [('LogisticRegression', logistic), ('RandomForest', rndforest), ('SVC', svc)]

# Individual classifiers
for name, clf in classifiers:
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)
    print("{} score: {}".format(name, accuracy_score(y_test, y_pred)))

# Ensemble classifier - voting hard
voting_clf_hard = VotingClassifier(
    estimators=classifiers,
    voting = 'hard',
    n_jobs = -1)
voting_clf_hard.fit(X_train, y_train)
y_pred = voting_clf_hard.predict(X_test)
print("Ensemble Classifier (hard voting) score: {}".format(accuracy_score(y_test, y_pred)))



LogisticRegression score: 0.9166666666666666
RandomForest score: 0.9166666666666666
SVC score: 0.95
Ensemble Classifier (hard voting) score: 0.9666666666666667
