In [1]:
import numpy as np 
import warnings 
from sklearn import datasets 
from sklearn import model_selection 
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.naive_bayes import GaussianNB
# Silence every warning category for the rest of the session.
# NOTE(review): a blanket 'ignore' also hides anything the estimators used
# later might warn about (e.g. convergence or deprecation notices) — consider
# narrowing this to the specific categories that are known to be noise.
warnings.filterwarnings('ignore')

In [2]:
# Load the bundled iris dataset.
iris = datasets.load_iris()

# Keep only the feature columns at index 1 and 2 (sepal width and petal length
# in scikit-learn's iris column order) so the models work on two features.
X = iris.data[:, 1:3]

# Integer class labels, one per sample.
y = iris.target

# Display the label vector as the cell output.
y

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [3]:
# Base estimators that are compared individually here and reused later in the
# notebook. random_state=1 is pinned on the two estimators that are given one
# so repeated runs produce the same scores.
clf1 = LogisticRegression(random_state=1)
clf2 = RandomForestClassifier(random_state=1)
clf3 = GaussianNB()

print('5 fold cross validation:\n')

# Display names, in the same order as the estimators above.
labels = ['Logistic Regression', 'Random Forest', 'Naive Bayes']

for clf, label in zip([clf1, clf2, clf3], labels):
    # Pass cv=5 explicitly: the header printed above promises 5 folds, and the
    # library default for `cv` has differed between scikit-learn releases
    # (3-fold before 0.22). This also matches the later comparison cell,
    # which passes cv=5 as well.
    scores = model_selection.cross_val_score(clf, X, y, cv=5,
                                             scoring='accuracy')

    # Mean accuracy over the folds, with the fold-to-fold standard deviation.
    print('Accuracy: %0.2f (+/- %0.2f) [%s]'
          % (scores.mean(), scores.std(), label))

5 fold cross validation:

Accuracy: 0.95 (+/- 0.04) [Logistic Regression]
Accuracy: 0.94 (+/- 0.04) [Random Forest]
Accuracy: 0.91 (+/- 0.04) [Naive Bayes]


In [4]:
# Ensemble of the three base estimators using voting='hard' (per the
# scikit-learn VotingClassifier docs: majority vote over predicted labels).
# Each estimator is paired with its display name from `labels`.
voting_clf_hard = VotingClassifier(
    estimators=list(zip(labels, (clf1, clf2, clf3))),
    voting='hard',
)

# Show the (unfitted) ensemble configuration as the cell output.
voting_clf_hard

VotingClassifier(estimators=[('Logistic Regression',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=1, solver='lbfgs',
                                                 tol=0.0001, verbose=0,
                                                 warm_start=False)),
                             ('Random Forest',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0...
                                                     max_leaf_nodes=None,
                  

In [5]:
# Ensemble of the three base estimators using voting='soft' (per the
# scikit-learn VotingClassifier docs: argmax of summed predicted
# probabilities). Each estimator is paired with its display name from `labels`.
voting_clf_soft = VotingClassifier(
    estimators=list(zip(labels, (clf1, clf2, clf3))),
    voting='soft',
)

# Show the (unfitted) ensemble configuration as the cell output.
voting_clf_soft

VotingClassifier(estimators=[('Logistic Regression',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=1, solver='lbfgs',
                                                 tol=0.0001, verbose=0,
                                                 warm_start=False)),
                             ('Random Forest',
                              RandomForestClassifier(bootstrap=True,
                                                     ccp_alpha=0...
                                                     max_leaf_nodes=None,
                  

In [6]:
# Display names for the three base estimators followed by the two ensembles,
# in the same order as the estimator tuple iterated below.
labels_new = [
    'Logistic Regression',
    'Random Forest',
    'Naive Bayes',
    'Voting_classifier_hard',
    'Voting_Classifier_soft',
]

# Score every model with the same 5-fold accuracy protocol so the base
# estimators and the voting ensembles can be compared side by side.
for clf, label in zip(
        (clf1, clf2, clf3, voting_clf_hard, voting_clf_soft), labels_new):
    scores = model_selection.cross_val_score(
        clf, X, y, cv=5, scoring='accuracy')
    # Mean accuracy over the folds, with the fold-to-fold standard deviation.
    print('Accuracy: %0.2f (+/- %0.2f) [%s]'
          % (np.mean(scores), np.std(scores), label))

Accuracy: 0.95 (+/- 0.04) [Logistic Regression]
Accuracy: 0.94 (+/- 0.04) [Random Forest]
Accuracy: 0.91 (+/- 0.04) [Naive Bayes]
Accuracy: 0.95 (+/- 0.04) [Voting_classifier_hard]
Accuracy: 0.95 (+/- 0.03) [Voting_Classifier_soft]
