In [1]:
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import VotingClassifier

from sklearn.metrics import accuracy_score
from sklearn import datasets
from sklearn.model_selection import train_test_split

import warnings
warnings.filterwarnings("ignore")


In [2]:
brc = datasets.load_breast_cancer()

In [3]:
y = brc.target
x = brc.data

In [4]:
x.shape, y.shape

((569, 30), (569,))

In [5]:
xtrain, xtest, ytrain, ytest = train_test_split(x,y, random_state=9)

In [6]:
xtrain.shape, xtest.shape, ytrain.shape, ytest.shape

((426, 30), (143, 30), (426,), (143,))

In [7]:
lg = LogisticRegression()
sv = SVC()
nb = GaussianNB()
dt = DecisionTreeClassifier()

# Let's check the accuracy of individul classifiers.

In [8]:
for cls in (lg,sv,dt,nb):
    cls.fit(xtrain, ytrain)
    ypred = cls.predict(xtest)
    print(cls.__class__.__name__, accuracy_score(ytest, ypred))

LogisticRegression 0.958041958041958
SVC 0.9090909090909091
DecisionTreeClassifier 0.958041958041958
GaussianNB 0.9440559440559441


# Voting Classifier

In [9]:
vote = VotingClassifier(
    estimators = [("log", lg), ("svc", sv), ("Dt", dt), ("nG", nb)],
    voting='hard',
    weights=None,
    n_jobs=None,
    flatten_transform=True,
)

# Let's check accuracy of classifiers including Voing classifier

In [10]:
for clf in (lg, sv, dt, nb, vote):
    clf.fit(xtrain, ytrain)
    ypred = clf.predict(xtest)
    print(clf.__class__.__name__, accuracy_score(ytest, ypred) )

LogisticRegression 0.958041958041958
SVC 0.9090909090909091
DecisionTreeClassifier 0.9440559440559441
GaussianNB 0.9440559440559441
VotingClassifier 0.972027972027972


# Above is the example of Hard Voting, let's try Soft Voting

In [11]:
vote = VotingClassifier(
    estimators = [("log", lg), ("svc", sv), ("Dt", dt), ("nG", nb)],
    voting='soft',
    weights=None,
    n_jobs=None,
    flatten_transform=True,
)

In [12]:
for clf in (lg, sv, dt, nb, vote):
    clf.fit(xtrain, ytrain)
    ypred = clf.predict(xtest)
    print(clf.__class__.__name__, accuracy_score(ytest, ypred) )

LogisticRegression 0.958041958041958
SVC 0.9090909090909091
DecisionTreeClassifier 0.972027972027972
GaussianNB 0.9440559440559441


AttributeError: predict_proba is not available when  probability=False

In [13]:
# Enable Probablilty in ensembles

In [15]:
sv = SVC(probability=True)

In [16]:
vote = VotingClassifier(
    estimators = [("log", lg), ("svc", sv), ("Dt", dt), ("nG", nb)],
    voting='soft',
    weights=None,
    n_jobs=None,
    flatten_transform=True,
)

In [18]:
for clf in (lg, sv, dt, nb, vote):
    clf.fit(xtrain, ytrain)
    ypred = clf.predict(xtest)
    print(clf.__class__.__name__, accuracy_score(ytest, ypred) )

LogisticRegression 0.958041958041958
SVC 0.9090909090909091
DecisionTreeClassifier 0.9440559440559441
GaussianNB 0.9440559440559441
VotingClassifier 0.9790209790209791
