# Ensemble learning using the voting classifier

## Setup

In [1]:
import pandas as pd
from sklearn.datasets import make_classification, make_regression
from sklearn.model_selection import train_test_split
                                     
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import VotingClassifier

from collections import Counter

## Preparing the dataset for classification 

In [2]:
# Synthetic two-class problem: 500 samples described by 10 features.
X, y = make_classification(n_samples=500, n_features=10, random_state=42)

# Hold out 20% of the samples for testing; stratifying on y keeps the class
# proportions the same in the training and test splits.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# very same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [3]:
# Count how many samples carry each class label in the generated dataset
# (computed on the full y, i.e. before the train/test split).
Counter(y)

Counter({1: 250, 0: 250})

## Fitting the models

In [4]:
# (name, estimator) pairs in the format expected by VotingClassifier.
# Names and estimators are matched by position.
clf_list = list(zip(
    ['decision tree', 'logistic regression', 'knn', 'naive bayes classifier'],
    [DecisionTreeClassifier(), LogisticRegression(), KNeighborsClassifier(), GaussianNB()],
))

In [5]:
# Fit every base classifier on its own and print its accuracy on the test split.
for name, model in clf_list:
    # Seed the estimators that expose a `random_state` parameter so the run is
    # reproducible. set_params changes the instance stored in clf_list in place.
    if 'random_state' in model.get_params():
        model.set_params(random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # scikit-learn metrics take the ground truth first: (y_true, y_pred).
    acc = accuracy_score(y_test, y_pred)
    print(f"{name}'s accuracy: {acc:.2f}")

decision tree's accuracy: 0.86
logistic regression's accuracy: 0.85
knn's accuracy: 0.86
naive bayes classifier's accuracy: 0.87


## Using the `VotingClassifier`

In [6]:
# Hard voting: the ensemble predicts the class label chosen by the majority
# of the base classifiers.
voting_clf = VotingClassifier(clf_list, voting='hard')
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
print(f"Voting Classifier's accuracy: {accuracy_score(y_test, y_pred):.2f}")

Voting Classifier's accuracy: 0.88


In [7]:
# Soft voting: the ensemble combines the class probabilities predicted by the
# base classifiers and picks the most probable class, so every base estimator
# must implement predict_proba.
voting_clf = VotingClassifier(clf_list, voting='soft')
voting_clf.fit(X_train, y_train)
y_pred = voting_clf.predict(X_test)
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
print(f"Voting Classifier's accuracy: {accuracy_score(y_test, y_pred):.2f}")

Voting Classifier's accuracy: 0.85


## BONUS: `VotingRegressor`

In [8]:
# load the libraries 

from sklearn.datasets import make_regression
                                     
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression
from sklearn.ensemble import VotingRegressor

from sklearn.metrics import mean_squared_error


In [9]:
# Prepare the dataset: a synthetic regression problem with 500 samples and
# 10 features. NOTE: this rebinds X, y, the train/test splits and `scaler`
# that the classification cells above were using.
X, y = make_regression(n_samples=500, n_features=10, random_state=42)

# Hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training split only, then apply the
# very same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [10]:
# Define the (name, estimator) pairs in the format expected by VotingRegressor.
# Names and estimators are matched by position.
est_list = list(zip(
    ['decision tree', 'linear regression', 'knn'],
    [DecisionTreeRegressor(), LinearRegression(), KNeighborsRegressor()],
))

In [11]:
# Fit every base regressor on its own and print its mean squared error on the
# test split.
for name, model in est_list:
    # Seed the estimators that expose a `random_state` parameter so the run is
    # reproducible. set_params changes the instance stored in est_list in place.
    if 'random_state' in model.get_params():
        model.set_params(random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    # scikit-learn metrics take the ground truth first: (y_true, y_pred).
    mse = mean_squared_error(y_test, y_pred)
    print(f"{name}'s MSE: {mse:.2f}")

decision tree's MSE: 10651.04
linear regression's MSE: 0.00
knn's MSE: 4697.50


In [12]:
# VotingRegressor fits each base regressor and predicts the average of their
# individual predictions.
voting_reg = VotingRegressor(est_list)
voting_reg.fit(X_train, y_train)
y_pred = voting_reg.predict(X_test)
# The reported number is the ensemble regressor's mean squared error (lower is
# better), so label it as such rather than as a classifier's accuracy.
# scikit-learn metrics take the ground truth first: (y_true, y_pred).
print(f"Voting Regressor's MSE: {mean_squared_error(y_test, y_pred):.2f}")

Voting Classifier's accuracy: 2322.66
