### A `votingclassifier/regressor` uses the technique of employing several models on training and

### predict an outcome which is the collective prediction of each models. This technique of using more than one model is called`Ensemble`

In [1]:
import pandas as pd
import numpy as np
from sklearn.ensemble import VotingClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier as KNN
from sklearn.linear_model import LogisticRegression

In [9]:
df = pd.read_csv('../Datasets/Social_Network_Ads.csv')

In [10]:
df.head()

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19,19000,0
1,15810944,Male,35,20000,0
2,15668575,Female,26,43000,0
3,15603246,Female,27,57000,0
4,15804002,Male,19,76000,0


In [11]:
X = df.drop('Purchased',axis=1)
y = df['Purchased']

In [12]:
y

0      0
1      0
2      0
3      0
4      0
      ..
395    1
396    1
397    1
398    0
399    1
Name: Purchased, Length: 400, dtype: int64

In [13]:
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer

categorical_features = ["Gender"]
one_hot = OneHotEncoder()
transformer = ColumnTransformer([("one_hot", 
                                 one_hot, 
                                 categorical_features)],
                                 remainder="passthrough")
X = transformer.fit_transform(X)

In [14]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size=0.2,random_state=42)

## Instantiating the individual models

In [15]:
lr = LogisticRegression(random_state=42)
knn = KNN(n_neighbors=27)
dt = DecisionTreeClassifier(min_samples_leaf=0.13, random_state=42)

In [18]:
classifiers = [('Logistic Regression', lr), ('K Nearest Neighbours', knn), ('Decision Tree', dt)]

In [19]:
for clf_name, clf in classifiers:  
    clf.fit(X_train, y_train)    
    y_pred = clf.predict(X_test)
    accuracy = clf.score(X_test, y_test) 
    print(f"The accuracy of {clf_name} is {accuracy:.3}")

The accuracy of Logistic Regression is 0.738
The accuracy of K Nearest Neighbours is 0.725
The accuracy of Decision Tree is 0.925


In [20]:
vc = VotingClassifier(estimators=classifiers)

In [21]:
vc.fit(X_train, y_train)   

y_pred = vc.predict(X_test)

accuracy = vc.score(X_test, y_test)

print('Voting Classifier: {:.3f}'.format(accuracy))



Voting Classifier: 0.750
