##### Voting Classification

##### Dataset

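Download `Social_Network_Ads.csv` and place it in a `datasets/` folder next to this notebook (the loading cell reads `datasets/Social_Network_Ads.csv`).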

Location: https://mitu.co.in/dataset

Import the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

Load the dataset

In [2]:
# Read the ads dataset from the local datasets/ folder (the path is relative
# to the notebook's working directory) and show it as the cell output.
csv_path = 'datasets/Social_Network_Ads.csv'
df = pd.read_csv(csv_path)
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0
...,...,...,...,...,...
395,15691863,Female,46.0,41000.0,1
396,15706071,Male,51.0,23000.0,1
397,15654296,Female,50.0,20000.0,1
398,15755018,Male,36.0,33000.0,0


Separate the input and output data

In [3]:
# Input data: the two numeric columns used as predictors.
feature_columns = ['Age', 'EstimatedSalary']
x = df[feature_columns]

# Output data: the label column the classifiers learn to predict.
target_column = 'Purchased'
y = df[target_column]

Feature Scaling

In [4]:
from sklearn.preprocessing import MinMaxScaler

# Learn each feature column's minimum and maximum from x, then rescale x
# with those learned bounds (MinMaxScaler's default output range).
scaler = MinMaxScaler()
scaler.fit(x)
x_scaled = scaler.transform(x)

Train/Test Split (hold-out validation)

In [5]:
from sklearn.model_selection import train_test_split

# Split the raw (unscaled) features first, so the held-out rows play no part
# in choosing the scaling range. Fitting the scaler on the full dataset before
# splitting leaks test-set min/max information into preprocessing.
x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=0)

# Fit the min-max bounds on the training rows only, then apply that same
# transform to both partitions. MinMaxScaler is imported in the Feature
# Scaling cell, which runs before this one. Test values outside the training
# range can map slightly outside the scaler's output range; that is expected.
scaler = MinMaxScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

In [6]:
# Shapes of the four partitions, in the order:
# train features, test features, train labels, test labels.
tuple(part.shape for part in (x_train, x_test, y_train, y_test))

((300, 2), (100, 2), (300,), (100,))

Import All Classifiers

In [7]:
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

Create the classifier objects

In [41]:
# Probabilistic / linear models.
nb = GaussianNB()
log = LogisticRegression()

# Neighbour-based and kernel-based models.
knn = KNeighborsClassifier(n_neighbors=5)
svm = SVC(kernel='poly')

# Tree-based models; random_state is pinned so repeated runs build the same trees.
dt = DecisionTreeClassifier(random_state=0)
rf = RandomForestClassifier(random_state=0)

Train the models

In [42]:
# Fit every base classifier on the same training partition, in the same order
# as before. The random forest is fitted last as a bare expression so that its
# fitted-estimator repr remains the cell's displayed output.
for _estimator in (nb, log, knn, svm, dt):
    _estimator.fit(x_train, y_train)
rf.fit(x_train, y_train)

In [43]:
from sklearn.metrics import ConfusionMatrixDisplay, confusion_matrix
from sklearn.metrics import classification_report, accuracy_score

In [44]:
# Predict the held-out partition with each fitted classifier. The generator
# yields predictions in the same order as the names on the left-hand side.
y_pred_nb, y_pred_log, y_pred_knn, y_pred_svm, y_pred_dt, y_pred_rf = (
    fitted.predict(x_test) for fitted in (nb, log, knn, svm, dt, rf)
)

In [45]:
# Report the test-set accuracy of each individual classifier, one per line.
labelled_predictions = (
    ('NB:', y_pred_nb),
    ('Log:', y_pred_log),
    ('KNN:', y_pred_knn),
    ('SVM:', y_pred_svm),
    ('DT:', y_pred_dt),
    ('RF:', y_pred_rf),
)
for model_label, model_pred in labelled_predictions:
    print(model_label, '\t', accuracy_score(y_test, model_pred))

NB: 	 0.9
Log: 	 0.89
KNN: 	 0.93
SVM: 	 0.92
DT: 	 0.9
RF: 	 0.92


##### Stacking Classification

In [46]:
from sklearn.ensemble import StackingClassifier

In [47]:
# Base learners whose predictions feed the stacked model, as (name, estimator) pairs.
base_estimators = [
    ('nb', nb),
    ('dt', dt),
    ('rf', rf),
    ('knn', knn),
]

# Logistic regression combines the base learners' outputs into the final prediction.
stack = StackingClassifier(estimators=base_estimators, final_estimator=log)

In [48]:
# Train the stacked ensemble on the training partition.
stack.fit(X=x_train, y=y_train)

In [49]:
# Predict the held-out partition with the stacked ensemble.
y_pred = stack.predict(X=x_test)

In [50]:
# Test-set accuracy of the stacked ensemble (shown as the cell output).
accuracy_score(y_true=y_test, y_pred=y_pred)

0.94

In [51]:
# Per-class precision, recall and F1 for the stacked ensemble's predictions.
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

              precision    recall  f1-score   support

           0       0.96      0.96      0.96        68
           1       0.91      0.91      0.91        32

    accuracy                           0.94       100
   macro avg       0.93      0.93      0.93       100
weighted avg       0.94      0.94      0.94       100



In [52]:
# Confusion matrix of true labels against the stacked ensemble's predictions.
confusion_matrix(y_true=y_test, y_pred=y_pred)

array([[65,  3],
       [ 3, 29]], dtype=int64)