##### Dataset

In [3]:
# Social_Network_Ads.csv
# location: https://mitu.co.in/dataset

In [4]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sn

In [5]:
df = pd.read_csv('datasets/Social_Network_Ads.csv')
df

Unnamed: 0,User ID,Gender,Age,EstimatedSalary,Purchased
0,15624510,Male,19.0,19000.0,0
1,15810944,Male,35.0,20000.0,0
2,15668575,Female,26.0,43000.0,0
3,15603246,Female,27.0,57000.0,0
4,15804002,Male,19.0,76000.0,0
...,...,...,...,...,...
395,15691863,Female,46.0,41000.0,1
396,15706071,Male,51.0,23000.0,1
397,15654296,Female,50.0,20000.0,1
398,15755018,Male,36.0,33000.0,0


##### Separate input , output

In [6]:
x = df[['Age', 'EstimatedSalary']]
y = df['Purchased']

##### Scale the data

In [7]:
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
x_scaled = scaler.fit_transform(x)

##### Cross Validation

In [8]:
from sklearn.model_selection import train_test_split
x_train, x_test, y_train, y_test = train_test_split(x_scaled, y, random_state=0)

##### Build all the models

In [9]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB

##### Make object of all Classes

In [10]:
clf_log = LogisticRegression()
clf_dt = DecisionTreeClassifier(random_state=0)
clf_rf = RandomForestClassifier(random_state=0, n_estimators = 10)
clf_knn = KNeighborsClassifier(n_neighbors=5)
clf_svm = SVC(random_state=0, kernel='rbf')
clf_nb = GaussianNB()

##### Train all models

In [38]:
clf_log.fit(x_train, y_train)
clf_dt.fit(x_train, y_train)
clf_rf.fit(x_train, y_train)
clf_knn.fit(x_train, y_train)
clf_svm.fit(x_train, y_train)
clf_nb.fit(x_train, y_train);

##### Performance Evaluation of all models

In [12]:
y_pred_log = clf_log.predict(x_test)
y_pred_dt = clf_dt.predict(x_test)
y_pred_rf = clf_rf.predict(x_test)
y_pred_knn = clf_knn.predict(x_test)
y_pred_svm = clf_svm.predict(x_test)
y_pred_nb = clf_nb.predict(x_test)

##### Accuracy of all models

In [13]:
from sklearn.metrics import confusion_matrix, classification_report
from sklearn.metrics import accuracy_score, ConfusionMatrixDisplay

In [14]:
print('Log:', accuracy_score(y_test, y_pred_log))
print('Dec Tree:', accuracy_score(y_test, y_pred_dt))
print('RF:', accuracy_score(y_test, y_pred_rf))
print('KNN:', accuracy_score(y_test, y_pred_knn))
print('SVM:', accuracy_score(y_test, y_pred_svm))
print('Naive Bayes:', accuracy_score(y_test, y_pred_nb))

Log: 0.89
Dec Tree: 0.9
RF: 0.93
KNN: 0.93
SVM: 0.93
Naive Bayes: 0.9


##### Create voting classifier

In [15]:
from sklearn.ensemble import VotingClassifier

In [16]:
vt = VotingClassifier(estimators= [ ('log', clf_log),
                                   ('dt', clf_dt),
                                   ('rf', clf_rf),
                                   ('knn', clf_knn),
                                   ('svm', clf_svm),
                                   ('nb', clf_nb)])

In [17]:
vt.fit(x_train, y_train)

In [18]:
y_pred_vt = vt.predict(x_test)

In [19]:
accuracy_score(y_test, y_pred_vt)

0.94

In [20]:
print(classification_report(y_test, y_pred_vt))

              precision    recall  f1-score   support

           0       0.96      0.96      0.96        68
           1       0.91      0.91      0.91        32

    accuracy                           0.94       100
   macro avg       0.93      0.93      0.93       100
weighted avg       0.94      0.94      0.94       100



In [21]:
# accuracy increased 

##### Apply Stacking

In [22]:
from sklearn.ensemble import StackingClassifier

In [30]:
stack = StackingClassifier(estimators= [('dt', clf_dt),
                                   ('rf', clf_rf),
                                   ('knn', clf_knn),
                                   ('log', clf_log),
                                   ('nb', clf_nb)],
                           final_estimator= clf_svm)
                           
                          

In [31]:
stack.fit(x_train, y_train)

In [34]:
y_pred_stack = stack.predict(x_test)

In [35]:
accuracy_score(y_test, y_pred_stack)

0.93

In [36]:
# highest accuracy is preserved