In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.ensemble import BaggingClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

In [2]:
data = pd.read_csv('data/Social_Network_Ads.csv')
data.drop('User ID',axis=1,inplace=True)
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [3]:
for i in range(data.shape[0]):
    if(data['Gender'][i]=='Male'):
        data['Gender'][i] = 1
    else:
        data['Gender'][i] = 0

data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Gender'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Gender'][i] = 0


Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [4]:
X = data.drop('Purchased',axis=1)
y = data['Purchased']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state = 2) 

In [5]:
scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)
X2 = scaler.fit_transform(X)

In [6]:
lr = LogisticRegression()
dt = DecisionTreeClassifier()
knn = KNeighborsClassifier()
gnb = GaussianNB()
svm = SVC(probability=True)
rf = RandomForestClassifier()
adab = AdaBoostClassifier()

In [7]:
estimator_list = [('lr',lr),('dt',dt),('knn',knn),('gnb',gnb),('svm',svm)]

In [8]:
vt_clf_hard = VotingClassifier(estimators=estimator_list, voting='hard' )
vt_clf_soft = VotingClassifier(estimators=estimator_list, voting='soft' )

In [9]:
bag = BaggingClassifier(base_estimator=dt,n_estimators=100,max_samples=0.3,bootstrap=True,n_jobs=-1)
past = BaggingClassifier(base_estimator=dt,n_estimators=100,max_samples=1,bootstrap=False,n_jobs=-1)
subspace = BaggingClassifier(base_estimator=dt,n_estimators=100,max_samples=1,bootstrap=False,max_features=0.5,bootstrap_features=True,n_jobs=-1)
patch = BaggingClassifier(base_estimator=dt,n_estimators=100,max_samples=0.3,bootstrap=True,max_features=0.5,bootstrap_features=True,n_jobs=-1)

In [10]:
model_list = [lr,dt,knn,gnb,svm,vt_clf_hard,vt_clf_soft,bag,past,subspace,patch,rf,adab]

In [11]:
def cross_validate(model,X,y):
    cv_score = cross_val_score(model,X,y,cv=10,scoring='accuracy')
    print('model ::::::   ', model)
    print('cross validated score === ',cv_score.mean()*100)

In [12]:
for model in model_list:
    cross_validate(model,X2,y)

model ::::::    LogisticRegression()
cross validated score ===  82.25
model ::::::    DecisionTreeClassifier()
cross validated score ===  83.5
model ::::::    KNeighborsClassifier()
cross validated score ===  90.75
model ::::::    GaussianNB()
cross validated score ===  87.5
model ::::::    SVC(probability=True)
cross validated score ===  90.5
model ::::::    VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('knn', KNeighborsClassifier()),
                             ('gnb', GaussianNB()),
                             ('svm', SVC(probability=True))])
cross validated score ===  88.00000000000001
model ::::::    VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('knn', KNeighborsClassifier()),
                             ('gnb', GaussianNB()),
                             ('svm', SVC(pr