# Different Algorithms

In [20]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import VotingClassifier

from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score
from sklearn.metrics import classification_report
from sklearn.preprocessing import StandardScaler

In [21]:
data = pd.read_csv('data/Social_Network_Ads.csv')
data.drop('User ID',axis=1,inplace=True)
data.head()

Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,Male,19,19000,0
1,Male,35,20000,0
2,Female,26,43000,0
3,Female,27,57000,0
4,Male,19,76000,0


In [22]:
for i in range(data.shape[0]):
    if(data['Gender'][i]=='Male'):
        data['Gender'][i] = 1
    else:
        data['Gender'][i] = 0

data.head()

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Gender'][i] = 1
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  data['Gender'][i] = 0


Unnamed: 0,Gender,Age,EstimatedSalary,Purchased
0,1,19,19000,0
1,1,35,20000,0
2,0,26,43000,0
3,0,27,57000,0
4,1,19,76000,0


In [23]:
X = data.drop('Purchased',axis=1)
y = data['Purchased']
X_train, X_test, y_train, y_test = train_test_split(X,y,test_size = 0.2, random_state = 2) 

In [24]:
scaler = StandardScaler()
X_train2 = scaler.fit_transform(X_train)
X_test2 = scaler.transform(X_test)
X2 = scaler.fit_transform(X)

In [25]:
lr = LogisticRegression()
dt = DecisionTreeClassifier()
knn = KNeighborsClassifier()
gnb = GaussianNB()
svm = SVC(probability=True)

In [26]:
estimator_list = [('lr',lr),('dt',dt),('knn',knn),('gnb',gnb),('svm',svm)]

In [27]:
vt_clf_hard = VotingClassifier(estimators=estimator_list, voting='hard' )
vt_clf_soft = VotingClassifier(estimators=estimator_list, voting='soft' )

In [28]:
model_list = [lr,dt,knn,gnb,svm,vt_clf_hard,vt_clf_soft]

In [29]:
def cross_validate(model,X,y):
    cv_score = cross_val_score(model,X,y,cv=10,scoring='accuracy')
    print('model ::::::   ', model)
    print('cross validated score === ',cv_score.mean()*100)

In [30]:
for model in model_list:
    cross_validate(model,X2,y)

model ::::::    LogisticRegression()
cross validated score ===  82.25
model ::::::    DecisionTreeClassifier()
cross validated score ===  83.99999999999999
model ::::::    KNeighborsClassifier()
cross validated score ===  90.75
model ::::::    GaussianNB()
cross validated score ===  87.5
model ::::::    SVC(probability=True)
cross validated score ===  90.5
model ::::::    VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('knn', KNeighborsClassifier()),
                             ('gnb', GaussianNB()),
                             ('svm', SVC(probability=True))])
cross validated score ===  88.00000000000001
model ::::::    VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('knn', KNeighborsClassifier()),
                             ('gnb', GaussianNB()),
                             (

# Same Algorithm

In [31]:
svm1 = SVC(probability=True,C=1.0 )
svm2 = SVC(probability=True,C=5.0 )
svm3 = SVC(probability=True,C=20.0 )
svm4 = SVC(probability=True,C=50.0 )
svm5 = SVC(probability=True,C=100.0 )

In [32]:
estimator_list2 = [('m1',svm1),('m2',svm2),('m3',svm3),('m4',svm4),('m5',svm5)]

In [33]:
vt_clf_hard2 = VotingClassifier(estimators=estimator_list, voting='hard' )
vt_clf_soft2 = VotingClassifier(estimators=estimator_list, voting='soft' )

In [34]:
model_list2 = [lr,dt,knn,gnb,svm,vt_clf_hard2,vt_clf_soft2]

In [35]:
for model in model_list2:
    cross_validate(model,X2,y)

model ::::::    LogisticRegression()
cross validated score ===  82.25
model ::::::    DecisionTreeClassifier()
cross validated score ===  83.5
model ::::::    KNeighborsClassifier()
cross validated score ===  90.75
model ::::::    GaussianNB()
cross validated score ===  87.5
model ::::::    SVC(probability=True)
cross validated score ===  90.5
model ::::::    VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('knn', KNeighborsClassifier()),
                             ('gnb', GaussianNB()),
                             ('svm', SVC(probability=True))])
cross validated score ===  88.00000000000001
model ::::::    VotingClassifier(estimators=[('lr', LogisticRegression()),
                             ('dt', DecisionTreeClassifier()),
                             ('knn', KNeighborsClassifier()),
                             ('gnb', GaussianNB()),
                             ('svm', SVC(pr

#  Tuning Weights of each algorithm

In [36]:
#for i in range(1,6):
#    for j in range(1,6):
 #       for k in range(1,6):
  #          for l in range(1,6):
   #             for m in range(1,6):
    #                vt_clf = VotingClassifier(estimators=estimator_list, voting='soft',weights=[i,j,k,l,m] ,n_jobs=-1)
     #               cv_score = cross_val_score(vt_clf,X2,y,cv=10,scoring='accuracy')
      #              x = np.mean(cv_score)*100
       #             if x>=88.1:
        #                print('for i={},j={},k={},l={},m={}'.format(i,j,k,l,m),x)