## Voting Classifier

In [1]:
import pandas as pd
# Read the raw table and discard its "ID" column in one expression, so that
# re-running this cell always rebuilds `credit` from the file.
credit = pd.read_csv("CreditCardDefault.csv").drop(columns=["ID"])

In [2]:
# Feature matrix = column positions 0 through 22; target = the last column.
X, y = credit.iloc[:, :23], credit.iloc[:, -1]

In [3]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the rows for evaluation, stratifying on y so both splits keep
# the same class proportions. random_state pins the shuffle: without it every
# Restart & Run All draws a different split and the accuracies reported further
# down cannot be reproduced.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, stratify=y, random_state=42
)

In [4]:
from sklearn import preprocessing
# Standardize the features. The scaler is fitted on the training rows only and
# then applied to both splits, so the test rows never influence the scaling.
scaler = preprocessing.StandardScaler()
scaler.fit(X_train)
X_train, X_test = scaler.transform(X_train), scaler.transform(X_test)

### Voting

Reference: [scikit-learn `VotingClassifier` documentation](https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.VotingClassifier.html)

In [5]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC

In [6]:
# The three base learners that the voting ensembles combine, otherwise left at
# their default hyperparameters.
model1 = LogisticRegression()
# The tree permutes candidate features at every split, so ties between equally
# good splits can be resolved differently from run to run; fixing random_state
# makes the fitted tree (and its accuracy) reproducible.
model2 = DecisionTreeClassifier(random_state=42)
model3 = SVC()

In [7]:
# Train every base learner on the same standardized training split.
for base_model in (model1, model2, model3):
    base_model.fit(X_train, y_train)

In [8]:
# Class predictions of each fitted base learner on the held-out split.
model1_pred, model2_pred, model3_pred = (
    fitted.predict(X_test) for fitted in (model1, model2, model3)
)

In [9]:
from sklearn.metrics import accuracy_score

In [10]:
# Show the estimator's class name; the same lookup labels the accuracy report.
type(model1).__name__

'LogisticRegression'

In [11]:
# Report the test-set accuracy of each base learner, one
# "<ClassName> <accuracy>" line per model.
for fitted, predicted in (
    (model1, model1_pred),
    (model2, model2_pred),
    (model3, model3_pred),
):
    print(type(fitted).__name__, accuracy_score(y_test, predicted))

LogisticRegression 0.8141111111111111
DecisionTreeClassifier 0.7245555555555555
SVC 0.8221111111111111


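### Hard voting

With `voting="hard"` the ensemble predicts the class label chosen by the majority of the base learners.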

In [12]:
from sklearn.ensemble import VotingClassifier

In [13]:
# Pair each base learner with a short label, then build an ensemble that
# combines them with voting="hard".
estimators = list(zip(("lr", "dt", "svm"), (model1, model2, model3)))
voting_hard = VotingClassifier(estimators=estimators, voting="hard")

In [14]:
# Train the hard-voting ensemble on the training split. As the last expression
# in the cell, the value returned by fit() is displayed as the cell output.
voting_hard.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='lbfgs', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('dt',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',...
                                        

In [15]:
# Class predictions of the hard-voting ensemble on the held-out split.
voting_hard_pred = voting_hard.predict(X_test)

In [16]:
# Test-set accuracy of the hard-voting ensemble, for comparison with the
# individual base-learner accuracies printed earlier.
accuracy_score(y_test, voting_hard_pred)

0.8201111111111111

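### Soft voting

With `voting="soft"` the ensemble predicts the class with the highest summed predicted probability, so every base learner must provide `predict_proba`.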

In [17]:
# Same three base learners, combined with voting="soft" this time.
# Note: model3 was created as SVC() with `probability` left at its default.
estimators = list(zip(("lr", "dt", "svm"), (model1, model2, model3)))
voting_soft = VotingClassifier(estimators=estimators, voting="soft")

In [18]:
# Train the soft-voting ensemble on the training split. As the last expression
# in the cell, the value returned by fit() is displayed as the cell output.
voting_soft.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='lbfgs', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('dt',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',...
                                        

In [19]:
# Deliberate demonstration of a failure: the "svm" member of this ensemble was
# created as SVC() without probability=True, so soft voting cannot get class
# probabilities from it and predict() raises AttributeError. The error is caught
# and printed so the message stays visible while Restart & Run All can continue
# to the corrected ensemble.
try:
    voting_soft_pred = voting_soft.predict(X_test)
except AttributeError as err:
    print(f"{type(err).__name__}: {err}")

AttributeError: predict_proba is not available when  probability=False

In [20]:
# Rebuild the soft-voting ensemble with an SVC that can output probabilities.
# probability=True makes SVC calibrate probabilities using an internal,
# randomly shuffled cross-validation; random_state pins that shuffle so the
# ensemble's predictions are reproducible between runs.
model4 = SVC(probability=True, random_state=42)
estimators = [('lr', model1), ('dt', model2), ('svm', model4)]
voting_soft = VotingClassifier(estimators, voting="soft")

In [21]:
# Train the rebuilt soft-voting ensemble (the one whose SVC member has
# probability=True) on the training split; the value returned by fit() is
# displayed as the cell output.
voting_soft.fit(X_train, y_train)

VotingClassifier(estimators=[('lr',
                              LogisticRegression(C=1.0, class_weight=None,
                                                 dual=False, fit_intercept=True,
                                                 intercept_scaling=1,
                                                 l1_ratio=None, max_iter=100,
                                                 multi_class='auto',
                                                 n_jobs=None, penalty='l2',
                                                 random_state=None,
                                                 solver='lbfgs', tol=0.0001,
                                                 verbose=0, warm_start=False)),
                             ('dt',
                              DecisionTreeClassifier(ccp_alpha=0.0,
                                                     class_weight=None,
                                                     criterion='gini',...
                                        

In [23]:
# Class predictions of the rebuilt soft-voting ensemble on the held-out split.
voting_soft_pred = voting_soft.predict(X_test)

In [24]:
# Test-set accuracy of the soft-voting ensemble, for comparison with the
# hard-voting ensemble and the individual base learners.
accuracy_score(y_test, voting_soft_pred)

0.8118888888888889