# Stacking classification

In [1]:
import pandas as pd
import numpy as np

from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression

# Read the pre-processed bank data CSV and preview its first rows.
csv_path = 'data/bank_data_processed.csv'
bank_data = pd.read_csv(csv_path)
bank_data.head()

Unnamed: 0,Age,Income,Family,Education,Mortgage,Securities Account,CD Account,Online,CreditCard
0,34,180,1,3,0,0,0,0,0
1,38,130,4,3,134,0,0,0,0
2,46,193,2,3,0,0,0,0,0
3,38,119,1,2,0,0,1,1,1
4,42,141,3,3,0,1,1,1,0


In [2]:
# Separate the features from the target column.
# NOTE(review): the preview of bank_data lists an 'Unnamed: 0' column. If it is a
# real column it is presumably a row counter saved with the CSV rather than a
# customer attribute, so it is excluded from the features. errors='ignore' makes
# the drop a no-op when no such column exists -- confirm against the CSV.
X = bank_data.drop(columns=['CreditCard', 'Unnamed: 0'], errors='ignore')
Y = bank_data['CreditCard']

# Positional 70% / 20% / 10% split into train, hold-out and test, in file order.
# iloc slicing selects the same rows as np.split(X, [train_end, hold_out_end])
# without passing a DataFrame through NumPy's array splitting.
# NOTE(review): rows are not shuffled; if the CSV is ordered in any meaningful
# way, shuffle with a fixed seed before splitting.
train_end = int(.7 * len(X))
hold_out_end = int(.9 * len(X))

x_train = X.iloc[:train_end]
x_hold_out = X.iloc[train_end:hold_out_end]
x_test = X.iloc[hold_out_end:]

y_train = Y.iloc[:train_end]
y_hold_out = Y.iloc[train_end:hold_out_end]
y_test = Y.iloc[hold_out_end:]

# Base classifiers of the stack, all fitted on the training split only.
clf1 = KNeighborsClassifier(n_neighbors=10)
# Fixed seed so a Restart & Run All rebuilds the same forest and scores.
clf2 = RandomForestClassifier(n_estimators=50, random_state=42)
clf3 = GaussianNB()

for clf in (clf1, clf2, clf3):
    clf.fit(x_train, y_train)

In [3]:
def get_predictions(x, y, classifiers=None):
    """Predict with each base classifier and collect the results column-wise.

    Prints one line per classifier: its class name followed by its accuracy
    on ``(x, y)``.

    Parameters
    ----------
    x : feature rows passed to each classifier's ``predict``.
    y : true labels for ``x``; used only for the printed accuracy.
    classifiers : iterable of fitted classifiers, optional
        Classifiers to query, in column order. Defaults to the notebook-level
        ``(clf1, clf2, clf3)``.

    Returns
    -------
    pandas.DataFrame
        One column per classifier, named ``y_pred_1``, ``y_pred_2``, ... in
        the order the classifiers were given.
    """
    if classifiers is None:
        # Resolved at call time so existing two-argument calls keep using the
        # classifiers fitted earlier in the notebook.
        classifiers = (clf1, clf2, clf3)

    columns = {}
    for position, clf in enumerate(classifiers, start=1):
        y_pred = clf.predict(x)

        print(clf.__class__.__name__, accuracy_score(y, y_pred))

        columns['y_pred_' + str(position)] = y_pred

    # Build the frame once instead of inserting column by column.
    return pd.DataFrame(columns)

# Base-classifier predictions on the hold-out split (one column per classifier);
# the call also prints each classifier's accuracy on that split.
pred_result = get_predictions(x_hold_out, y_hold_out)

KNeighborsClassifier 0.7083333333333334
RandomForestClassifier 0.8125
GaussianNB 0.8229166666666666


In [4]:
# Training set for the second-level model: the base classifiers' hold-out
# predictions are the features, the true hold-out labels are the target.
x_stack_train, y_stack_train = pred_result, y_hold_out

# Show five randomly chosen hold-out labels.
y_stack_train.sample(n=5)

338    0
365    0
372    0
350    0
359    0
Name: CreditCard, dtype: int64

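Use a `LogisticRegression` classifier as the blender: it is trained on the base classifiers' hold-out predictions and then evaluated on the test split.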

In [5]:
# Fit the second-level classifier on the base classifiers' hold-out predictions.
clf_stack = LogisticRegression(solver='lbfgs', C=1, max_iter=200)
clf_stack.fit(x_stack_train, y_stack_train)

# Base-classifier predictions on the test split are the second-level inputs
# (get_predictions also prints each base classifier's test accuracy).
pred_result_test = get_predictions(x_test, y_test)
x_stack_test = pred_result_test
y_stack_pred = clf_stack.predict(x_stack_test)

# accuracy_score expects (y_true, y_pred). The value is the same either way for
# accuracy, but the documented order keeps this call consistent with
# get_predictions and correct if an asymmetric metric is substituted later.
accuracy_score(y_test, y_stack_pred)

KNeighborsClassifier 0.6041666666666666
RandomForestClassifier 0.8333333333333334
GaussianNB 0.8541666666666666


0.8541666666666666