# Set up

In [34]:
import numpy as np
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()

from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn.tree import DecisionTreeClassifier
from imblearn.over_sampling import SMOTE
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import roc_auc_score

import pickle

In [4]:
train = pd.read_csv('../lipika/cleaned_2013_14')
test = pd.read_csv('../lipika/cleaned_2015')

# Run model

In [29]:
def split_data(df, cols):
    x = df.drop(cols, axis = 1)
    y = df.paid
    return x, y

cols_to_drop_training = ['loan_status', 'paid', 'amnt', 'total_pymnt', 'term_adj']
x_train_initial, y_train_initial = split_data(train, cols_to_drop_training)

# drop NAs from test
test_noNAs = test.dropna()
x_test, y_test = split_data(test_noNAs, cols_to_drop_training)

In [11]:
sm = SMOTE(random_state=1, ratio = 1.0)
x_train, y_train = sm.fit_sample(x_train_initial, y_train_initial)

In [20]:
def adaboost_model(x, y, dtree_depth=3, n_est=50, lr=0.1):
    model = make_pipeline(
    PolynomialFeatures(degree=2, include_bias=False),
    AdaBoostClassifier(DecisionTreeClassifier(max_depth=dtree_depth), n_estimators=n_est, learning_rate=lr))
    model.fit(x, y)
    return model

In [21]:
ada1 = adaboost_model(x_train, y_train)

In [31]:
ada1_class_acc_train = ada1.score(x_train, y_train)
ada1_class_acc_test = ada1.score(x_test, y_test)
ada1_auc_train = roc_auc_score(y_train, ada1.predict(x_train))
ada1_auc_test = roc_auc_score(y_test, ada1.predict(x_test))

In [32]:
print('Classification accuracy rate (train):', ada1_class_acc_train)
print('Classification accuracy rate (test):', ada1_class_acc_test)
print('AUC (train):', ada1_auc_train)
print('AUC (test):', ada1_auc_test)

Classification accuracy rate (train): 0.7942669319263039
Classification accuracy rate (test): 0.6575107469373751
AUC (train): 0.7942669319263038
AUC (test): 0.6092821309181438


In [35]:
# Dump the trained Adaboost model with Pickle
# Open the file to save as pkl file
adaboost_pkl = open('Adaboost1.pkl', 'wb')
pickle.dump(ada1, adaboost_pkl)
# Close the pickle instances
adaboost_pkl.close()