### Import Tools

In [1]:
import pickle
import pandas as pd

from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.metrics import f1_score

from yellowbrick.classifier import ConfusionMatrix
from sklearn.metrics import classification_report



### Import Train Data

In [2]:
# Transformed training features; the first CSV column is used as the row index.
X_train = pd.read_csv('../data/X_train_trans.csv', index_col=0)

# The context manager closes the file handle as soon as the labels are unpickled,
# instead of leaving it open for the lifetime of the kernel.
# NOTE: pickle.load can execute arbitrary code -- only load files this project produced.
with open('../data/y_train.pickle', 'rb') as pickle_in:
    y_train = pickle.load(pickle_in)

### Import Test Data

In [3]:
# Transformed hold-out features; the first CSV column is used as the row index.
X_test = pd.read_csv('../data/X_test_trans.csv', index_col=0)

# Read the pickled hold-out labels inside a context manager so the file handle
# is closed immediately rather than leaked.
# NOTE: pickle.load can execute arbitrary code -- only load files this project produced.
with open('../data/y_test.pickle', 'rb') as pickle_in:
    y_test = pickle.load(pickle_in)

### Fit Default Gradient Boosting Classifier 

In [4]:
from sklearn.ensemble import GradientBoostingClassifier

In [5]:
# Gradient boosting baseline with every hyperparameter left at its library default.
# NOTE(review): no random_state is passed (the stored repr shows random_state=None),
# so a re-run is not guaranteed to reproduce the stored metrics exactly -- consider pinning a seed.
gbc = GradientBoostingClassifier()

In [6]:
# Train on the transformed training features and the unpickled training labels.
# As the last expression of the cell, the fitted estimator's repr becomes the cell output.
gbc.fit(X_train,y_train)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=None, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [7]:
# Hard class predictions on the hold-out set; consumed by the metric and report cells.
y_pred_gbc = gbc.predict(X_test)
# Per-class probabilities; column 1 is what the ROC cell later passes to roc_curve.
y_probs_gbc = gbc.predict_proba(X_test)

In [8]:
# Hold-out summary for the gradient boosting model: accuracy comes from the
# estimator's own score(); the remaining metrics are computed from the hard predictions.
print('accuracy score:', gbc.score(X_test, y_test))
for _label, _metric in (
    ('recall:', recall_score),
    ('precision:', precision_score),
    ('f1:', f1_score),
):
    print(_label, _metric(y_test, y_pred_gbc))

accuracy score: 0.98
recall: 0.8896551724137931
precision: 0.9699248120300752
f1: 0.9280575539568345


In [None]:
# Display names for the target classes, defined once and shared by the plot and the
# text report. Assumes the sorted labels map to Maintain, then Churn -- TODO confirm encoding.
classes = ['Maintain', 'Churn']

# Confusion matrix for the gradient boosting model, scored on the hold-out set.
cm_gbc = ConfusionMatrix(gbc, classes=classes)
cm_gbc.score(X_test, y_test)
# NOTE(review): newer Yellowbrick releases appear to prefer show() over poof() --
# confirm against the installed version before switching.
cm_gbc.poof()

# Per-class precision / recall / F1, labelled with the same class names as the plot.
print(classification_report(y_test, y_pred_gbc, target_names=classes))


### Fit KNN Model (n_neighbors = 7)

In [None]:
from sklearn.neighbors import KNeighborsClassifier

In [None]:
# k-nearest-neighbours baseline using 7 neighbours, as stated in the section header.
knn = KNeighborsClassifier(n_neighbors=7)
# Fit on the training split; as the last expression, the result is displayed as the cell output.
knn.fit(X_train, y_train)

In [None]:
# Hard class predictions on the hold-out set; consumed by the metric and report cells.
y_pred_knn = knn.predict(X_test)
# Per-class probabilities; column 1 is what the ROC cell later passes to roc_curve.
y_probs_knn = knn.predict_proba(X_test)

In [None]:
# Hold-out summary for the KNN model: accuracy comes from the estimator's own
# score(); the remaining metrics are computed from the hard predictions.
print('accuracy score:', knn.score(X_test, y_test))
for _label, _metric in (
    ('recall:', recall_score),
    ('precision:', precision_score),
    ('f1:', f1_score),
):
    print(_label, _metric(y_test, y_pred_knn))

In [None]:
# Display names for the target classes, defined once and shared by the plot and the
# text report. Assumes the sorted labels map to Maintain, then Churn -- TODO confirm encoding.
classes = ['Maintain', 'Churn']

# Confusion matrix for the KNN model, scored on the hold-out set.
cm_knn = ConfusionMatrix(knn, classes=classes)
cm_knn.score(X_test, y_test)
# NOTE(review): newer Yellowbrick releases appear to prefer show() over poof() --
# confirm against the installed version before switching.
cm_knn.poof()

# Per-class precision / recall / F1, labelled with the same class names as the plot.
print(classification_report(y_test, y_pred_knn, target_names=classes))


### Fit Default Random Forest

In [None]:
from sklearn.ensemble import RandomForestClassifier

In [None]:
# Default hyperparameters, but with a fixed seed: forest training is randomised, so an
# unseeded model reports different metrics on every Restart & Run All. The seed value 3
# matches the random_state already passed to LogisticRegressionCV in this notebook.
clf = RandomForestClassifier(random_state=3)

In [None]:
# Train the random forest on the training split; as the last expression of the
# cell, the result is displayed as the cell output.
clf.fit(X_train, y_train)

In [None]:
# Hard class predictions on the hold-out set; consumed by the metric and report cells.
y_pred_clf = clf.predict(X_test)
# Per-class probabilities; column 1 is what the ROC cell later passes to roc_curve.
y_probs_clf = clf.predict_proba(X_test)

In [None]:
# Hold-out summary for the random forest: accuracy comes from the estimator's own
# score(); the remaining metrics are computed from the hard predictions.
print('accuracy score:', clf.score(X_test, y_test))
for _label, _metric in (
    ('recall:', recall_score),
    ('precision:', precision_score),
    ('f1:', f1_score),
):
    print(_label, _metric(y_test, y_pred_clf))

In [None]:
# Display names for the target classes, defined once and shared by the plot and the
# text report. Assumes the sorted labels map to Maintain, then Churn -- TODO confirm encoding.
classes = ['Maintain', 'Churn']

# Confusion matrix for the random forest, scored on the hold-out set.
cm_clf = ConfusionMatrix(clf, classes=classes)
cm_clf.score(X_test, y_test)
# NOTE(review): newer Yellowbrick releases appear to prefer show() over poof() --
# confirm against the installed version before switching.
cm_clf.poof()

# Per-class precision / recall / F1, labelled with the same class names as the plot.
print(classification_report(y_test, y_pred_clf, target_names=classes))


### Fit Decision Tree (max_depth = 2)

In [None]:
from sklearn.tree import DecisionTreeClassifier

In [None]:
# Shallow (depth-2) tree. A fixed seed keeps the fitted tree, and therefore the reported
# metrics, identical across re-runs; 3 matches the random_state already passed to
# LogisticRegressionCV in this notebook.
dtc = DecisionTreeClassifier(max_depth=2, random_state=3)

In [None]:
# Train the decision tree on the training split; as the last expression of the
# cell, the result is displayed as the cell output.
dtc.fit(X_train, y_train)

In [None]:
# Hard class predictions on the hold-out set; consumed by the metric and report cells.
y_pred_dtc = dtc.predict(X_test)
# Per-class probabilities; column 1 is what the ROC cell later passes to roc_curve.
y_probs_dtc = dtc.predict_proba(X_test)


In [None]:
# Hold-out summary for the decision tree: accuracy comes from the estimator's own
# score(); the remaining metrics are computed from the hard predictions.
print('accuracy score:', dtc.score(X_test, y_test))
for _label, _metric in (
    ('recall:', recall_score),
    ('precision:', precision_score),
    ('f1:', f1_score),
):
    print(_label, _metric(y_test, y_pred_dtc))


In [None]:
# Display names for the target classes, defined once and shared by the plot and the
# text report. Assumes the sorted labels map to Maintain, then Churn -- TODO confirm encoding.
classes = ['Maintain', 'Churn']

# Confusion matrix for the decision tree, scored on the hold-out set.
cm_dtc = ConfusionMatrix(dtc, classes=classes)
cm_dtc.score(X_test, y_test)
# NOTE(review): newer Yellowbrick releases appear to prefer show() over poof() --
# confirm against the installed version before switching.
cm_dtc.poof()

# Per-class precision / recall / F1, labelled with the same class names as the plot.
print(classification_report(y_test, y_pred_dtc, target_names=classes))

### Fit Cross-Validated Logistic Regression (LogisticRegressionCV: solver = 'lbfgs', Cs=50, penalty='l2', random_state=3, n_jobs=-1)

In [None]:
from sklearn.linear_model import LogisticRegression, LogisticRegressionCV

In [None]:
# L2-penalised logistic regression with built-in cross-validation (LogisticRegressionCV),
# using the lbfgs solver. This is the only model in the notebook that pins random_state.
lr= LogisticRegressionCV(solver = 'lbfgs', Cs=50, penalty='l2', random_state=3, n_jobs=-1)

In [None]:
# Train the logistic regression on the training split; as the last expression of
# the cell, the result is displayed as the cell output.
lr.fit(X_train, y_train)

In [None]:
# Hard class predictions on the hold-out set; consumed by the metric and report cells.
y_pred_lr = lr.predict(X_test)
# Per-class probabilities; column 1 is what the ROC cell later passes to roc_curve.
y_probs_lr = lr.predict_proba(X_test)

In [None]:
# Hold-out summary for the logistic regression: accuracy comes from the estimator's
# own score(); the remaining metrics are computed from the hard predictions.
print('accuracy score:', lr.score(X_test, y_test))
for _label, _metric in (
    ('recall:', recall_score),
    ('precision:', precision_score),
    ('f1:', f1_score),
):
    print(_label, _metric(y_test, y_pred_lr))

In [None]:
# Display names for the target classes, defined once and shared by the plot and the
# text report. Assumes the sorted labels map to Maintain, then Churn -- TODO confirm encoding.
classes = ['Maintain', 'Churn']

# Confusion matrix for the logistic regression, scored on the hold-out set.
cm = ConfusionMatrix(lr, classes=classes)
cm.score(X_test, y_test)
# NOTE(review): newer Yellowbrick releases appear to prefer show() over poof() --
# confirm against the installed version before switching.
cm.poof()

# Per-class precision / recall / F1, labelled with the same class names as the plot.
print(classification_report(y_test, y_pred_lr, target_names=classes))

In [None]:
from sklearn.metrics import roc_curve
import warnings
import seaborn as sns
import itertools
import matplotlib.pyplot as plt
%matplotlib inline

In [None]:
# ROC inputs for each fitted model: false-positive rate, true-positive rate and the
# matching thresholds, computed from column 1 of each model's predict_proba output.
# Only the fpr_*/tpr_* arrays are used by the plotting cell; thresholds_* are kept but unused there.
fpr_gbc, tpr_gbc, thresholds_gbc = roc_curve(y_test, y_probs_gbc[:,1])
fpr_clf, tpr_clf, thresholds_clf = roc_curve(y_test, y_probs_clf[:,1])
fpr_knn, tpr_knn, thresholds_knn = roc_curve(y_test, y_probs_knn[:,1])
fpr_dtc, tpr_dtc, thresholds_dtc = roc_curve(y_test, y_probs_dtc[:,1])
fpr_lr, tpr_lr, thresholds_lr = roc_curve(y_test, y_probs_lr[:,1])

In [None]:
def plot_roc_curve(fpr, tpr, label = None):
    """Draw one ROC curve on the current matplotlib axes.

    Parameters
    ----------
    fpr, tpr : array-like
        False-positive and true-positive rates, plotted on x and y respectively.
    label : str, optional
        Legend entry for this curve.

    Each call also draws the dashed diagonal reference line, fixes both axes to
    [0, 1] and sets the axis labels. No figure is created, no legend is added and
    nothing is shown here; the caller is responsible for legend() and show().
    """
    axes = plt.gca()
    axes.plot(fpr, tpr, linewidth=2, label=label)
    axes.plot([0, 1], [0, 1], 'k--')
    axes.axis([0, 1, 0, 1])
    axes.set_xlabel('False Positive Rate')
    axes.set_ylabel('True Positive Rate')
    
# Overlay the ROC curve of every fitted model on one figure, in the same order
# (and therefore the same legend order) as the individual calls would produce.
_roc_series = (
    (fpr_gbc, tpr_gbc, 'GBoost'),
    (fpr_clf, tpr_clf, 'Forest'),
    (fpr_knn, tpr_knn, 'KNN'),
    (fpr_dtc, tpr_dtc, 'DTree'),
    (fpr_lr, tpr_lr, 'LogReg'),
)
for _fpr, _tpr, _name in _roc_series:
    plot_roc_curve(_fpr, _tpr, label=_name)

plt.legend()
plt.show()