In [18]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report

# AdaBoost and Gradient Boosting

In [19]:
df = pd.read_csv('data/cleaned_data.csv')

In [26]:
def evaluate(model,dt_test,dt_train,target_train,target_test):
    pred_test = model.predict(dt_test)
    pred_train = model.predict(dt_train)
    
    print('Evaluations for test:\n', confusion_matrix(target_test, pred_test))
    print(classification_report(target_test, pred_test))
    print('\n')
    print('Evaluations for train:\n',confusion_matrix(target_train, pred_train))
    print(classification_report(target_train, pred_train))

In [20]:
target = df['churn']
data = df.drop('churn', axis=1)

In [21]:
# Split the data into training and test sets
data_train, data_test, target_train, target_test = train_test_split(data, target, test_size=0.25, random_state=42)

In [22]:
# Instantiate an AdaBoostClassifier
adaboost_clf = AdaBoostClassifier(random_state=42)

# Instantiate an GradientBoostingClassifier
gbt_clf = GradientBoostingClassifier(random_state=42)

In [24]:
adaboost_clf.fit(data_train, target_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=50, random_state=42)

In [27]:
evaluate(adaboost_clf, data_test, data_train, target_train, target_test)

Evaluations for test:
 [[687  22]
 [ 83  42]]
              precision    recall  f1-score   support

           0       0.89      0.97      0.93       709
           1       0.66      0.34      0.44       125

    accuracy                           0.87       834
   macro avg       0.77      0.65      0.69       834
weighted avg       0.86      0.87      0.86       834



Evaluations for train:
 [[2077   64]
 [ 189  169]]
              precision    recall  f1-score   support

           0       0.92      0.97      0.94      2141
           1       0.73      0.47      0.57       358

    accuracy                           0.90      2499
   macro avg       0.82      0.72      0.76      2499
weighted avg       0.89      0.90      0.89      2499



In [31]:
gbt_clf.fit(data_train, target_train)

GradientBoostingClassifier(criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='auto',
                           random_state=42, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [32]:
evaluate(gbt_clf, data_test, data_train, target_train, target_test)

Evaluations for test:
 [[703   6]
 [ 37  88]]
              precision    recall  f1-score   support

           0       0.95      0.99      0.97       709
           1       0.94      0.70      0.80       125

    accuracy                           0.95       834
   macro avg       0.94      0.85      0.89       834
weighted avg       0.95      0.95      0.95       834



Evaluations for train:
 [[2138    3]
 [  65  293]]
              precision    recall  f1-score   support

           0       0.97      1.00      0.98      2141
           1       0.99      0.82      0.90       358

    accuracy                           0.97      2499
   macro avg       0.98      0.91      0.94      2499
weighted avg       0.97      0.97      0.97      2499



In [34]:
print('Mean Adaboost Cross-Val Score (k=5):')
print(cross_val_score(adaboost_clf, df, target, cv=5).mean())

Mean Adaboost Cross-Val Score (k=5):
1.0


In [35]:
print('Mean GBT Cross-Val Score (k=5):')
print(cross_val_score(gbt_clf, df, target, cv=5).mean())

Mean GBT Cross-Val Score (k=5):
1.0
