In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, f1_score, confusion_matrix, classification_report
import imblearn
from imblearn import under_sampling, over_sampling
from imblearn.over_sampling import SMOTE



# AdaBoost and Gradient Boosting

In [2]:
# Read the dataset from a CSV path that is relative to the working directory.
csv_path = 'data/cleaned_data.csv'
df = pd.read_csv(csv_path)

In [3]:
def evaluate(model,dt_test,dt_train,target_train,target_test):
    """Print a confusion matrix and classification report for both splits.

    Predictions for the test and train features are computed up front with
    ``model.predict``; the test section is printed first, then a blank
    separator, then the train section.

    Note the argument order: features are passed as (test, train) while the
    targets are passed as (train, test).

    Parameters
    ----------
    model : object exposing ``predict``
    dt_test, dt_train : feature sets handed to ``model.predict``
    target_train, target_test : true labels matching the feature sets

    Returns
    -------
    None; everything is written to stdout.
    """
    predictions_test = model.predict(dt_test)
    predictions_train = model.predict(dt_train)

    sections = (
        ('Evaluations for test:\n', target_test, predictions_test),
        ('Evaluations for train:\n', target_train, predictions_train),
    )
    for position, (heading, truth, predicted) in enumerate(sections):
        if position > 0:
            # Blank separator between the test and train sections.
            print('\n')
        print(heading, confusion_matrix(truth, predicted))
        print(classification_report(truth, predicted))

In [4]:
# Separate the label column from the remaining feature columns.
label_column = 'churn'
data = df.drop(columns=label_column)
target = df[label_column]

In [5]:
# Hold out 25% of the rows as a test set; the seed keeps the split repeatable.
split_parts = train_test_split(data, target, random_state=42, test_size=0.25)
data_train, data_test, target_train, target_test = split_parts

In [6]:
# Resample the training split only with SMOTE; the test split is left untouched.
smote_sampler = SMOTE(random_state=42)
data_train_smoted, target_train_smoted = smote_sampler.fit_resample(data_train, target_train)

# From here on, data_train / target_train refer to the resampled training data.
data_train, target_train = data_train_smoted, target_train_smoted



In [7]:
# Create one AdaBoost and one gradient-boosting classifier, each with a fixed
# random_state and otherwise default settings.
adaboost_clf, gbt_clf = (
    AdaBoostClassifier(random_state=42),
    GradientBoostingClassifier(random_state=42),
)

In [8]:
# Train AdaBoost on the current training features/labels; the fitted estimator
# is the cell's last expression, so its repr is displayed.
adaboost_clf.fit(X=data_train, y=target_train)

AdaBoostClassifier(algorithm='SAMME.R', base_estimator=None, learning_rate=1.0,
                   n_estimators=50, random_state=42)

In [9]:
# Report test and train metrics for the fitted AdaBoost model.
evaluate(
    model=adaboost_clf,
    dt_test=data_test,
    dt_train=data_train,
    target_train=target_train,
    target_test=target_test,
)

Evaluations for test:
 [[659  50]
 [ 49  76]]
              precision    recall  f1-score   support

           0       0.93      0.93      0.93       709
           1       0.60      0.61      0.61       125

    accuracy                           0.88       834
   macro avg       0.77      0.77      0.77       834
weighted avg       0.88      0.88      0.88       834



Evaluations for train:
 [[1994  147]
 [ 235 1906]]
              precision    recall  f1-score   support

           0       0.89      0.93      0.91      2141
           1       0.93      0.89      0.91      2141

    accuracy                           0.91      4282
   macro avg       0.91      0.91      0.91      4282
weighted avg       0.91      0.91      0.91      4282



In [10]:
# Train gradient boosting on the current training features/labels; the fitted
# estimator is the cell's last expression, so its repr is displayed.
gbt_clf.fit(X=data_train, y=target_train)

GradientBoostingClassifier(ccp_alpha=0.0, criterion='friedman_mse', init=None,
                           learning_rate=0.1, loss='deviance', max_depth=3,
                           max_features=None, max_leaf_nodes=None,
                           min_impurity_decrease=0.0, min_impurity_split=None,
                           min_samples_leaf=1, min_samples_split=2,
                           min_weight_fraction_leaf=0.0, n_estimators=100,
                           n_iter_no_change=None, presort='deprecated',
                           random_state=42, subsample=1.0, tol=0.0001,
                           validation_fraction=0.1, verbose=0,
                           warm_start=False)

In [11]:
# Report test and train metrics for the fitted gradient-boosting model.
evaluate(
    model=gbt_clf,
    dt_test=data_test,
    dt_train=data_train,
    target_train=target_train,
    target_test=target_test,
)

Evaluations for test:
 [[692  17]
 [ 29  96]]
              precision    recall  f1-score   support

           0       0.96      0.98      0.97       709
           1       0.85      0.77      0.81       125

    accuracy                           0.94       834
   macro avg       0.90      0.87      0.89       834
weighted avg       0.94      0.94      0.94       834



Evaluations for train:
 [[2113   28]
 [ 107 2034]]
              precision    recall  f1-score   support

           0       0.95      0.99      0.97      2141
           1       0.99      0.95      0.97      2141

    accuracy                           0.97      4282
   macro avg       0.97      0.97      0.97      4282
weighted avg       0.97      0.97      0.97      4282

