In [1]:
# Import libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, balanced_accuracy_score
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder
from sklearn.preprocessing import StandardScaler

In [4]:
# `utilities` provides processData, which the modelling cells below call
# (presumably a project-local module — confirm its location).
import utilities
import importlib
# Re-execute the utilities module in the running kernel — presumably so that edits
# to utilities.py are picked up without restarting the kernel.
importlib.reload(utilities)
from xgboost import XGBClassifier
# NOTE(review): SMOTE is not referenced in any of the cells shown here — confirm it is still needed.
from imblearn.over_sampling import SMOTE

In [5]:
# Load the customer churn records CSV into a DataFrame
# (the path is relative to the notebook's working directory).
df = pd.read_csv(filepath_or_buffer='Resources/Customer-Churn-Records.csv')

In [6]:
# Initialize the baseline XGBoost classifier: library-default hyperparameters,
# with a fixed random_state so repeated runs are comparable.
xgb_model = XGBClassifier(random_state=1)

In [7]:
# Evaluate the baseline model via utilities.processData. As this cell's output shows,
# the call prints accuracy, balanced accuracy, a classification report and the AUC score.
# NOTE(review): the structure of the returned `scores` is defined in utilities.py — confirm there.
scores = utilities.processData(df, xgb_model)

Accuracy Score: 0.856
Balanced accuracy Score: 0.7258558691222086
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.94      0.91      1607
           1       0.68      0.51      0.58       393

    accuracy                           0.86      2000
   macro avg       0.78      0.73      0.75      2000
weighted avg       0.85      0.86      0.85      2000

auc Score:  0.8536238561889697


# NOTE(review): this line has no execution count and no output in the saved notebook —
# re-run it so the result is recorded, or remove it.
print(scores)

In [8]:
# Hyperparameter search space for the XGBoost classifier.
# 3 x 3 x 3 x 2 = 54 candidate combinations.
param_grid = dict(
    n_estimators=[100, 200, 300],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.8, 1.0],
)

In [9]:
from sklearn.model_selection import GridSearchCV

# Exhaustive search over param_grid with 5-fold cross-validation,
# ranking candidates by balanced accuracy.
grid_search = GridSearchCV(
    estimator=xgb_model,
    param_grid=param_grid,
    scoring='balanced_accuracy',
    cv=5,
    verbose=1,
)

# Pass the search object to processData in place of a plain model.
# best_params_ exists only once the search has been fitted, so the print below
# relies on processData fitting `grid_search` itself (the recorded output shows it does).
scores_gscv = utilities.processData(df, grid_search)
print("Best parameters found: ", grid_search.best_params_)

Fitting 5 folds for each of 54 candidates, totalling 270 fits
Accuracy Score: 0.864
Balanced accuracy Score: 0.7298729635453036
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.95      0.92      1607
           1       0.72      0.51      0.60       393

    accuracy                           0.86      2000
   macro avg       0.80      0.73      0.76      2000
weighted avg       0.85      0.86      0.85      2000

auc Score:  0.860001805079875
Best parameters found:  {'learning_rate': 0.2, 'max_depth': 3, 'n_estimators': 300, 'subsample': 1.0}


In [10]:
# Build the tuned classifier from the grid-search result instead of hand-copying the
# printed values, so it cannot drift out of sync if the data or param_grid changes.
# In the saved run the best parameters were:
#   learning_rate=0.2, max_depth=3, n_estimators=300, subsample=1.0
# Requires the grid-search cell to have been run first (best_params_ is set by fitting).
xgb_model_optimized = XGBClassifier(random_state=1, **grid_search.best_params_)

In [11]:
# Evaluate the tuned model with the same utilities.processData call used for the baseline.
# NOTE(review): this rebinds `scores`, replacing the baseline result assigned in the
# earlier cell — consider a distinct name if both results are needed for comparison.
scores = utilities.processData(df, xgb_model_optimized)

Accuracy Score: 0.864
Balanced accuracy Score: 0.7298729635453036
Classification Report:
               precision    recall  f1-score   support

           0       0.89      0.95      0.92      1607
           1       0.72      0.51      0.60       393

    accuracy                           0.86      2000
   macro avg       0.80      0.73      0.76      2000
weighted avg       0.85      0.86      0.85      2000

auc Score:  0.860001805079875
