## Import Common Package

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import train_test_split, GridSearchCV, StratifiedKFold, RandomizedSearchCV
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import RobustScaler, PowerTransformer, PolynomialFeatures, OneHotEncoder
from sklearn.compose import ColumnTransformer

from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from xgboost import XGBClassifier, XGBRegressor

from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

import warnings
warnings.filterwarnings('ignore')

### Import Data

In [2]:
df = pd.read_csv('Churn_Modelling.csv')
pd.options.display.max_columns = 999
pd.set_option('display.float_format', lambda x: '%.2f' % x) #Change Decimal
df.head()

Unnamed: 0,RowNumber,CustomerId,Surname,CreditScore,Geography,Gender,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited
0,1,15634602,Hargrave,619,France,Female,42,2,0.0,1,1,1,101348.88,1
1,2,15647311,Hill,608,Spain,Female,41,1,83807.86,1,0,1,112542.58,0
2,3,15619304,Onio,502,France,Female,42,8,159660.8,3,1,0,113931.57,1
3,4,15701354,Boni,699,France,Female,39,1,0.0,2,0,0,93826.63,0
4,5,15737888,Mitchell,850,Spain,Female,43,2,125510.82,1,1,1,79084.1,0


### Data Preparation

In [3]:
df_1 = df.copy()

In [4]:
df_1.isna().sum()

RowNumber          0
CustomerId         0
Surname            0
CreditScore        0
Geography          0
Gender             0
Age                0
Tenure             0
Balance            0
NumOfProducts      0
HasCrCard          0
IsActiveMember     0
EstimatedSalary    0
Exited             0
dtype: int64

In [5]:
# Remove RowNumber and Surname and CustomerId
df_1.drop(columns = ['RowNumber','Surname', 'CustomerId'], inplace = True)

In [6]:
### One Hot Encoding Column fuel, seller_type, transmission (don't have levelling)
df_1 =  pd.get_dummies(data=df_1, columns=['Geography', 'Gender'])

In [7]:
df_1.head()

Unnamed: 0,CreditScore,Age,Tenure,Balance,NumOfProducts,HasCrCard,IsActiveMember,EstimatedSalary,Exited,Geography_France,Geography_Germany,Geography_Spain,Gender_Female,Gender_Male
0,619,42,2,0.0,1,1,1,101348.88,1,1,0,0,1,0
1,608,41,1,83807.86,1,0,1,112542.58,0,0,0,1,1,0
2,502,42,8,159660.8,3,1,0,113931.57,1,1,0,0,1,0
3,699,39,1,0.0,2,0,0,93826.63,0,1,0,0,1,0
4,850,43,2,125510.82,1,1,1,79084.1,0,0,0,1,1,0


### Cek Imbalance Data

In [8]:
df_1['Exited'].value_counts()

0    7963
1    2037
Name: Exited, dtype: int64

In [9]:
pd.crosstab(index=df_1['Exited'], columns='count', normalize=True)*100

col_0,count
Exited,Unnamed: 1_level_1
0,79.63
1,20.37


### Splitting Data

In [10]:
X = df_1.drop(columns='Exited')
y = df_1['Exited']

In [11]:
X_train, X_test, y_train, y_test = train_test_split(X, y, stratify = y, test_size = .20, random_state = 42)

Dataset is imbalance

0: Not-Exited
1: Exited

Because this dataset is about Bank Churn, we will focus to get the smallest False Negative and get the best score of recall 1

The cost of acquiring new customers is estimated at five times the rate of retaining existing ones\
    - Source : https://www.fpsc.com/the_cost_of_customer_churn.pdf

## Machine Learning Modelling

## 1. Random Forest

### 1a. Random Forest - Base Algorithm

In [26]:
RF_Base = RandomForestClassifier()

In [27]:
RF_Base.fit(X_train, y_train)

RandomForestClassifier()

In [28]:
y_pred_RFBase = RF_Base.predict(X_test)

In [29]:
print(classification_report(y_test, y_pred_RFBase))

              precision    recall  f1-score   support

           0       0.87      0.96      0.92      1593
           1       0.76      0.45      0.57       407

    accuracy                           0.86      2000
   macro avg       0.82      0.71      0.74      2000
weighted avg       0.85      0.86      0.85      2000



In [30]:
cm_RF_Base = confusion_matrix(y_test, y_pred_RFBase , labels=[1,0])

In [31]:
df_RF_Base = pd.DataFrame(cm_RF_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,185,222
Akt 0,58,1535


### 1b. Random Forest - Class Weight Tuning

In [50]:
RF_1 = RandomForestClassifier(class_weight={0 : .1 , 1 : .9})

In [45]:
RF_1.fit(X_train, y_train)

RandomForestClassifier(class_weight={0: 0.2, 1: 0.8})

In [46]:
y_pred_RF1 = RF_1.predict(X_test)

In [47]:
print(classification_report(y_test, y_pred_RF1))

              precision    recall  f1-score   support

           0       0.87      0.97      0.91      1593
           1       0.76      0.42      0.54       407

    accuracy                           0.85      2000
   macro avg       0.81      0.69      0.73      2000
weighted avg       0.85      0.85      0.84      2000



In [48]:
cm_RF_1 = confusion_matrix(y_test, y_pred_RF1 , labels=[1,0])

In [49]:
df_RF_1 = pd.DataFrame(cm_RF_1, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF_1

Unnamed: 0,Pred 1,Pred 0
Akt 1,171,236
Akt 0,54,1539


### 1c. Random Forest - Hyper Parameter Tuning

In [12]:
param_RF = {
    "n_estimators" : np.arange(100, 10000, 100),
    "max_depth" : [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    "min_samples_leaf" :np.arange(1,101),
    "max_features" : ['auto', 0.3, 0.5, 0.8],
    "class_weight" : [{0:x, 1: 1 - x} for x in [.1, .20, .35, .40]]
}

In [53]:
RF_rand = RandomForestClassifier(random_state=42)

In [54]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [55]:
RF_RS= RandomizedSearchCV(RF_rand, param_RF, cv=skf, n_iter=50, n_jobs=-1, verbose=1, random_state=42, scoring='recall')

In [56]:
RF_RS.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:  5.5min
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed: 19.7min finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
                   estimator=RandomForestClassifier(random_state=42), n_iter=50,
                   n_jobs=-1,
                   param_distributions={'class_weight': [{0: 0.1, 1: 0.9},
                                                         {0: 0.2, 1: 0.8},
                                                         {0: 0.35, 1: 0.65},
                                                         {0: 0.4, 1: 0.6}],
                                        'max_depth': [None, 5, 10, 25, 30, 40,
                                                      50, 60, 80, 95, 100],
                                        'max_features': ['auto', 0.3, 0.5, 0.8],
                                        'min_samples_leaf':...
       3400, 3500, 3600, 3700, 3800, 3900, 4000, 4100, 4200, 4300, 4400,
       4500, 4600, 4700, 4800, 4900, 5000, 5100, 5200, 5300, 5400, 5500,
       5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 640

In [57]:
RF_RS.best_params_

{'n_estimators': 600,
 'min_samples_leaf': 16,
 'max_features': 'auto',
 'max_depth': 5,
 'class_weight': {0: 0.1, 1: 0.9}}

In [58]:
Model_RF_Tuned = RF_RS.best_estimator_

In [62]:
y_pred_RF_Tuned = Model_RF_Tuned.predict(X_test)

In [64]:
print(classification_report(y_test, y_pred_RF_Tuned))

              precision    recall  f1-score   support

           0       0.97      0.40      0.56      1593
           1       0.29      0.95      0.44       407

    accuracy                           0.51      2000
   macro avg       0.63      0.67      0.50      2000
weighted avg       0.83      0.51      0.54      2000



In [65]:
cm_RF_Tuned = confusion_matrix(y_test, y_pred_RF_Tuned, labels=[1,0])

In [66]:
df_RF_Tuned = pd.DataFrame(cm_RF_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,386,21
Akt 0,959,634


### 1d. Random Forest - Hyper Parameter Tuning (FINE TUNING)

In [27]:
param_RF2 = {
    "n_estimators" : [700, 750, 800],
    "max_depth" : [3,4],
    "min_samples_leaf" :[20, 25,30],
#     "max_features" : ['auto', 0.2, 0.1, 0.3],
    "class_weight" : [{0:x, 1: 1 - x} for x in [.15, .20]]
}

In [29]:
RF_rand2 = RandomForestClassifier()

In [30]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [31]:
RF_GS1 = GridSearchCV(RF_rand2, param_RF2, cv=skf, n_jobs=-1, verbose=1, scoring='recall')

In [32]:
RF_GS1.fit(X_train, y_train)

Fitting 3 folds for each of 36 candidates, totalling 108 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   25.6s
[Parallel(n_jobs=-1)]: Done 108 out of 108 | elapsed:  1.2min finished


GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
             estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'class_weight': [{0: 0.15, 1: 0.85}, {0: 0.2, 1: 0.8}],
                         'max_depth': [3, 4], 'min_samples_leaf': [20, 25, 30],
                         'n_estimators': [700, 750, 800]},
             scoring='recall', verbose=1)

In [33]:
RF_GS1.best_params_

{'class_weight': {0: 0.15, 1: 0.85},
 'max_depth': 3,
 'min_samples_leaf': 20,
 'n_estimators': 800}

In [34]:
Model_GS1_Tuned = RF_GS1.best_estimator_

In [35]:
y_pred_RF2_Tuned = Model_GS1_Tuned.predict(X_test)

In [36]:
print(classification_report(y_test, y_pred_RF2_Tuned))

              precision    recall  f1-score   support

           0       0.95      0.55      0.70      1593
           1       0.34      0.89      0.49       407

    accuracy                           0.62      2000
   macro avg       0.64      0.72      0.59      2000
weighted avg       0.83      0.62      0.65      2000



In [37]:
cm_RF2_Tuned = confusion_matrix(y_test, y_pred_RF2_Tuned, labels=[1,0])

In [38]:
df_RF2_Tuned = pd.DataFrame(cm_RF2_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF2_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,362,45
Akt 0,718,875


### 1e. Random Forest - Hyper Parameter Tuning (FINE TUNING2)

In [107]:
param_RF3 = {
    "n_estimators" : [800],
    "max_depth" : [3],
    "min_samples_leaf" :[20],
#     "max_features" : [0.3],
    "class_weight" : [{0:x, 1: 1 - x} for x in [.2, .25, .3, .4, .45, .5]]
}

In [108]:
RF_rand3 = RandomForestClassifier()

In [109]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [110]:
RF_GS2 = GridSearchCV(RF_rand3, param_RF3, cv=skf, n_jobs=-1, verbose=1, scoring='recall')

In [111]:
RF_GS2.fit(X_train, y_train)

Fitting 3 folds for each of 6 candidates, totalling 18 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  18 out of  18 | elapsed:   10.9s finished


GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
             estimator=RandomForestClassifier(), n_jobs=-1,
             param_grid={'class_weight': [{0: 0.2, 1: 0.8}, {0: 0.25, 1: 0.75},
                                          {0: 0.3, 1: 0.7}, {0: 0.4, 1: 0.6},
                                          {0: 0.45, 1: 0.55},
                                          {0: 0.5, 1: 0.5}],
                         'max_depth': [3], 'min_samples_leaf': [20],
                         'n_estimators': [800]},
             scoring='recall', verbose=1)

In [112]:
RF_GS2.best_params_

{'class_weight': {0: 0.2, 1: 0.8},
 'max_depth': 3,
 'min_samples_leaf': 20,
 'n_estimators': 800}

In [113]:
Model_GS2_Tuned = RF_GS2.best_estimator_

In [114]:
y_pred_RF3_Tuned = Model_GS2_Tuned.predict(X_test)

In [115]:
print(classification_report(y_test, y_pred_RF3_Tuned))

              precision    recall  f1-score   support

           0       0.93      0.77      0.84      1593
           1       0.46      0.76      0.58       407

    accuracy                           0.77      2000
   macro avg       0.70      0.77      0.71      2000
weighted avg       0.83      0.77      0.79      2000



In [116]:
cm_RF3_Tuned = confusion_matrix(y_test, y_pred_RF3_Tuned, labels=[1,0])

In [117]:
df_RF3_Tuned = pd.DataFrame(cm_RF3_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_RF3_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,310,97
Akt 0,359,1234


===============================================================================================================

## 2. Decison Tree

### 2a. Decison Tree - Base Algorithm

In [31]:
DT_Base = DecisionTreeClassifier()

In [32]:
DT_Base.fit(X_train, y_train)

DecisionTreeClassifier()

In [33]:
y_pred_DTBase = DT_Base.predict(X_test)

In [34]:
print(classification_report(y_test, y_pred_DTBase))

              precision    recall  f1-score   support

           0       0.87      0.85      0.86      1593
           1       0.48      0.52      0.50       407

    accuracy                           0.79      2000
   macro avg       0.68      0.69      0.68      2000
weighted avg       0.79      0.79      0.79      2000



In [35]:
cm_DT_Base = confusion_matrix(y_test, y_pred_DTBase , labels=[1,0])

In [36]:
df_DT_Base = pd.DataFrame(cm_DT_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_DT_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,212,195
Akt 0,231,1362


### 2b. Decision Tree - Class Weight Tuning

In [37]:
DT_1 = DecisionTreeClassifier(class_weight={0 : .1 , 1 : .9})

In [38]:
DT_1.fit(X_train, y_train)

DecisionTreeClassifier(class_weight={0: 0.1, 1: 0.9})

In [39]:
y_pred_DT1 = DT_1.predict(X_test)

In [40]:
print(classification_report(y_test, y_pred_DT1))

              precision    recall  f1-score   support

           0       0.87      0.89      0.88      1593
           1       0.51      0.48      0.49       407

    accuracy                           0.80      2000
   macro avg       0.69      0.68      0.69      2000
weighted avg       0.80      0.80      0.80      2000



In [41]:
cm_DT_1 = confusion_matrix(y_test, y_pred_DT1 , labels=[1,0])

In [42]:
df_DT_1 = pd.DataFrame(cm_DT_1, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_DT_1

Unnamed: 0,Pred 1,Pred 0
Akt 1,194,213
Akt 0,183,1410


### 2c. Decision Tree - Hyper Parameter Tuning

In [468]:
param_DT = {
#     "n_estimators" : np.arange(100, 10000, 100),
     "max_depth" : [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    "min_samples_leaf" :np.arange(1,101),
    "max_features" : ['auto', 0.3, 0.5, 0.8],
    "class_weight" : [{0:x, 1: 1 - x} for x in [.1, .20, .35, .40]]
}

In [469]:
DT_rand = DecisionTreeClassifier(random_state=42)

In [470]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [471]:
DT_RS= RandomizedSearchCV(DT_rand, param_DT, cv=skf, n_iter=50, n_jobs=-1, verbose=1, random_state=42, scoring='recall')

In [472]:
DT_RS.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.1s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:    0.4s finished


RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
                   estimator=DecisionTreeClassifier(random_state=42), n_iter=50,
                   n_jobs=-1,
                   param_distributions={'class_weight': [{0: 0.1, 1: 0.9},
                                                         {0: 0.2, 1: 0.8},
                                                         {0: 0.35, 1: 0.65},
                                                         {0: 0.4, 1: 0.6}],
                                        'max_depth': [None, 5, 10, 25, 30, 40,
                                                      50, 60, 80, 95, 100],
                                        'max_features': ['auto', 0.3, 0.5, 0.8],
                                        'min_samples_leaf': array([  1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  11,  12,  13,
        14,  15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  26,
        27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  37,  

In [473]:
DT_RS.best_params_

{'min_samples_leaf': 68,
 'max_features': 'auto',
 'max_depth': 25,
 'class_weight': {0: 0.1, 1: 0.9}}

In [474]:
Model_DT_Tuned = DT_RS.best_estimator_

In [475]:
y_pred_DT_Tuned = Model_DT_Tuned.predict(X_test)

In [476]:
print(classification_report(y_test, y_pred_DT_Tuned))

              precision    recall  f1-score   support

           0       0.94      0.47      0.62      1593
           1       0.30      0.87      0.44       407

    accuracy                           0.55      2000
   macro avg       0.62      0.67      0.53      2000
weighted avg       0.81      0.55      0.59      2000



In [477]:
cm_DT_Tuned = confusion_matrix(y_test, y_pred_DT_Tuned, labels=[1,0])

In [478]:
df_DT_Tuned = pd.DataFrame(cm_DT_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_DT_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,356,51
Akt 0,848,745


### 2d. Decision Tree - Hyper Parameter Tuning (FINE TUNING)

In [1748]:
param_DT2 = {
    "max_depth" : [20, 25, 25],
    "min_samples_leaf" :[68],
    "max_features" : ['auto'],
    "class_weight" : [{0: 0.175, 1: 0.825}]
}

In [1749]:
DT_rand2 = DecisionTreeClassifier()

In [1750]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [1751]:
DT_GS1 = GridSearchCV(DT_rand2, param_DT2, cv=skf, n_jobs=-1, verbose=1, scoring='recall')

In [1752]:
DT_GS1.fit(X_train, y_train)

Fitting 3 folds for each of 3 candidates, totalling 9 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done   4 out of   9 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=-1)]: Done   9 out of   9 | elapsed:    0.0s finished


GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
             estimator=DecisionTreeClassifier(), n_jobs=-1,
             param_grid={'class_weight': [{0: 0.175, 1: 0.825}],
                         'max_depth': [20, 25, 25], 'max_features': ['auto'],
                         'min_samples_leaf': [68]},
             scoring='recall', verbose=1)

In [1753]:
DT_GS1.best_params_

{'class_weight': {0: 0.175, 1: 0.825},
 'max_depth': 20,
 'max_features': 'auto',
 'min_samples_leaf': 68}

In [1754]:
Model_DT2_Tuned = DT_GS1.best_estimator_

In [1755]:
y_pred_DT2_Tuned = Model_DT2_Tuned.predict(X_test)

In [1756]:
print(classification_report(y_test, y_pred_DT2_Tuned))

              precision    recall  f1-score   support

           0       0.93      0.73      0.81      1593
           1       0.42      0.78      0.55       407

    accuracy                           0.74      2000
   macro avg       0.67      0.75      0.68      2000
weighted avg       0.83      0.74      0.76      2000



In [1757]:
cm_DT2_Tuned = confusion_matrix(y_test, y_pred_DT2_Tuned, labels=[1,0])

In [1758]:
df_DT2_Tuned = pd.DataFrame(cm_DT2_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_DT2_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,318,89
Akt 0,438,1155


===============================================================================================================

## 3. XGBoost

### 3a. XGBoost - Base Algorithm

In [766]:
XG_Base = XGBClassifier()

In [767]:
XG_Base.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [768]:
y_pred_XGBase = XG_Base.predict(X_test)

In [769]:
print(classification_report(y_test, y_pred_XGBase))

              precision    recall  f1-score   support

           0       0.88      0.95      0.91      1593
           1       0.70      0.48      0.57       407

    accuracy                           0.85      2000
   macro avg       0.79      0.71      0.74      2000
weighted avg       0.84      0.85      0.84      2000



In [770]:
cm_XG_Base = confusion_matrix(y_test, y_pred_XGBase , labels=[1,0])

In [771]:
df_XG_Base = pd.DataFrame(cm_XG_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_XG_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,194,213
Akt 0,83,1510


### 3b. XGBoost - Class Weight Tuning

In [779]:
XG_1 = XGBClassifier(scale_pos_weight =0.99)

In [780]:
XG_1.fit(X_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
              importance_type='gain', interaction_constraints='',
              learning_rate=0.300000012, max_delta_step=0, max_depth=6,
              min_child_weight=1, missing=nan, monotone_constraints='()',
              n_estimators=100, n_jobs=8, num_parallel_tree=1, random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=0.99, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [781]:
y_pred_XG1 = XG_1.predict(X_test)

In [782]:
print(classification_report(y_test, y_pred_XG1))

              precision    recall  f1-score   support

           0       0.87      0.95      0.91      1593
           1       0.69      0.46      0.55       407

    accuracy                           0.85      2000
   macro avg       0.78      0.70      0.73      2000
weighted avg       0.84      0.85      0.84      2000



In [783]:
cm_XG_1 = confusion_matrix(y_test, y_pred_XG1 , labels=[1,0])

In [784]:
df_XG_1 = pd.DataFrame(cm_XG_1, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_XG_1

Unnamed: 0,Pred 1,Pred 0
Akt 1,186,221
Akt 0,83,1510


### 3c. XGBoost- Hyper Parameter Tuning

In [892]:
param_XG = {
    'learning_rate': [1],
    "n_estimators" : np.arange(100, 10000, 100),
 "max_depth" : [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    'min_child_weight': [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    'eta':[.3],
    'subsample': [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    'colsample_bytree': [None, 5, 10, 25, 30, 40, 50, 60, 80, 95, 100],
    # Other parameters
    'objective': ['binary:logistic'],
    'scale_pos_weight' : [0.99]
}

In [893]:
XGBClassifier().get_params()

{'objective': 'binary:logistic',
 'use_label_encoder': True,
 'base_score': None,
 'booster': None,
 'colsample_bylevel': None,
 'colsample_bynode': None,
 'colsample_bytree': None,
 'gamma': None,
 'gpu_id': None,
 'importance_type': 'gain',
 'interaction_constraints': None,
 'learning_rate': None,
 'max_delta_step': None,
 'max_depth': None,
 'min_child_weight': None,
 'missing': nan,
 'monotone_constraints': None,
 'n_estimators': 100,
 'n_jobs': None,
 'num_parallel_tree': None,
 'random_state': None,
 'reg_alpha': None,
 'reg_lambda': None,
 'scale_pos_weight': None,
 'subsample': None,
 'tree_method': None,
 'validate_parameters': None,
 'verbosity': None}

In [894]:
XG_rand = XGBClassifier(random_state=42)

In [895]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [896]:
XG_RS= RandomizedSearchCV(XG_rand, param_XG, cv=skf, n_iter=50, n_jobs=-1, verbose=1, random_state=42, scoring='recall')

In [897]:
XG_RS.fit(X_train, y_train)

Fitting 3 folds for each of 50 candidates, totalling 150 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  56 tasks      | elapsed:    0.2s
[Parallel(n_jobs=-1)]: Done 150 out of 150 | elapsed:   10.8s finished




RandomizedSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
                   estimator=XGBClassifier(base_score=None, booster=None,
                                           colsample_bylevel=None,
                                           colsample_bynode=None,
                                           colsample_bytree=None, gamma=None,
                                           gpu_id=None, importance_type='gain',
                                           interaction_constraints=None,
                                           learning_rate=None,
                                           max_delta_step=None, max_depth=None,
                                           min_child_weight=None...
       5600, 5700, 5800, 5900, 6000, 6100, 6200, 6300, 6400, 6500, 6600,
       6700, 6800, 6900, 7000, 7100, 7200, 7300, 7400, 7500, 7600, 7700,
       7800, 7900, 8000, 8100, 8200, 8300, 8400, 8500, 8600, 8700, 8800,
       8900, 9000, 9100, 9200, 9300, 9400, 9500, 

In [898]:
XG_RS.best_params_

{'subsample': None,
 'scale_pos_weight': 0.99,
 'objective': 'binary:logistic',
 'n_estimators': 3600,
 'min_child_weight': 40,
 'max_depth': 40,
 'learning_rate': 1,
 'eta': 0.3,
 'colsample_bytree': None}

In [899]:
Model_XG_Tuned = XG_RS.best_estimator_

In [900]:
y_pred_XG_Tuned = Model_XG_Tuned.predict(X_test)

In [901]:
print(classification_report(y_test, y_pred_XG_Tuned))

              precision    recall  f1-score   support

           0       0.86      0.90      0.88      1593
           1       0.53      0.43      0.47       407

    accuracy                           0.81      2000
   macro avg       0.70      0.67      0.68      2000
weighted avg       0.79      0.81      0.80      2000



In [882]:
cm_XG_Tuned = confusion_matrix(y_test, y_pred_XG_Tuned, labels=[1,0])

In [883]:
df_XG_Tuned = pd.DataFrame(cm_XG_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_XG_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,178,229
Akt 0,139,1454


### 3d. XGBoost - Hyper Parameter Tuning (FINE TUNING)

In [1490]:
param_XG2 = {
    'learning_rate': [2],
    "n_estimators" : [4600],
 "max_depth" : [0.095, 1, 1.05,],
    'min_child_weight': [0.065, 0.07, 0.075],
    'eta':[.3],
    'subsample': [0.7],
    'colsample_bytree': [0.7],
    # Other parameters
    'objective': ['binary:logistic'],
    'scale_pos_weight' : [100],
}

In [1491]:
XG_rand2 = XGBClassifier()

In [1492]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [1493]:
XG_GS1 = GridSearchCV(XG_rand2, param_XG2, cv=skf, n_jobs=-1, verbose=1, scoring='recall')

In [1494]:
XG_GS1.fit(X_train, y_train)

Fitting 3 folds for each of 9 candidates, totalling 27 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  27 out of  27 | elapsed:   20.1s finished




GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
             estimator=XGBClassifier(base_score=None, booster=None,
                                     colsample_bylevel=None,
                                     colsample_bynode=None,
                                     colsample_bytree=None, gamma=None,
                                     gpu_id=None, importance_type='gain',
                                     interaction_constraints=None,
                                     learning_rate=None, max_delta_step=None,
                                     max_depth=None, min_child_weight=None,
                                     missi...
                                     scale_pos_weight=None, subsample=None,
                                     tree_method=None, validate_parameters=None,
                                     verbosity=None),
             n_jobs=-1,
             param_grid={'colsample_bytree': [0.7], 'eta': [0.3],
                    

In [1495]:
XG_GS1.best_params_

{'colsample_bytree': 0.7,
 'eta': 0.3,
 'learning_rate': 2,
 'max_depth': 1,
 'min_child_weight': 0.065,
 'n_estimators': 4600,
 'objective': 'binary:logistic',
 'scale_pos_weight': 100,
 'subsample': 0.7}

In [1496]:
Model_XG2_Tuned = XG_GS1.best_estimator_

In [1497]:
y_pred_XG2_Tuned = Model_XG2_Tuned.predict(X_test)

In [1498]:
print(classification_report(y_test, y_pred_XG2_Tuned))

              precision    recall  f1-score   support

           0       0.91      0.64      0.75      1593
           1       0.35      0.76      0.48       407

    accuracy                           0.67      2000
   macro avg       0.63      0.70      0.62      2000
weighted avg       0.80      0.67      0.70      2000



In [1499]:
cm_XG2_Tuned = confusion_matrix(y_test, y_pred_XG2_Tuned, labels=[1,0])

In [1500]:
df_XG2_Tuned = pd.DataFrame(cm_XG2_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_XG2_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,310,97
Akt 0,573,1020


## 4.Logistic Regression

### 4a. LR Base

In [29]:
LR_Base = LogisticRegression()

In [30]:
LR_Base.fit(X_train, y_train)

LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
                   intercept_scaling=1, l1_ratio=None, max_iter=100,
                   multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [31]:
y_pred_LRBase = LR_Base.predict(X_test)

In [33]:
print(classification_report(y_test, y_pred_LRBase))

              precision    recall  f1-score   support

           0       0.80      1.00      0.89      1593
           1       0.00      0.00      0.00       407

    accuracy                           0.80      2000
   macro avg       0.40      0.50      0.44      2000
weighted avg       0.63      0.80      0.71      2000



In [34]:
cm_LR_Base = confusion_matrix(y_test, y_pred_LRBase , labels=[1,0])

In [35]:
df_LR_Base = pd.DataFrame(cm_LR_Base, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_LR_Base

Unnamed: 0,Pred 1,Pred 0
Akt 1,0,407
Akt 0,0,1593


### 4b. LR with weighted parameter

In [36]:
LR_1 = LogisticRegression(class_weight={0:.1, 1:.9})

In [37]:
LR_1.fit(X_train,y_train)

LogisticRegression(C=1.0, class_weight={0: 0.1, 1: 0.9}, dual=False,
                   fit_intercept=True, intercept_scaling=1, l1_ratio=None,
                   max_iter=100, multi_class='auto', n_jobs=None, penalty='l2',
                   random_state=None, solver='lbfgs', tol=0.0001, verbose=0,
                   warm_start=False)

In [38]:
y_pred_LR1 = LR_1.predict(X_test)

In [39]:
print(classification_report(y_test,y_pred_LR1))

              precision    recall  f1-score   support

           0       0.94      0.22      0.35      1593
           1       0.24      0.94      0.38       407

    accuracy                           0.36      2000
   macro avg       0.59      0.58      0.36      2000
weighted avg       0.79      0.36      0.36      2000



In [40]:
cm_LR1 = confusion_matrix(y_test,y_pred_LR1,labels=[1,0])

In [41]:
df_LR1 = pd.DataFrame(cm_LR1, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_LR1

Unnamed: 0,Pred 1,Pred 0
Akt 1,384,23
Akt 0,1249,344


### 4c. LR With Hyperparameter tuning

In [60]:
param_LR = {
    'penalty' : ['none', 'l1', 'l2'],
    'C' : np.logspace(-4 , 4, 14),
    'class_weight' : [{0 : x, 1 : 1 - x} for x in [.05,.1,.15, .2,]]
}

In [43]:
LR_HP = LogisticRegression()

In [26]:
skf = StratifiedKFold(n_splits=3, random_state=42)

In [75]:
LR_GS = GridSearchCV(LR_HP, param_LR, cv=skf, n_jobs=-1, verbose=1, scoring='recall')

In [76]:
LR_GS.fit(X_train, y_train)

Fitting 3 folds for each of 168 candidates, totalling 504 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done 128 tasks      | elapsed:    1.6s
[Parallel(n_jobs=-1)]: Done 504 out of 504 | elapsed:    7.8s finished


GridSearchCV(cv=StratifiedKFold(n_splits=3, random_state=42, shuffle=False),
             error_score=nan,
             estimator=LogisticRegression(C=1.0, class_weight=None, dual=False,
                                          fit_intercept=True,
                                          intercept_scaling=1, l1_ratio=None,
                                          max_iter=100, multi_class='auto',
                                          n_jobs=None, penalty='l2',
                                          random_state=None, solver='lbfgs',
                                          tol=0.0001, verbose=0,
                                          warm_start=False),
             iid='deprecate...
             param_grid={'C': array([1.00000000e-04, 4.12462638e-04, 1.70125428e-03, 7.01703829e-03,
       2.89426612e-02, 1.19377664e-01, 4.92388263e-01, 2.03091762e+00,
       8.37677640e+00, 3.45510729e+01, 1.42510267e+02, 5.87801607e+02,
       2.42446202e+03, 1.00000000e+04]),
          

In [77]:
LR_GS.best_params_

{'C': 0.0001, 'class_weight': {0: 0.05, 1: 0.95}, 'penalty': 'l2'}

In [78]:
LR_GS_Tuned = LR_GS.best_estimator_

In [79]:
y_pred_LR_Tuned = LR_GS_Tuned.predict(X_test)

In [80]:
print(classification_report(y_test, y_pred_LR_Tuned))

              precision    recall  f1-score   support

           0       0.89      0.01      0.01      1593
           1       0.20      1.00      0.34       407

    accuracy                           0.21      2000
   macro avg       0.55      0.50      0.17      2000
weighted avg       0.75      0.21      0.08      2000



In [81]:
cm_LR_Tuned = confusion_matrix(y_test, y_pred_LR_Tuned, labels=[1,0])

In [82]:
df_LR_Tuned = pd.DataFrame(cm_LR_Tuned, index=['Akt 1', 'Akt 0'], columns =  ['Pred 1', 'Pred 0'])
df_LR_Tuned

Unnamed: 0,Pred 1,Pred 0
Akt 1,406,1
Akt 0,1585,8


# Recommendation

- Create customer retention program, for customers who are predicted will Exited bank
    - Give 1 Honda Vario (with value IDR15.000.000) for customers who are predicted will Exited bank with 3 years contract agreement
    - The cost of acquiring new customers is estimated at five times the rate of retaining existing ones
        - Source : https://www.fpsc.com/the_cost_of_customer_churn.pdf
        - With asumption retain existing customer cost is IDR 15.000.000 => lose 1 customers are equal to 75000000

# Summary

- We suggest to use Model Random Forest with Fine Tuning 2, because after rough calculation, it has the lowest loss of money
- The model has recall 1 score of 0.77 with False Positive 359 and False Negative 97

# Business Loss Rough Calculation

- False Positive Loss

In [9]:
359 * 15000000

5385000000

- False Negative Loss

In [8]:
65000000 * 97

6305000000