In [13]:
from models.decision_tree import DecisionTreeModel
from models.adaBoost import AdaBoostModel
from models.xg_boost import XGBoostModel
from models.gradient_boosting import GradientBoostingModel
from models.random_forest import RandomForestModel
from models.cat_boost import CatBoostModel
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import f1_score, roc_auc_score
from metrics import cross_validate_with_resampling, Metrics
from imblearn.combine import SMOTEENN
from imblearn.over_sampling import RandomOverSampler, SMOTE
import pandas as pd
import numpy as np

## Reading Data and Setting up Grid search parameters

In [14]:
data = pd.read_csv('data/Telco-Customer-Churn-encoded-data-FE-Features-Selected.csv')
label = pd.read_csv('data/Telco-Customer-Churn-encoded-label.csv')

In [15]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [16]:
data.head()

Unnamed: 0,Contract_Month-to-month,PaymentMethod_Mailed check,OnlineSecurity_No,Monthly/Total_Charges,TechSupport_No,InternetService_DSL,tenure,InternetService_Fiber optic,PaymentMethod_Electronic check,tenure_group,...,OnlineBackup_No internet service,DeviceProtection_No internet service,PaymentMethod_Credit card (automatic),Partner_Yes,TechSupport_No internet service,StreamingTV_No internet service,PaymentMethod_Bank transfer (automatic),TechSupport_Yes,StreamingMovies_No internet service,TotalCharges
0,1,0,1,1.0,1,1,1,0,1,0,...,0,0,0,1,0,0,0,0,0,29.85
1,0,1,0,0.03014,1,1,34,0,0,2,...,0,0,0,0,0,0,0,0,0,1889.5
2,1,1,0,0.49792,1,1,2,0,0,0,...,0,0,0,0,0,0,0,0,0,108.15
3,0,0,0,0.02298,0,1,45,0,0,3,...,0,0,0,0,0,0,1,1,0,1840.75
4,1,0,1,0.466205,1,0,2,1,1,0,...,0,0,0,0,0,0,0,0,0,151.65


In [17]:
label[:5]

Unnamed: 0,Churn
0,0
1,0
2,1
3,0
4,1


In [18]:
X = data.to_numpy()
y = label.to_numpy().ravel()

In [19]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

### Decision tree Grid search

In [23]:
DecisionTreeModel_gs = DecisionTreeModel("DecisionTreeModel with grid search")
DT_best_params, DT_recempling = DecisionTreeModel_gs.gs_parameter_tune(X, y, max_search=200)


100%|██████████| 200/200 [00:39<00:00,  5.11it/s]

Model: DecisionTreeModel with grid search
Best Parameters: {'criterion': 'gini', 'max_depth': 7, 'min_samples_split': 7, 'min_impurity_decrease': 0.001}
Resempling: True
Validation Accuracy: 0.7230935114796371





In [26]:

DT_hyper_model = DecisionTreeModel("DecisionTreeModel with hyper parameters")
DT_hyper_model.hyper_parameter(DT_best_params)
if(DT_recempling):
    DT_hyper_model.fit_with_resampling(X_train, y_train)
else:
    DT_hyper_model.fit(X_train, y_train)

y_pred_hyper = DT_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()

0.7240879456105085

### XGBoost with Grid Search

In [28]:
xgb_gs = XGBoostModel("XGBoostModel with grid search")
xgb_best_params, xgb_resempling = xgb_gs.gs_parameter_tune(X, y, max_search=200)

  0%|          | 0/200 [00:00<?, ?it/s]

100%|██████████| 200/200 [49:43<00:00, 14.92s/it]  

Model: XGBoostModel with grid search
Best Parameters: {'max_depth': 3, 'learning_rate': 0.1, 'n_estimators': 50, 'subsample': 0.7, 'colsample_bytree': 0.8999999999999999}
Resempling: True
Validation Accuracy: 0.7365560530635161





In [29]:
xgb_hyper_model = XGBoostModel("XGBoostModel with hyper parameters")
xgb_hyper_model.hyper_parameter(xgb_best_params)
if(xgb_resempling):
    xgb_hyper_model.fit_with_resampling(X_train, y_train)
else:
    xgb_hyper_model.fit(X_train, y_train)

y_pred_hyper = xgb_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()

0.757269118418096

### AdaBoost with Grid Search

In [22]:
AdaBoostModel_gs = AdaBoostModel("AdaBoostModel with grid search")
ab_best_params, ab_resempling = AdaBoostModel_gs.gs_parameter_tune(X, y, max_search=200)

  7%|▋         | 2/30 [00:15<03:30,  7.51s/it]


KeyboardInterrupt: 

In [None]:
ab_hyper_model = AdaBoostModel("AdaBoostModel with hyper parameters")
ab_hyper_model.hyper_parameter(ab_best_params)
if(ab_resempling):
    ab_hyper_model.fit_with_resampling(X_train, y_train)
else:
    ab_hyper_model.fit(X_train, y_train)

y_pred_hyper = ab_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()



0.7435896076820467

### Gradient Boosting with Grid Search

In [None]:
GradientBoostingModel_gs = GradientBoostingModel("GradientBoostingModel with grid search")
gb_best_params, gb_resempling = GradientBoostingModel_gs.gs_parameter_tune(X, y, grid_search_parameters['GradientBoostingClassifier'], max_search=100)

100%|██████████| 100/100 [45:45<00:00, 27.46s/it]

Model: GradientBoostingModel with grid search
Best Parameters: {'loss': 'log_loss', 'learning_rate': 0.1, 'n_estimators': 50, 'max_depth': 3, 'min_samples_split': 7, 'min_impurity_decrease': 0.01}
Resempling: True
Validation Accuracy: 0.7370372507161347





In [None]:
gb_hyper_model = GradientBoostingModel("GradientBoostingModel with hyper parameters")
gb_hyper_model.hyper_parameter(gb_best_params)
if(gb_resempling):
    gb_hyper_model.fit_with_resampling(X_train, y_train)
else:
    gb_hyper_model.fit(X_train, y_train)

y_pred_hyper = gb_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()

0.7553634785234393

In [None]:
# gb_hyper_model.save('gb_hyper_model.pkl')

In [None]:
load_model = GradientBoostingModel()
load_model.load('gb_hyper_model.pkl')

In [None]:
y_pred_load = load_model.predict(X_test)
Metrics(y_pred_load, y_test).f1_score()


0.7553634785234393

### Random Forest Classifier with Grid Search

In [None]:
RandomForestModel_gs = RandomForestModel("RandomForestModel with grid search")
rf_best_params, rf_resempling = RandomForestModel_gs.gs_parameter_tune(X, y, grid_search_parameters['RandomForestClassifier'], max_search=200)

In [None]:
rf_hyper_model = RandomForestModel("RandomForestModel with hyper parameters")
rf_hyper_model.hyper_parameter(rf_best_params)
if(rf_resempling):
    rf_hyper_model.fit_with_resampling(X_train, y_train)
else:
    rf_hyper_model.fit(X_train, y_train)

y_pred_hyper = rf_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()

In [None]:
DecisionTreeModel_gs = DecisionTreeModel("DecisionTreeModel with grid search")
DT_best_params, DT_recempling = DecisionTreeModel_gs.gs_parameter_tune(X, y, grid_search_parameters['DecisionTreeModel'], max_search=200)

In [None]:
DT_hyper_model = DecisionTreeModel("DecisionTreeModel with hyper parameters")
DT_hyper_model.hyper_parameter(DT_best_params)
if(DT_recempling):
    DT_hyper_model.fit_with_resampling(X_train, y_train)
else:
    DT_hyper_model.fit(X_train, y_train)

y_pred_hyper = DT_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()

0.7409847115414075

### CatBoost Classifier with Grid Search

In [None]:
CatBoost_gs = CatBoostModel("CatBoostModel with grid search")
cb_best_params, cb_resempling = CatBoost_gs.gs_parameter_tune(X, y, grid_search_parameters['CatBoostClassifier'], max_search=-1)

In [17]:
cb_hyper_model = CatBoostModel("CatBoostModel with hyper parameters")
cb_hyper_model.hyper_parameter(cb_best_params)
if(cb_resempling):
    cb_hyper_model.fit_with_resampling(X_train, y_train)
else:
    cb_hyper_model.fit(X_train, y_train)

y_pred_hyper = cb_hyper_model.predict(X_test)

Metrics(y_pred_hyper, y_test).f1_score()

0:	learn: 0.6847073	total: 2.15ms	remaining: 320ms
1:	learn: 0.6792827	total: 3.89ms	remaining: 288ms
2:	learn: 0.6717061	total: 5.75ms	remaining: 282ms
3:	learn: 0.6666778	total: 7.43ms	remaining: 271ms
4:	learn: 0.6617661	total: 8.98ms	remaining: 261ms
5:	learn: 0.6565347	total: 10.4ms	remaining: 249ms
6:	learn: 0.6466224	total: 11.9ms	remaining: 243ms
7:	learn: 0.6366963	total: 13.4ms	remaining: 238ms
8:	learn: 0.6321884	total: 14.9ms	remaining: 233ms
9:	learn: 0.6285178	total: 16.3ms	remaining: 228ms
10:	learn: 0.6202254	total: 17.8ms	remaining: 225ms
11:	learn: 0.6135358	total: 19.3ms	remaining: 222ms
12:	learn: 0.6093813	total: 20.8ms	remaining: 219ms
13:	learn: 0.6054479	total: 22.2ms	remaining: 216ms
14:	learn: 0.6015785	total: 23.7ms	remaining: 213ms
15:	learn: 0.5977211	total: 25.2ms	remaining: 211ms
16:	learn: 0.5940839	total: 26.7ms	remaining: 209ms
17:	learn: 0.5874969	total: 28.2ms	remaining: 207ms
18:	learn: 0.5842962	total: 29.7ms	remaining: 205ms
19:	learn: 0.5809552	t

0.751683778495237