<h1><center><font size="6">Hyperparameter Tuning Notebook</font></center></h1>

# Contents

- <a href='#1'>Importing Packages</a>
- <a href='#2'>Preparing Data for Modeling</a>  
- <a href='#3'>Hyperparameter Tuning</a>
    - <a href='#31'>Logistic Regression with GridSearchCV</a>
    - <a href='#32'>Random Forest Classifier with GridSearchCV</a>
    - <a href='#33'>AdaBoost Classifier with GridSearchCV</a>
    - <a href='#34'>Gradient Boosting Classifier with GridSearchCV</a>
    - <a href='#35'>XGBoost Classifier with GridSearchCV</a>
    - <a href='#36'>Evaluation Metrics</a>
- <a href='#4'>In-Depth Hyperparameter Tuning of Gradient Boosting Classifier</a>


# <a id='1'>Importing Packages</a>

In [9]:
import numpy as np
from numpy import where, mean
import pandas as pd
import re
from matplotlib import pyplot as plt
from matplotlib import style
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')
%matplotlib inline
import itertools
from collections import Counter
import pickle

%reload_ext autoreload
%autoreload 2

from utils import *


from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import GaussianNB
from sklearn.experimental import enable_hist_gradient_boosting
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, RandomForestClassifier, BaggingClassifier, HistGradientBoostingClassifier
from sklearn.model_selection import train_test_split, KFold, cross_val_score, cross_val_predict, GridSearchCV,  RepeatedStratifiedKFold, cross_validate
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, precision_recall_curve, roc_curve, roc_auc_score, classification_report, plot_confusion_matrix, auc, mean_squared_error, confusion_matrix, balanced_accuracy_score
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.pipeline import make_pipeline
from sklearn.impute import SimpleImputer
from sklearn.compose import ColumnTransformer, make_column_selector as selector
from sklearn.dummy import DummyClassifier
from sklearn.utils import resample

from imblearn.pipeline import Pipeline
from imblearn.over_sampling import RandomOverSampler, SMOTE
from imblearn.under_sampling import CondensedNearestNeighbour, NeighbourhoodCleaningRule, NearMiss, OneSidedSelection, RandomUnderSampler
from imblearn.ensemble import BalancedBaggingClassifier, BalancedRandomForestClassifier, EasyEnsembleClassifier, RUSBoostClassifier
from imblearn.metrics import geometric_mean_score
from imblearn.under_sampling import TomekLinks

import xgboost as xgb
from xgboost.sklearn import XGBClassifier


# <a id='2'>Preparing Data for Modeling</a>

In [2]:
pickle_in = open("../data/pickles/training_model.pickle","rb")
train = pickle.load(pickle_in)
pickle_in = open("../data/pickles/validate_model.pickle","rb")
validate = pickle.load(pickle_in)

In [3]:
train.head()

Unnamed: 0,limit,behind1,paid2,delayed,latemths,age,behind2,billed1,avg_av,avail1,default
0,1790.26,0,179.13,0,0,44,0,1631.93,0.344578,0.08844,0
1,5728.83,-1,173.87,0,0,46,-1,891.69,0.957227,0.84435,0
2,3580.52,-1,0.0,0,0,47,-1,238.68,0.96865,0.933339,1
3,6086.88,0,89.26,0,0,29,0,2831.87,0.650602,0.534758,0
4,5370.78,-2,1171.37,0,0,33,-2,873.4,0.836153,0.837379,0


In [4]:
X_train = train.drop(["default"], axis=1)
y_tr = train["default"]
X_validate = validate.drop(["default"], axis=1)
y_val = validate["default"]

In [5]:
scaler = StandardScaler()
scaler.fit(X_train)
X_tr = scaler.transform(X_train)
X_val = scaler.transform(X_validate)

# <a id='3'>Hyperparameter Tuning</a>

## <a id='31'>Logistic Regression with GridSearchCV</a>

In [6]:
# logreg = LogisticRegression()
# params = {'C': [0.001, 0.01, 0.1, 1, 10], 
#           'penalty': ['none', 'l1', 'l2', 'elasticnet'],
#           'solver': ['liblinear', 'newton-cg', 'lbfgs', 'sag', 'saga']}
# gslog = GridSearchCV(estimator = logreg,
#                      param_grid = params,
#                      scoring = 'average_precision',
#                      cv = 10,
#                      n_jobs = -1).fit(X_tr, y_tr)
# y_pred_gslog_tr = gslog.predict(X_tr)
# y_pred_gslog_val = gslog.predict(X_val)
# print("Best: %f using %s" % (gslog.best_score_, gslog.best_params_))
# print("")
# get_metrics(X_tr, y_tr, X_val, y_val, y_pred_gslog_tr, y_pred_gslog_val, gslog)

# Best: 0.522622 using {'C': 1, 'penalty': 'l2', 'solver': 'newton-cg'}

In [7]:
logb = LogisticRegression(C=1, penalty='l2', solver='newton-cg').fit(X_tr, y_tr)
y_pred_logb_tr = logb.predict(X_tr)
y_pred_logb_val = logb.predict(X_val)
get_metric(X_tr, y_tr, X_val, y_val, y_pred_logb_tr, y_pred_logb_val, logb)

Training Accuracy:  0.8087142857142857
Validation Accuracy:  0.8071666666666667
Training F1 Score:  0.40320903283316006
Validation F1 Score:  0.380952380952381
Training AUC Score:  0.7476971040793051
Validation AUC Score:  0.7467126831177808
Training Recall Score:  0.2914518900343643
Validation Recall Score:  0.27113480578827115
Training Precision Score:  0.6539759036144578
Validation Precision Score:  0.6402877697841727
Training Average Precision Score:  0.5205202430336466
Validation Average Precision Score:  0.4986582661881126


## <a id='33'>Random Forest Classifier with GridSearchCV</a>

In [8]:
# rfc = RandomForestClassifier()
# params = {'n_estimators': [100, 200, 400, 600, 1000],
#           'criterion': ['entropy', 'gini'],
#           'max_depth': [5, 8, 15, 25, 30],
#           'min_samples_split': [2, 5, 10, 15, 100],
#           'min_samples_leaf': [1, 2, 5, 10]}
# gsrfc = GridSearchCV(estimator = rfc,
#                      param_grid = params,
#                      scoring = 'average_precision',
#                      cv = 5,
#                      n_jobs = -1).fit(X_tr, y_tr)
# y_pred_gsrfc_tr = gsrfc.predict(X_tr)
# y_pred_gsrfc_val = gsrfc.predict(X_val)
# print("Best: %f using %s" % (gsrfc.best_score_, gsrfc.best_params_))
# print("")
# get_metrics(X_tr, y_tr, X_val, y_val, y_pred_gsrfc_tr, y_pred_gsrfc_val, gsrfc)

# Best: 0.565196 using {'criterion': 'gini', 'max_depth': 8, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 400}

In [9]:
rfcb = RandomForestClassifier(criterion='gini', max_depth=8, min_samples_leaf=1, min_samples_split=2, n_estimators=400).fit(X_tr, y_tr)
y_pred_rfcb_tr = rfcb.predict(X_tr)
y_pred_rfcb_val = rfcb.predict(X_val)
get_metric(X_tr, y_tr, X_val, y_val, y_pred_rfcb_tr, y_pred_rfcb_val, rfcb)

Training Accuracy:  0.8336666666666667
Validation Accuracy:  0.8213333333333334
Training F1 Score:  0.5153323158040795
Validation F1 Score:  0.46772591857000984
Training AUC Score:  0.8263548471080532
Validation AUC Score:  0.7804000337339867
Training Recall Score:  0.39884020618556704
Validation Recall Score:  0.3587204874333587
Training Precision Score:  0.7279498235985888
Validation Precision Score:  0.6718972895863052
Training Average Precision Score:  0.6506872748217742
Validation Average Precision Score:  0.5445290195021129


## <a id='34'>AdaBoost Classifier with GridSearchCV</a>

In [10]:
# abc = AdaBoostClassifier()
# params = {'n_estimators': [10, 50, 100, 200],
#           'learning_rate': [0.001, 0.01, 0.1, 0.2, 0.5]}
# gsabc = GridSearchCV(estimator = abc,
#                      param_grid = params,
#                      n_jobs = -1,
#                      cv = 5,
#                      scoring = 'average_precision').fit(X_tr, y_tr)
# y_pred_gsabc_tr = gsabc.predict(X_tr)
# y_pred_gsabc_val = gsabc.predict(X_val)
# print("Best: %f using %s" % (gsabc.best_score_, gsabc.best_params_))
# print("")
# get_metrics(X_tr, y_tr, X_val, y_val, y_pred_gsabc_tr, y_pred_gsabc_val, gsabc)

# Best: 0.545818 using {'learning_rate': 0.1, 'n_estimators': 200}

In [11]:
abcb = AdaBoostClassifier(learning_rate=0.1, n_estimators=200).fit(X_tr, y_tr)
y_pred_abcb_tr = abcb.predict(X_tr)
y_pred_abcb_val = abcb.predict(X_val)
get_metric(X_tr, y_tr, X_val, y_val, y_pred_abcb_tr, y_pred_abcb_val, abcb)

Training Accuracy:  0.8192380952380952
Validation Accuracy:  0.8208333333333333
Training F1 Score:  0.44969556393157434
Validation F1 Score:  0.4438696326952923
Training AUC Score:  0.7866775976198166
Validation AUC Score:  0.7772694027703144
Training Recall Score:  0.3331185567010309
Validation Recall Score:  0.32673267326732675
Training Precision Score:  0.691793041926851
Validation Precision Score:  0.6919354838709677
Training Average Precision Score:  0.5528304845005094
Validation Average Precision Score:  0.5244329096074963


## <a id='35'>Gradient Boosting with GridSearchCV</a>

In [12]:
# gbc = GradientBoostingClassifier()
# params = {'n_estimators': [10, 100, 1000],
#           'learning_rate': [0.001, 0.01, 0.1],
#           'max_depth': [3, 7, 9]}
# gsgbc = GridSearchCV(estimator = gbc,
#                      param_grid = params, 
#                      n_jobs = -1, 
#                      cv = 5, 
#                      scoring = 'average_precision').fit(X_tr, y_tr)
# y_pred_gsgbc_tr = gsgbc.predict(X_tr)
# y_pred_gsgbc_val = gsgbc.predict(X_val)
# print("Best: %f using %s" % (gsgbc.best_score_, gsgbc.best_params_))
# print("")
# get_metric(X_tr, y_tr, X_val, y_val, y_pred_gsgbc_tr, y_pred_gsgbc_tr, gsgbc)

# Best: 0.558390 using {'learning_rate': 0.01, 'max_depth': 3, 'n_estimators': 1000}

In [13]:
gbcb = GradientBoostingClassifier(learning_rate=0.01, max_depth=3, n_estimators=1000).fit(X_tr, y_tr)
y_pred_gbcb_tr = gbcb.predict(X_tr)
y_pred_gbcb_val = gbcb.predict(X_val)
get_metric(X_tr, y_tr, X_val, y_val, y_pred_gbcb_tr, y_pred_gbcb_val, gbcb)

Training Accuracy:  0.8276190476190476
Validation Accuracy:  0.8196666666666667
Training F1 Score:  0.49694274596998333
Validation F1 Score:  0.4622266401590458
Training AUC Score:  0.805111244939135
Validation AUC Score:  0.7812172216877036
Training Recall Score:  0.38402061855670105
Validation Recall Score:  0.3541507996953541
Training Precision Score:  0.7039370078740157
Validation Precision Score:  0.6652360515021459
Training Average Precision Score:  0.6003473510491001
Validation Average Precision Score:  0.5435298387150838


## <a id='36'>XGBoost Classifier with GridSearchCV</a>

In [14]:
# xgb = XGBClassifier()
# params = {'n_estimators': [50, 100, 150, 200], 
#           'max_depth': [3, 5, 7, 10], 
#           'min_child_weight': [2, 3, 4, 5]}
# gsxgb = GridSearchCV(estimator = xgb,
#                      param_grid = params,
#                      scoring = 'average_precision',
#                      cv = 5,
#                      n_jobs = -1).fit(X_tr, y_tr)
# y_pred_gsxgb_tr = gsxgb.predict(X_tr)
# y_pred_gsxgb_val = gsxgb.predict(X_val)
# print("Best: %f using %s" % (gsxgb.best_score_, gsxgb.best_params_))
# print("")
# get_metrics(X_tr, y_tr, X_val, y_val, y_pred_gsxgb_tr, y_pred_gsxgb_val, gsxgb)

# Best: 0.555500 using {'max_depth': 3, 'min_child_weight': 5, 'n_estimators': 50}


In [15]:
xgbb = XGBClassifier(max_depth=3, min_child_weight=1, n_estimators=50).fit(X_tr, y_tr)
y_pred_xgbb_tr = xgbb.predict(X_tr)
y_pred_xgbb_val = xgbb.predict(X_val)
get_metric(X_tr, y_tr, X_val, y_val, y_pred_xgbb_tr, y_pred_xgbb_val, xgbb)

Training Accuracy:  0.8276190476190476
Validation Accuracy:  0.8185
Training F1 Score:  0.49833702882483377
Validation F1 Score:  0.4579392732702837
Training AUC Score:  0.810540741434586
Validation AUC Score:  0.7767761488364293
Training Recall Score:  0.3861683848797251
Validation Recall Score:  0.3503427265803503
Training Precision Score:  0.70234375
Validation Precision Score:  0.6609195402298851
Training Average Precision Score:  0.6011613543281619
Validation Average Precision Score:  0.5393954705676095


## <a id='37'>Evaluation Metrics</a>

In [16]:
data = {'Accuracy': [accuracy(y_val, y_pred_logb_val), 
                     accuracy(y_val, y_pred_rfcb_val), 
                     accuracy(y_val, y_pred_abcb_val), 
                     accuracy(y_val, y_pred_gbcb_val),
                     accuracy(y_val, y_pred_xgbb_val)],
        'F1 Score': [f1(y_val, y_pred_logb_val), 
                     f1(y_val, y_pred_rfcb_val), 
                     f1(y_val, y_pred_abcb_val), 
                     f1(y_val, y_pred_gbcb_val),
                     f1(y_val, y_pred_xgbb_val)],
        'Recall': [recall(y_val, y_pred_logb_val), 
                   recall(y_val, y_pred_rfcb_val), 
                   recall(y_val, y_pred_abcb_val),
                   recall(y_val, y_pred_gbcb_val),
                   recall(y_val, y_pred_xgbb_val)],
        'Precision': [precision(y_val, y_pred_logb_val), 
                      precision(y_val, y_pred_rfcb_val), 
                      precision(y_val, y_pred_abcb_val),
                      precision(y_val, y_pred_gbcb_val),
                      precision(y_val, y_pred_xgbb_val)],
        'ROC-AUC': [auc(X_val, y_val, logb),
                   auc(X_val, y_val, rfcb),
                   auc(X_val, y_val, abcb),
                   auc(X_val, y_val, gbcb),
                   auc(X_val, y_val, xgbb)]}
scores3 = pd.DataFrame(data=data, index = ['Logistic Regression with GridSearchCV', 
                                           'Random Forest with GridSearchCV', 
                                           'AdaBoost with GridSearchCV', 
                                           'Gradient Boosting with GridSearchCV',
                                           'XGBoost with GridSearchCV'])

In [17]:
scores3

Unnamed: 0,Accuracy,F1 Score,Recall,Precision,PR AUC
Logistic Regression with GridSearchCV,0.807167,0.380952,0.271135,0.640288,0.498658
Random Forest with GridSearchCV,0.821333,0.467726,0.35872,0.671897,0.544529
AdaBoost with GridSearchCV,0.820833,0.44387,0.326733,0.691935,0.524433
Gradient Boosting with GridSearchCV,0.819667,0.462227,0.354151,0.665236,0.54353
XGBoost with GridSearchCV,0.8185,0.457939,0.350343,0.66092,0.539395


In [18]:
scores3.to_csv("../data/charts/scores3.csv")

## Pickle out best model

In [54]:
rfcb

RandomForestClassifier(max_depth=8, n_estimators=400)

In [55]:
pickle_out = open("../data/best_model.pickle","wb")
pickle.dump(rfcb, pickle_out)
pickle_out.close()

## In-Depth Hyperparameter Tuning of Gradient Boosting Classifier

In [7]:
params = {'n_estimators': range(20, 81, 10)}
gs_gbc = GridSearchCV(estimator = GradientBoostingClassifier(random_state=42),
                      param_grid = params, 
                      n_jobs = 4,
                      iid = False, 
                      cv = 5, 
                      scoring = 'roc_auc',
                      verbose = 1).fit(X_tr, y_tr)
print("Best: %f using %s" % (gs_gbc.best_score_, gs_gbc.best_params_))

Fitting 5 folds for each of 7 candidates, totalling 35 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  35 out of  35 | elapsed:   16.9s finished
Best: 0.784376 using {'n_estimators': 70}


In [8]:
y_pred_gsgbc = gs_gbc.predict(X_val)
get_metric(X_val, y_val, y_pred_gsgbc, gs_gbc)

Accuracy:  0.8205
F1 Score:  0.4644455494778717
ROC-AUC Score:  0.7802866933884474
Recall Score:  0.3556740289413557
Precision Score:  0.669054441260745
PR-AUC Score:  0.544958983883158


In [10]:
params_2 = {'max_depth': range(5, 16, 2), 'min_samples_split': range(200, 1001, 200)}
gs_gbc_2 = GridSearchCV(estimator = GradientBoostingClassifier(random_state=42, n_estimators=70),
                        param_grid = params_2, 
                        n_jobs = 4,
                        iid = False, 
                        cv = 5, 
                        scoring = 'roc_auc',
                        verbose = 1).fit(X_tr, y_tr)
print("Best: %f using %s" % (gs_gbc_2.best_score_, gs_gbc_2.best_params_))

Fitting 5 folds for each of 30 candidates, totalling 150 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   58.8s
[Parallel(n_jobs=4)]: Done 150 out of 150 | elapsed:  6.1min finished
Best: 0.785429 using {'max_depth': 5, 'min_samples_split': 200}


In [11]:
y_pred_gsgbc_2 = gs_gbc_2.predict(X_val)
get_metric(X_val, y_val, y_pred_gsgbc_2, gs_gbc_2)

Accuracy:  0.818
F1 Score:  0.45994065281899105
ROC-AUC Score:  0.7802444446574937
Recall Score:  0.3541507996953541
Precision Score:  0.6558533145275035
PR-AUC Score:  0.5431625883138161


In [12]:
params_3 = {'min_samples_split': range(1000, 2001, 200), 'min_samples_leaf': range(10, 71, 10)}
gs_gbc_3 = GridSearchCV(estimator = GradientBoostingClassifier(random_state=42, n_estimators=70, max_depth=5),
                        param_grid = params_3, 
                        n_jobs = 4,
                        iid = False, 
                        cv = 5, 
                        scoring = 'roc_auc',
                        verbose = 1).fit(X_tr, y_tr)
print("Best: %f using %s" % (gs_gbc_3.best_score_, gs_gbc_3.best_params_))

Fitting 5 folds for each of 42 candidates, totalling 210 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  42 tasks      | elapsed:   49.3s
[Parallel(n_jobs=4)]: Done 192 tasks      | elapsed:  4.0min
[Parallel(n_jobs=4)]: Done 210 out of 210 | elapsed:  4.5min finished
Best: 0.786073 using {'min_samples_leaf': 50, 'min_samples_split': 1600}


In [13]:
y_pred_gsgbc_3 = gs_gbc_3.predict(X_val)
get_metric(X_val, y_val, y_pred_gsgbc_3, gs_gbc_3)

Accuracy:  0.8201666666666667
F1 Score:  0.46504709965294994
ROC-AUC Score:  0.7793591712488936
Recall Score:  0.3571972581873572
Precision Score:  0.6661931818181818
PR-AUC Score:  0.5417669015523743


In [14]:
params_4 = {'max_features': range(7, 20, 2)}
gs_gbc_4 = GridSearchCV(estimator = GradientBoostingClassifier(random_state=42, n_estimators=70, min_samples_split=1800, min_samples_leaf=30),
                        param_grid = params_4, 
                        n_jobs = 4,
                        iid = False, 
                        cv = 5, 
                        scoring = 'roc_auc',
                        verbose = 1).fit(X_tr, y_tr)
print("Best: %f using %s" % (gs_gbc_4.best_score_, gs_gbc_4.best_params_))

Fitting 5 folds for each of 7 candidates, totalling 35 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  35 out of  35 | elapsed:    6.5s finished
Best: 0.784187 using {'max_features': 9}


In [15]:
y_pred_gsgbc_4 = gs_gbc_4.predict(X_val)
get_metric(X_val, y_val, y_pred_gsgbc_4, gs_gbc_4)

Accuracy:  0.8193333333333334
F1 Score:  0.45854145854145856
ROC-AUC Score:  0.7814054722831263
Recall Score:  0.3495811119573496
Precision Score:  0.6661828737300436
PR-AUC Score:  0.5440445818118469


In [16]:
params_5 = {'subsample': [0.6, 0.65, 0.7, 0.75, 0.8, 0.85, 0.9]}
gs_gbc_5 = GridSearchCV(estimator = GradientBoostingClassifier(random_state=42, n_estimators=70, min_samples_split=1800, 
                                                               min_samples_leaf=30, max_features=9),
                        param_grid = params_5, 
                        n_jobs = 4,
                        iid = False, 
                        cv = 5, 
                        scoring = 'roc_auc',
                        verbose = 1).fit(X_tr, y_tr)
print("Best: %f using %s" % (gs_gbc_5.best_score_, gs_gbc_5.best_params_))

Fitting 5 folds for each of 7 candidates, totalling 35 fits
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done  35 out of  35 | elapsed:   15.6s finished
Best: 0.784879 using {'subsample': 0.75}


In [17]:
y_pred_gsgbc_5 = gs_gbc_5.predict(X_val)
get_metric(X_val, y_val, y_pred_gsgbc_5, gs_gbc_5)

Accuracy:  0.8196666666666667
F1 Score:  0.4685658153241651
ROC-AUC Score:  0.7798962988649228
Recall Score:  0.3632901751713633
Precision Score:  0.6597510373443983
PR-AUC Score:  0.539293391995348


In [23]:
gbc_tuned = GradientBoostingClassifier(random_state=42, n_estimators=140, min_samples_split=1800, 
                                       min_samples_leaf=30, max_features=9, subsample=0.7, learning_rate=0.05).fit(X_tr, y_tr)
y_pred_gbc_tuned = gbc_tuned.predict(X_val)
get_metric(X_val, y_val, y_pred_gbc_tuned, gbc_tuned)

Accuracy:  0.821
F1 Score:  0.4724950884086444
ROC-AUC Score:  0.7814880198036052
Recall Score:  0.36633663366336633
Precision Score:  0.665283540802213
PR-AUC Score:  0.5417204420522411


In [24]:
gbc_tuned_2 = GradientBoostingClassifier(random_state=42, n_estimators=800, min_samples_split=1800, 
                                       min_samples_leaf=30, max_features=9, subsample=0.7, learning_rate=0.01).fit(X_tr, y_tr)
y_pred_gbc_tuned_2 = gbc_tuned_2.predict(X_val)
get_metric(X_val, y_val, y_pred_gbc_tuned_2, gbc_tuned_2)

Accuracy:  0.8205
F1 Score:  0.4707616707616708
ROC-AUC Score:  0.7814543020664018
Recall Score:  0.3648134044173648
Precision Score:  0.6634349030470914
PR-AUC Score:  0.5417136315291816


In [25]:
gbc_tuned_3 = GradientBoostingClassifier(random_state=42, n_estimators=1600, min_samples_split=1800, 
                                       min_samples_leaf=30, max_features=9, subsample=0.7, learning_rate=0.005).fit(X_tr, y_tr)
y_pred_gbc_tuned_3 = gbc_tuned_3.predict(X_val)
get_metric(X_val, y_val, y_pred_gbc_tuned_3, gbc_tuned_3)

Accuracy:  0.8206666666666667
F1 Score:  0.4715127701375246
ROC-AUC Score:  0.7813930414065187
Recall Score:  0.3655750190403656
Precision Score:  0.6639004149377593
PR-AUC Score:  0.5408805522595578


In [26]:
gbc_tuned_4 = GradientBoostingClassifier(random_state=42, n_estimators=1600, min_samples_split=1800, 
                                       min_samples_leaf=30, max_features=9, subsample=0.7, learning_rate=0.005).fit(X_tr, y_tr)
y_pred_gbc_tuned_4 = gbc_tuned_4.predict(X_val)
get_metric(X_val, y_val, y_pred_gbc_tuned_4, gbc_tuned_4)

Accuracy:  0.8206666666666667
F1 Score:  0.4715127701375246
ROC-AUC Score:  0.7813930414065187
Recall Score:  0.3655750190403656
Precision Score:  0.6639004149377593
PR-AUC Score:  0.5408805522595578
