# XGB Pipeline

In [2]:
# Executes pipeline_utils.ipynb inside this kernel. The cells below use names
# that are never defined in this notebook (Pipeline, preprocess, xgb, Xtr, Ytr,
# Xva, Yva, Xts, Yts, auc) — presumably they all come from this run; confirm.
%run pipeline_utils.ipynb

(32560, 14)
(32560, 1)


In [3]:
# Two-stage estimator: the externally defined `preprocess` transformer feeds a
# default-configured XGBoost classifier. The step name "xgb" is the prefix used
# by the `xgb__*` keys in the hyper-parameter grid.
xgbPipeline = Pipeline(
    steps=[
        ("preprocess", preprocess),
        ("xgb", xgb.XGBClassifier()),
    ]
)

In [4]:
# Fit the untuned pipeline on the training split. Ytr.ravel() flattens the
# target to 1-D (presumably Ytr is an (n, 1) column vector — confirm in
# pipeline_utils). As the last expression, the fitted pipeline's repr is shown.
xgbPipeline.fit(Xtr, Ytr.ravel())

Pipeline(memory=None,
         steps=[('preprocess',
                 FeatureUnion(n_jobs=None,
                              transformer_list=[('numerical_transform',
                                                 Pipeline(memory=None,
                                                          steps=[('selector',
                                                                  FunctionTransformer(accept_sparse=False,
                                                                                      check_inverse=True,
                                                                                      func=<function select_number at 0x7fd02aa96ea0>,
                                                                                      inv_kw_args=None,
                                                                                      inverse_func=None,
                                                                                      kw_args=None,
                          

## Hyper-parameter Tuning

```python
# joblib is imported here because this block calls joblib.dump, while the
# notebook's only other `import joblib` sits in a later cell.
import joblib
from sklearn.model_selection import GridSearchCV

# Search space for the "xgb" step of xgbPipeline (hence the `xgb__` prefix).
# 3 * 4 * 3 * 3 * 3 = 324 candidates, each cross-validated with cv=10,
# i.e. 3240 fits — expect this to be slow.
hyperparameters = {
    'xgb__min_child_weight': [1, 5, 10],
    'xgb__gamma': [0.5, 1, 1.5, 2],
    'xgb__subsample': [0.6, 0.8, 1.0],
    'xgb__colsample_bytree': [0.6, 0.8, 1.0],
    'xgb__max_depth': [3, 4, 5],
}
clf = GridSearchCV(xgbPipeline, hyperparameters, cv=10)
clf.fit(Xtr, Ytr.ravel())

# Persist the best pipeline found by the search and, separately, its parameters
# so that later sessions can skip the search.
joblib.dump(clf.best_estimator_, 'xgb_grid_model.pkl')
joblib.dump(clf.best_params_, 'best_xgb_grid_params.pkl', compress=1)  # Only best parameters

```

## Loading xgb model

In [10]:
import joblib
# Rebind xgbPipeline to the tuned pipeline that the grid-search snippet saved
# as 'xgb_grid_model.pkl'; from here on the name no longer refers to the
# default-parameter pipeline fitted earlier in this notebook.
# NOTE(review): joblib.load unpickles the file and can execute arbitrary code —
# only load a model file that comes from a trusted source.
xgbPipeline = joblib.load('xgb_grid_model.pkl')

### Evaluating the tuned XGB model (best grid-search parameters)

In [14]:
# Score of the tuned pipeline on the validation split.
print("xgb score: ",xgbPipeline.score(Xva,Yva))
# Metric report on the training split. auc() prints its own report and returns
# None (the previously recorded output ends with a bare `None`), so it is called
# directly instead of being wrapped in print().
auc(xgbPipeline,Xtr,Ytr)

xgb score:  0.8684534812076402
[0 0 0 ... 0 0 0]
F1 weighted 0.8755090832965929
AUC : 0.8056611943772053
Classification report : 
              precision    recall  f1-score   support

           0       0.90      0.95      0.92     22229
           1       0.81      0.66      0.73      7086

    accuracy                           0.88     29315
   macro avg       0.85      0.81      0.82     29315
weighted avg       0.88      0.88      0.88     29315

[[21096  1133]
 [ 2393  4693]]
None
None


In [13]:
# Metric report on the validation split. auc() prints its own report and
# returns None (the previously recorded output ends with a bare `None`), so it
# is called directly instead of being wrapped in print().
auc(xgbPipeline,Xva,Yva)

[0 1 1 ... 0 1 1]
F1 weighted 0.8641271465124886
AUC : 0.7869094302402557
Classification report : 
              precision    recall  f1-score   support

           0       0.89      0.94      0.92      2491
           1       0.76      0.63      0.69       755

    accuracy                           0.87      3246
   macro avg       0.83      0.79      0.80      3246
weighted avg       0.86      0.87      0.86      3246

[[2340  151]
 [ 276  479]]
None
None


In [16]:
# Metric report on the test split. auc() prints its own report and returns None
# (the previously recorded output ends with a bare `None`), so it is called
# directly instead of being wrapped in print().
# NOTE(review): the recorded report covers 32560 rows — the same row count that
# pipeline_utils printed on load, and almost exactly train + validation
# (29315 + 3246). Verify that Xts/Yts are a genuinely held-out set and do not
# overlap the training data.
auc(xgbPipeline, Xts, Yts)

[0 0 0 ... 0 0 1]
F1 weighted 0.8743670780193409
AUC : 0.8038329473969731
Classification report : 
              precision    recall  f1-score   support

           0       0.90      0.95      0.92     24719
           1       0.80      0.66      0.72      7841

    accuracy                           0.88     32560
   macro avg       0.85      0.80      0.82     32560
weighted avg       0.87      0.88      0.87     32560

[[23435  1284]
 [ 2669  5172]]
None
None
