In [None]:
import pandas as pd
import numpy as np

import joblib

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report, confusion_matrix

from xgboost import XGBClassifier


In [None]:
# Load the preprocessing object persisted in 'preprocess.pkl'.
# NOTE: joblib.load is pickle-based and can execute arbitrary code while
# loading, so these files must come from a trusted source.
preprocess = joblib.load('preprocess.pkl')

# Load the data splits; the file is expected to hold exactly four objects,
# unpacked in the order X_train, X_test, y_train, y_test.
X_train, X_test, y_train, y_test = joblib.load('data_splits.pkl')

# 03 - XGBoost

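Modelo baseado em gradient boosting (XGBoost), com os hiperparâmetros ajustados por busca em grade com validação cruzada e avaliação final no conjunto de teste.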

In [None]:

# Fixed seed: the grid below tries subsample=0.8 and colsample_bytree=0.7,
# which make XGBoost sample rows/columns at random. Without a seed the
# selected model and the reported metrics change from run to run.
RANDOM_STATE = 42

# Class-imbalance weight: number of class-0 rows per class-1 row in the
# training labels. The counts are computed once and reused.
class_counts = y_train.value_counts()
ratio = class_counts[0] / class_counts[1]

xgb = XGBClassifier(
    objective='binary:logistic',
    eval_metric='logloss',
    tree_method='hist',
    scale_pos_weight=ratio,
    random_state=RANDOM_STATE,
    # One thread per model: GridSearchCV below already parallelises over
    # candidates and folds (n_jobs=-1); letting every XGBoost fit also grab
    # all cores creates thread contention and slows the whole search down.
    n_jobs=1
)

# Preprocessing lives inside the pipeline so that it is fitted together with
# the classifier on whatever data the pipeline is fitted on.
pipeline_xgb = Pipeline(steps=[
    ('preprocess', preprocess),
    ('clf', xgb)
])

# 2 * 3 * 2 * 2 * 2 = 48 hyperparameter combinations; the 'clf__' prefix
# routes each parameter to the 'clf' step of the pipeline.
param_grid_xgb = {
    'clf__n_estimators': [200, 500],
    'clf__max_depth': [4, 6, 8],
    'clf__learning_rate': [0.01, 0.1],
    'clf__subsample': [0.8, 1.0],
    'clf__colsample_bytree': [0.7, 1.0]
}

# Exhaustive search scored by macro-averaged F1 with 3-fold cross-validation.
grid_xgb = GridSearchCV(
    pipeline_xgb,
    param_grid_xgb,
    scoring='f1_macro',
    cv=3,
    n_jobs=-1
)

grid_xgb.fit(X_train, y_train)

# Report which configuration won and its cross-validated score, so the
# test-set numbers below can be traced back to a concrete model.
print('Best params:', grid_xgb.best_params_)
print('Best CV f1_macro:', grid_xgb.best_score_)

# Evaluate the refitted best pipeline on the held-out test split.
y_pred_xgb = grid_xgb.predict(X_test)
print(classification_report(y_test, y_pred_xgb))
print(confusion_matrix(y_test, y_pred_xgb))
