In [1]:
import pandas as pd
import xgboost 
import optuna
from xgboost import XGBClassifier
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
df = pd.read_csv('C:\\Users\\souza\\OneDrive\\Área de Trabalho\\Risk Nubank\\data\\data_tratado.csv')
df = df.drop('Unnamed: 0', axis=1)
df = df.drop('score_rating_enc', axis=1)
x = df.drop('target_default', axis=1)
y = df['target_default']

In [3]:
# split 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [4]:
# seleçao de atributos 
cat_atribs = ['score_rating', 'situation']
num_atribs = ['score_3', 'risk_rate', 'credit_limit', 'income', 'n_defaulted_loans', ' n_issues', 'ok_since',
            'n_bankruptcies']

In [5]:
column_transf = ColumnTransformer([('onehot', OneHotEncoder(handle_unknown='ignore'), cat_atribs)
                                ])
pipe_xgboost = Pipeline(steps=[('transformer', column_transf),
                    ('model', XGBClassifier())
                    ])

In [10]:
# criando estudo para xgboost 
def objective(trial):
    params = {
        'gamma': trial.suggest_int('gamma', 0, 5),
        'max_depth': trial.suggest_int('max_depth', 0, 10), 
        'max_delta_step': trial.suggest_int('max_delta_step', 1, 10), 
        'subsample': trial.suggest_float('subsample', 0, 1), 
        'reg_lambda': trial.suggest_float('reg_lambda', 0.5, 1)
    }
    model = pipe_xgboost.set_params(model=XGBClassifier(**params)).fit(x_train, y_train)
    y_pred = model.predict(x_test)
    recall = recall_score(y_test, y_pred)
    
    return recall 
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)
print(f'melhores parametros {study.best_params}')

[I 2025-08-06 16:35:38,449] A new study created in memory with name: no-name-bc0c93ef-70a7-4798-845e-6de027278a7e
[I 2025-08-06 16:35:38,574] Trial 0 finished with value: 1.0 and parameters: {'gamma': 3, 'max_depth': 8, 'max_delta_step': 1, 'subsample': 0.6803547999898337, 'reg_lambda': 0.7557940213385035}. Best is trial 0 with value: 1.0.
[I 2025-08-06 16:35:38,696] Trial 1 finished with value: 1.0 and parameters: {'gamma': 3, 'max_depth': 10, 'max_delta_step': 7, 'subsample': 0.2695669714329568, 'reg_lambda': 0.8789036916098918}. Best is trial 0 with value: 1.0.
[I 2025-08-06 16:35:38,810] Trial 2 finished with value: 1.0 and parameters: {'gamma': 4, 'max_depth': 2, 'max_delta_step': 1, 'subsample': 0.20456513984492675, 'reg_lambda': 0.6465897822990677}. Best is trial 0 with value: 1.0.
[I 2025-08-06 16:35:38,925] Trial 3 finished with value: 1.0 and parameters: {'gamma': 4, 'max_depth': 5, 'max_delta_step': 5, 'subsample': 0.38293787650907496, 'reg_lambda': 0.9194477009639626}. Best

melhores parametros {'gamma': 3, 'max_depth': 8, 'max_delta_step': 1, 'subsample': 0.6803547999898337, 'reg_lambda': 0.7557940213385035}


In [11]:
xgboost = pipe_xgboost.set_params(model=XGBClassifier(gamma='3', max_depth=8, max_delta_step=1, 
                                                    subsample=0.6803547999898337, reg_lambda=0.7557940213385035)).fit(x_train, y_train)
y_pred = xgboost.predict(x_test)
print(f'recall: {recall_score(y_test, y_pred)}')
print(f'acc: {accuracy_score(y_test, y_pred)}')
print(f'precision: {precision_score(y_test, y_pred)}')

recall: 1.0
acc: 1.0
precision: 1.0
