In [12]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
import optuna
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, recall_score, precision_score

In [2]:
df = pd.read_csv('C:\\Users\\souza\\OneDrive\\Área de Trabalho\\Risk Nubank\\data\\data_tratado.csv')
df = df.drop('Unnamed: 0', axis=1)
x = df.drop('target_default', axis=1)
y = df['target_default']

In [3]:
# split 
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

In [4]:
# seleçao de atributos 
cat_atribs = ['score_rating', 'situation']
num_atribs = ['score_3', 'risk_rate', 'credit_limit', 'income', 'n_defaulted_loans', ' n_issues', 'ok_since',
            'n_bankruptcies', ' score_rating_enc']

In [5]:
column_transf = ColumnTransformer([('onehot', OneHotEncoder(handle_unknown='ignore'), cat_atribs)
                                ])
pipe_log = Pipeline(steps=[('transformer', column_transf),
                    ('model', LogisticRegression())
                    ])

In [14]:
# criando estudo com optuna
def objective(trial):
    paramns = {
        'C': trial.suggest_float('C', 0,1),
        'max_iter': trial.suggest_int('max_iter', 0,100),
        'penalty': trial.suggest_categorical('penalty', ['l2'])
    }
    model = pipe_log.set_params(model=LogisticRegression(**paramns, random_state=42)).fit(x_train, y_train)
    y_pred = model.predict(x_test)
    recall = recall_score(y_test, y_pred)
    
    return recall
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-08-06 15:42:52,424] A new study created in memory with name: no-name-8a32add4-83cc-458e-9249-a8094cc2eb45
[I 2025-08-06 15:42:52,484] Trial 0 finished with value: 0.9977494373593399 and parameters: {'C': 0.4075176204524815, 'max_iter': 44, 'penalty': 'l2'}. Best is trial 0 with value: 0.9977494373593399.
[I 2025-08-06 15:42:52,522] Trial 1 finished with value: 0.9977494373593399 and parameters: {'C': 0.8550417126757136, 'max_iter': 58, 'penalty': 'l2'}. Best is trial 0 with value: 0.9977494373593399.
[I 2025-08-06 15:42:52,559] Trial 2 finished with value: 0.9977494373593399 and parameters: {'C': 0.09273453617356808, 'max_iter': 88, 'penalty': 'l2'}. Best is trial 0 with value: 0.9977494373593399.
[I 2025-08-06 15:42:52,600] Trial 3 finished with value: 0.9977494373593399 and parameters: {'C': 0.09159890773330481, 'max_iter': 80, 'penalty': 'l2'}. Best is trial 0 with value: 0.9977494373593399.
STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max

In [15]:
print(f'melhores parametros modelo regressao logistica: {study.best_params}')

melhores parametros modelo regressao logistica: {'C': 0.4075176204524815, 'max_iter': 44, 'penalty': 'l2'}


In [16]:
logistic_reg = pipe_log.set_params(model=LogisticRegression(C= 0.4075176204524815, max_iter=44, penalty='l2', random_state=42)).fit(x_train, y_train)
y_pred = logistic_reg.predict(x_test)
print(f'recall: {recall_score(y_test, y_pred)}')
print(f'acc: {accuracy_score(y_test, y_pred)}')
print(f'precision: {precision_score(y_test, y_pred)}')

recall: 0.9977494373593399
acc: 0.9996395097332372
precision: 1.0
