In [1]:
import os
import sys

os.getcwd()
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('lasos_ml'), '..')))

from lasos_ml.main import LasosML
lasos = LasosML()

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import pandas as pd
import numpy as np

df = pd.read_csv('conjunto_de_treinamento.csv') # Carregando o conjunto para treinar o modelo
df.replace([' ', 'N/A', '', '?'], np.nan, inplace=True)

y_col = 'inadimplente'  # Coluna target
cols_to_drop = ['id_solicitante', 'local_onde_reside', 'local_onde_trabalha']
x_cols = df.drop(columns=[y_col] + cols_to_drop).columns  # Colunas de features

cat_cols = [
    'produto_solicitado',
    'dia_vencimento',
    'forma_envio_solicitacao',
    'tipo_endereco',
    'sexo',
    'estado_civil',
    'grau_instrucao',
    'nacionalidade',
    'estado_onde_nasceu',
    'estado_onde_reside',
    'possui_telefone_residencial',
    'codigo_area_telefone_residencial',
    'tipo_residencia',
    'possui_telefone_celular',
    'possui_email',
    'possui_cartao_visa',
    'possui_cartao_mastercard',
    'possui_cartao_diners',
    'possui_cartao_amex',
    'possui_outros_cartoes',
    'possui_carro',
    'vinculo_formal_com_empresa',
    'estado_onde_trabalha',
    'possui_telefone_trabalho',
    'codigo_area_telefone_trabalho',
    'profissao',
    'ocupacao',
    'profissao_companheiro',
    'grau_instrucao_companheiro'
]

num_cols = [
    'idade',
    'qtde_dependentes',
    'meses_na_residencia',
    'renda_mensal_regular',
    'renda_extra',
    'qtde_contas_bancarias',
    'qtde_contas_bancarias_especiais',
    'valor_patrimonio_pessoal',
    'meses_no_trabalho'
]

nan_lines = df[x_cols].isna().any(axis=1) | df[y_col].isna()

clean_df = df[~nan_lines]
nan_df = df[nan_lines]

x = clean_df[x_cols]
x_nan_reservoir = nan_df[x_cols]
y = clean_df[y_col]
y_nan_reservoir = nan_df[y_col]

number_classes = 2

In [3]:
from sklearn.model_selection import StratifiedKFold

fold_results = []
n_splits = 5
shuffle = True
random_state = 42

splitter = StratifiedKFold(n_splits=n_splits, shuffle=shuffle, random_state=random_state)
split_gen = splitter.split(x, y)

for fold_index, (train_indices, test_indices) in enumerate(split_gen):
    x_train, x_test = x.iloc[train_indices], x.iloc[test_indices]
    y_train, y_test = (y.iloc[train_indices], y.iloc[test_indices])

    x_train = pd.concat([x_train, x_nan_reservoir], ignore_index=True)
    y_train = pd.concat([y_train, y_nan_reservoir], ignore_index=True)

    x_train = x_train.reset_index(drop = True)
    x_test = x_test.reset_index(drop = True)
    y_train = y_train.reset_index(drop = True)
    y_test = y_test.reset_index(drop = True)

    fold_results.append({
                "fold_index": fold_index + 1,
                "x_train": x_train,
                "x_test": x_test,
                "y_train": y_train,
                "y_test": y_test
            })

In [4]:
# test_size = 0.3 # Tamanho do conjunto de teste
# random_state = 42 # Semente para reprodutibilidade
# cv = 5 # Número de folds para validação cruzada
# fold_results = []  # Lista para armazenar os resultados de cada fold

# rng = np.random.default_rng(random_state)  # Gerador de números aleatórios
# test_set_size = int(len(X) * test_size)  # Tamanho do conjunto de teste

# for fold in range(cv):
#     shuffled_indices = rng.permutation(len(X))

#     test_indices = shuffled_indices[:test_set_size]
#     train_indices = shuffled_indices[test_set_size:]

#     x_train, x_test = X.iloc[train_indices], X.iloc[test_indices]
#     y_train, y_test = y.iloc[train_indices], y.iloc[test_indices]

#     x_train = pd.concat([df_treinamento.loc[nan_lines, x_cols], x_train], axis=0)
#     y_train = pd.concat([df_treinamento.loc[nan_lines, y_col], y_train])

#     x_train = x_train.reset_index(drop = True)
#     x_test = x_test.reset_index(drop = True)
#     y_train = y_train.reset_index(drop = True)
#     y_test = y_test.reset_index(drop = True)

#     fold_results.append({
#         'fold_index': fold + 1,
#         'x_train': x_train,
#         'y_train': y_train,
#         'x_test': x_test,
#         'y_test': y_test
#     })

# print(f'Número de folds: {len(fold_results)}')  # Exibindo o número de folds criados
# print(f'Tamanho do conjunto de treino: {len(fold_results[0]["x_train"])}')  # Exibindo o tamanho do conjunto de treino
# print(f'Tamanho do conjunto de teste: {len(fold_results[0]["x_test"])}')  # Exibindo o tamanho do conjunto de test

In [5]:
from pandas.api.types import is_numeric_dtype, is_object_dtype
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler, MaxAbsScaler
from sklearn.preprocessing import OneHotEncoder, OrdinalEncoder

pipeline_registry, models, logs = {}, {}, {}  # Dicionário para registrar os pipelines

num_scalers = [
                MinMaxScaler(),
                StandardScaler(),
                RobustScaler(),
                MaxAbsScaler()
        ]
cat_encoders = [OneHotEncoder(sparse_output=False, handle_unknown="ignore"), OrdinalEncoder(handle_unknown="use_encoded_value", unknown_value=-1)] 

for i, scaler in enumerate(num_scalers):  
    for fold_index, data in enumerate(fold_results):
                
        x_num_train = data['x_train'][num_cols]
        x_num_test = data['x_test'][num_cols]
                
        x_num_train_scaled = scaler.fit_transform(x_num_train)
        x_num_test_scaled = scaler.transform(x_num_test)
                
        x_num_train_scaled = pd.DataFrame(x_num_train_scaled, columns=num_cols)
        x_num_test_scaled = pd.DataFrame(x_num_test_scaled, columns=num_cols)

        for j, encoder in enumerate(cat_encoders):

            x_cat_train = data['x_train'][cat_cols]
            x_cat_test = data['x_test'][cat_cols]

            x_cat_train_encoded = encoder.fit_transform(x_cat_train)
            x_cat_test_encoded = encoder.transform(x_cat_test)

            x_cat_train_encoded_f = pd.DataFrame(x_cat_train_encoded)
            x_cat_test_encoded_f = pd.DataFrame(x_cat_test_encoded)

            scaler_name = type(scaler).__name__
            encoder_name = type(encoder).__name__
            key = f'scaled_{scaler_name}_encoded_{encoder_name}'
            pipeline_registry[key] = {
                'scaler': scaler,
                'encoder': encoder
            }

            if key not in models:
                models[key] = []
                logs[key] = []

                models[key].append({
                    'fold': fold_index,
                    'x_train': pd.concat([x_cat_train_encoded_f, x_num_train_scaled], axis=1),
                    'x_test': pd.concat([x_cat_test_encoded_f, x_num_test_scaled], axis=1),
                    'y_train': data['y_train'],
                    'y_test': data['y_test']
                })

                logs[key].append({
                    'fold': fold_index,
                    'train': models[key][-1]['x_train'].describe().to_dict(),
                    'test': models[key][-1]['x_test'].describe().to_dict()
                })

In [6]:
from sklearn.impute import SimpleImputer, KNNImputer

imputers = [
            SimpleImputer(strategy='most_frequent'),
            KNNImputer(n_neighbors=5, weights='uniform')
]

updated_models = {}
update_pipeline_registry = {}

for key in list(models.keys()):
    for imputer in imputers:

        imputer_name = type(imputer).__name__
        new_key = f"{key}_imputed_{imputer_name}"
        update_pipeline_registry[new_key] = {
            **pipeline_registry[key],
            'imputer': imputer
        }

        updated_models[new_key] = []

        for fold_index in range(len(models[key])):
            x_train = models[key][fold_index]['x_train']
            x_train.columns = x_train.columns.astype(str)
            imputed_data = imputer.fit_transform(x_train)

            updated_models[new_key].append({
                **models[key][fold_index],
                'x_train': pd.DataFrame(imputed_data, columns=x_train.columns)
            })

models = updated_models
pipeline_registry = update_pipeline_registry

print(pipeline_registry.keys())

dict_keys(['scaled_MinMaxScaler_encoded_OneHotEncoder_imputed_SimpleImputer', 'scaled_MinMaxScaler_encoded_OneHotEncoder_imputed_KNNImputer', 'scaled_MinMaxScaler_encoded_OrdinalEncoder_imputed_SimpleImputer', 'scaled_MinMaxScaler_encoded_OrdinalEncoder_imputed_KNNImputer', 'scaled_StandardScaler_encoded_OneHotEncoder_imputed_SimpleImputer', 'scaled_StandardScaler_encoded_OneHotEncoder_imputed_KNNImputer', 'scaled_StandardScaler_encoded_OrdinalEncoder_imputed_SimpleImputer', 'scaled_StandardScaler_encoded_OrdinalEncoder_imputed_KNNImputer', 'scaled_RobustScaler_encoded_OneHotEncoder_imputed_SimpleImputer', 'scaled_RobustScaler_encoded_OneHotEncoder_imputed_KNNImputer', 'scaled_RobustScaler_encoded_OrdinalEncoder_imputed_SimpleImputer', 'scaled_RobustScaler_encoded_OrdinalEncoder_imputed_KNNImputer', 'scaled_MaxAbsScaler_encoded_OneHotEncoder_imputed_SimpleImputer', 'scaled_MaxAbsScaler_encoded_OneHotEncoder_imputed_KNNImputer', 'scaled_MaxAbsScaler_encoded_OrdinalEncoder_imputed_Simple

In [7]:
import matplotlib.pyplot as plt
import os
import inspect
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

from pathlib import Path
default_path = Path.cwd() / 'output'
default_path.mkdir(parents=True, exist_ok=True)
output_path = default_path

def _plot_confusion_matrix(key, all_conf_matrices = []):
    cm = np.mean(all_conf_matrices, axis=0)

    disp = ConfusionMatrixDisplay(confusion_matrix=cm)
    disp.plot(cmap="Blues")

    os.makedirs(output_path, exist_ok=True)

    plt.savefig(f"{output_path}/confusion_matrix_{key}.jpg")
    plt.close()

def _generate_metric_csv(key, results, average_results):
    expanded_results = []
    for metric, values in results.items():
        if isinstance(values, list):
            for value in values:
                expanded_results.append({"metric": metric, "value": value})
        else:
            expanded_results.append({"metric": metric, "value": values})
    metrics_df = pd.DataFrame(expanded_results)

    avg_df = pd.DataFrame(average_results.items(), columns=["metric", "value"])
    metrics_df = pd.concat([metrics_df, avg_df], ignore_index=True)

    title_key = pd.DataFrame([['Key:', key]], columns=metrics_df.columns)
    metrics_df = pd.concat([metrics_df, title_key], ignore_index=True)
    
    os.makedirs(output_path, exist_ok=True)
    metrics_df.to_csv(f"{output_path}/metrics_{key}.csv", index=None)

def evaluate(key, fold_results_cv, metrics={}):
    results = {metric.__name__: [] for metric in metrics}
    all_conf_matrices = []

    for fold in fold_results_cv:
        y_true = fold["y_true"]
        y_pred = fold["y_pred"]
        
        for metric, parameters in metrics.items():

            valid_params = {
                param: value
                for param, value in parameters.items()
                if param in inspect.signature(metric).parameters
            }

            score = metric(y_true, y_pred, **valid_params)

            results[metric.__name__].append(score)

        conf_matrix = confusion_matrix(y_true, y_pred)
        all_conf_matrices.append(conf_matrix)

    average_results = {f'avg_{metric}': sum(scores) / len(scores) for metric, scores in results.items()}

    _plot_confusion_matrix(key, all_conf_matrices)
    _generate_metric_csv(key, results, average_results)

    return results, average_results

In [8]:
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
import xgboost as xgb
import lightgbm as lgb
from sklearn.metrics import accuracy_score

models_ml = {
    'XGBoost': {
        'model': xgb.XGBClassifier(random_state=42, use_label_encoder=False, eval_metric='logloss'),
        'hyperparameters': {
            'n_estimators': [100, 200, 500, 1000],
            'learning_rate': [0.01, 0.05, 0.1],
            'max_depth': [3, 5, 7, 10],
            'subsample': [0.7, 0.8, 0.9, 1.0],
            'colsample_bytree': [0.7, 0.8, 0.9, 1.0],
            'gamma': [0, 0.1, 0.5]
        },
        'selection_method': 'random',
        'scoring': 'roc_auc'
    },
}

metrics = {accuracy_score : {}}

update_pipeline_registry = {}
original_models = models.copy()

print("\nIniciando a seleção de modelos...\n")
for model_name, model_info in models_ml.items():
    print(f"\nSelecting best {model_name} model")

    model = model_info['model']
    hyperparameters = model_info['hyperparameters'] if 'hyperparameters' in model_info else {}
    selection_method = model_info['selection_method'] if 'selection_method' in model_info else 'grid'
    scoring = model_info['scoring'] if 'scoring' in model_info else 'accuracy'
    cv = model_info['cv'] if 'cv' in model_info else 5
    n_iter = model_info['n_iter'] if 'n_iter' in model_info else 10
    random_state = model_info['random_state'] if 'random_state' in model_info else 0

    best_models = {}

    for key in original_models.keys():
        key_model = f'{model_name}_{key}'
        model_data = original_models[key]

        best_models[key] = []
        fold_results = []

        for fold_index in range(len(model_data)):
            # Separar os dados do fold
            x_train, y_train = model_data[fold_index]['x_train'], model_data[fold_index]['y_train']
            x_test, y_test = model_data[fold_index]['x_test'], model_data[fold_index]['y_test']

            # Garantir que os nomes das colunas sejam strings
            x_train.columns = x_train.columns.astype(str)
            x_test.columns = x_test.columns.astype(str)

            # Seleção de modelo
            if selection_method == 'grid':
                from sklearn.model_selection import GridSearchCV
                grid_search = GridSearchCV(model, hyperparameters, cv=cv, scoring=scoring)
                search = grid_search.fit(x_train, y_train)

            elif selection_method == 'random':
                from sklearn.model_selection import RandomizedSearchCV
                random_search = RandomizedSearchCV(model, hyperparameters, n_iter=n_iter, cv=cv, scoring=scoring, random_state=random_state)
                search = random_search.fit(x_train, y_train)

            elif selection_method == 'bayes':
                from skopt import BayesSearchCV
                bayes_search = BayesSearchCV(model, hyperparameters, n_iter=n_iter, cv=cv, scoring=scoring, random_state=random_state)
                search = bayes_search.fit(x_train, y_train)

            best_models[key].append(search.best_estimator_)

            y_pred = search.predict(x_test)

            fold_results.append({"y_true": y_test, "y_pred": y_pred})

        score = evaluate(key_model, fold_results, metrics)
        print(f'{key_model} metrics: {score}')

        if f'{key}_metrics' not in logs:
            logs[f'{key}_metrics'] = []
            logs[f'{key}_best_params'] = []

        logs[f'{key}_metrics'].append(score)
        logs[f'{key}_best_params'].append(search.best_params_)



Iniciando a seleção de modelos...


Selecting best XGBoost model


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MinMaxScaler_encoded_OneHotEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.6172839506172839]}, {'avg_accuracy_score': 0.6172839506172839})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MinMaxScaler_encoded_OneHotEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.6131687242798354]}, {'avg_accuracy_score': 0.6131687242798354})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MinMaxScaler_encoded_OrdinalEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.5761316872427984]}, {'avg_accuracy_score': 0.5761316872427984})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MinMaxScaler_encoded_OrdinalEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.5596707818930041]}, {'avg_accuracy_score': 0.5596707818930041})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_StandardScaler_encoded_OneHotEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.6172839506172839]}, {'avg_accuracy_score': 0.6172839506172839})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_StandardScaler_encoded_OneHotEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.6296296296296297]}, {'avg_accuracy_score': 0.6296296296296297})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_StandardScaler_encoded_OrdinalEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.5761316872427984]}, {'avg_accuracy_score': 0.5761316872427984})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_StandardScaler_encoded_OrdinalEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.5555555555555556]}, {'avg_accuracy_score': 0.5555555555555556})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_RobustScaler_encoded_OneHotEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.6172839506172839]}, {'avg_accuracy_score': 0.6172839506172839})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_RobustScaler_encoded_OneHotEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.6131687242798354]}, {'avg_accuracy_score': 0.6131687242798354})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_RobustScaler_encoded_OrdinalEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.5761316872427984]}, {'avg_accuracy_score': 0.5761316872427984})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_RobustScaler_encoded_OrdinalEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.5637860082304527]}, {'avg_accuracy_score': 0.5637860082304527})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MaxAbsScaler_encoded_OneHotEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.6172839506172839]}, {'avg_accuracy_score': 0.6172839506172839})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MaxAbsScaler_encoded_OneHotEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.6213991769547325]}, {'avg_accuracy_score': 0.6213991769547325})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MaxAbsScaler_encoded_OrdinalEncoder_imputed_SimpleImputer metrics: ({'accuracy_score': [0.5761316872427984]}, {'avg_accuracy_score': 0.5761316872427984})


Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.

  bst.update(dtrain, iteration=i, fobj=obj)
Parameters: { "use_label_encoder" } are not used.


XGBoost_scaled_MaxAbsScaler_encoded_OrdinalEncoder_imputed_KNNImputer metrics: ({'accuracy_score': [0.5761316872427984]}, {'avg_accuracy_score': 0.5761316872427984})
