In [1]:
!pip install autogluon openml

Collecting autogluon
  Downloading autogluon-1.3.1-py3-none-any.whl.metadata (11 kB)
Collecting openml
  Downloading openml-0.15.1-py3-none-any.whl.metadata (10 kB)
Collecting autogluon.core==1.3.1 (from autogluon.core[all]==1.3.1->autogluon)
  Downloading autogluon.core-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.features==1.3.1 (from autogluon)
  Downloading autogluon.features-1.3.1-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.tabular==1.3.1 (from autogluon.tabular[all]==1.3.1->autogluon)
  Downloading autogluon.tabular-1.3.1-py3-none-any.whl.metadata (14 kB)
Collecting autogluon.multimodal==1.3.1 (from autogluon)
  Downloading autogluon.multimodal-1.3.1-py3-none-any.whl.metadata (13 kB)
Collecting autogluon.timeseries==1.3.1 (from autogluon.timeseries[all]==1.3.1->autogluon)
  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting scikit-learn<1.7.0,>=1.4.0 (from autogluon.core==1.3.1->autogluon.core[all]==1.3.1->autogluon)
  D

In [1]:
import time
import pandas as pd
import numpy as np
import openml
import os
import math # Importa a biblioteca de matemática para o cálculo do teto

from autogluon.tabular import TabularPredictor

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss

# Configurações
SEED = 42

def carregar_base_openml(openml_id):
    """Load an OpenML dataset and cast its nominal features to ``category``.

    Args:
        openml_id: OpenML dataset ID passed to ``openml.datasets.get_dataset``.

    Returns:
        A ``(X, y)`` tuple. ``X`` is the feature DataFrame returned by
        ``dataset.get_data`` for the dataset's default target; ``y`` is the
        target, converted to a NumPy array when it comes back as a
        ``pandas.Series``.
    """
    dataset = openml.datasets.get_dataset(
        openml_id,
        download_data=True,
        download_qualities=True,
        download_features_meta_data=True,
    )
    X, y, categorical_indicator, attribute_names = dataset.get_data(
        dataset_format="dataframe", target=dataset.default_target_attribute
    )

    if isinstance(y, pd.Series):
        y = y.to_numpy()

    # `categorical_indicator` is aligned one-to-one with `attribute_names`.
    # Indexing `dataset.features` by position in `attribute_names` is not safe:
    # `dataset.features` still contains the target, so every flag shifts by one
    # whenever the target is not the last feature.
    nominal_cols = [
        name
        for name, is_nominal in zip(attribute_names, categorical_indicator)
        if is_nominal
    ]
    for col_name in nominal_cols:
        if col_name in X.columns:
            X[col_name] = X[col_name].astype('category')

    return X, y


In [2]:
def _fill_missing_categorical(column):
    """Return ``column`` as strings, with missing entries set to "__MISSING__"."""
    as_object = column.astype(object)
    # Replace missing values *before* the string cast: ``astype(str)`` turns
    # NaN into the literal "nan", after which ``fillna`` has nothing to fill.
    return as_object.where(as_object.notna(), "__MISSING__").astype(str)


def _impute_missing_values(X_train_df, X_test_df):
    """Impute numeric and categorical gaps, returning new train/test frames.

    Numeric columns are median-imputed with statistics fitted on the training
    split only, and only when the training split has at least one missing
    numeric value. Category/object columns are converted to strings with
    missing entries replaced by "__MISSING__".
    """
    # Work on explicit copies: the inputs are slices produced by
    # train_test_split, and writing into them is ambiguous in pandas.
    X_train_df = X_train_df.copy()
    X_test_df = X_test_df.copy()

    num_cols = list(X_train_df.select_dtypes(include=np.number).columns)
    if num_cols and X_train_df[num_cols].isnull().sum().sum() > 0:
        imp = SimpleImputer(strategy='median')
        X_train_df[num_cols] = imp.fit_transform(X_train_df[num_cols])
        X_test_df[num_cols] = imp.transform(X_test_df[num_cols])

    cat_cols = X_train_df.select_dtypes(include=['category', 'object']).columns
    for col in cat_cols:
        X_train_df[col] = _fill_missing_categorical(X_train_df[col])
        X_test_df[col] = _fill_missing_categorical(X_test_df[col])

    return X_train_df, X_test_df


def run_autogluon_experiment(openml_id, time_budget_sec=3600):
    """Run the full AutoGluon evaluation flow on one OpenML dataset.

    Steps: load the dataset, make a 70/30 split (stratified when the target has
    more than one distinct value), label-encode the target, impute missing
    values, fit a ``TabularPredictor`` with the ``best_quality`` preset and
    score it on the held-out split.

    Args:
        openml_id: OpenML dataset ID.
        time_budget_sec: Value passed to ``fit`` as ``time_limit``.
            Defaults to 3600.

    Returns:
        dict with keys ``openml_id``, ``modelo``, ``total_time_sec`` (fit plus
        prediction wall time), ``mean_auc_ovo``, ``mean_accuracy``,
        ``mean_cross_entropy``, ``best_model_info`` (first row of the
        leaderboard) and ``best_score_cv`` (that row's ``score_val``).
    """
    print(f"--- Processando Dataset ID: {openml_id} com AutoGluon ---")

    # 1. Data loading and preprocessing
    X_df, y_orig = carregar_base_openml(openml_id)

    stratify_on = y_orig if len(np.unique(y_orig)) > 1 else None
    X_train_df, X_test_df, y_train_orig, y_test_orig = train_test_split(
        X_df, y_orig, test_size=0.3, random_state=SEED, stratify=stratify_on
    )

    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train_orig)
    y_test_enc = le.transform(y_test_orig)

    # Pick the AutoGluon eval metric from the number of classes
    n_classes = len(le.classes_)
    if n_classes > 2:
        metric_to_use = 'roc_auc_ovo'
        print(f"Problema multiclasse detectado ({n_classes} classes). Usando a métrica: {metric_to_use}")
    else:
        metric_to_use = 'roc_auc'
        print(f"Problema binário detectado. Usando a métrica: {metric_to_use}")

    # Missing-value imputation (returns fresh copies of both splits)
    X_train_df, X_test_df = _impute_missing_values(X_train_df, X_test_df)

    # 2. Training with AutoGluon (the timer covers fit and prediction only)
    t0_total = time.time()

    target_col = 'target'
    train_df_ag = X_train_df.copy()
    train_df_ag[target_col] = y_train_enc

    save_path = f'autogluon_models/ds_{openml_id}'

    predictor = TabularPredictor(
        label=target_col,
        path=save_path,
        eval_metric=metric_to_use
    ).fit(
        train_data=train_df_ag,
        time_limit=time_budget_sec,
        presets='best_quality'
    )

    leaderboard = predictor.leaderboard(silent=True)
    top_row = leaderboard.iloc[0]
    best_model_name = top_row['model']
    best_model_score_val = top_row['score_val']

    # 3. Prediction and metrics
    y_proba = predictor.predict_proba(X_test_df, as_multiclass=True).values
    y_pred = predictor.predict(X_test_df).values

    total_time = time.time() - t0_total

    if n_classes == 2:
        # Binary AUC is computed from the second probability column
        auc_ovo = roc_auc_score(y_test_enc, y_proba[:, 1])
    else:
        auc_ovo = roc_auc_score(y_test_enc, y_proba, multi_class='ovo')

    acc = accuracy_score(y_test_enc, y_pred)
    ce = log_loss(y_test_enc, y_proba, labels=le.transform(le.classes_))

    print(f"Resultados para {openml_id}: AUC={auc_ovo:.4f}, ACC={acc:.4f}, Time={total_time:.2f}s")

    return {
        'openml_id': openml_id, 'modelo': 'autogluon', 'total_time_sec': total_time,
        'mean_auc_ovo': auc_ovo, 'mean_accuracy': acc, 'mean_cross_entropy': ce,
        'best_model_info': best_model_name,
        'best_score_cv': best_model_score_val
    }

In [4]:
if __name__ == "__main__":
    import traceback  # imported once here instead of inside the except handler

    # --- DISTRIBUTED RUN CONFIGURATION ---
    # Change this value on each machine, from 1 to TOTAL_MACHINES
    MACHINE_ID = 3
    TOTAL_MACHINES = 10
    # --- END OF CONFIGURATION ---

    # An out-of-range ID would otherwise select an empty slice and silently
    # process nothing.
    if not 1 <= MACHINE_ID <= TOTAL_MACHINES:
        raise ValueError(
            f"MACHINE_ID deve estar entre 1 e {TOTAL_MACHINES}, recebido: {MACHINE_ID}"
        )

    # The 30 OpenML dataset IDs used in this benchmark
    cc18_ids_full = [
        11, 15, 18, 23, 29, 31, 37, 50, 54, 188, 307, 458, 469, 1049,
        1050, 1063, 1068, 1462, 1464, 1468, 1480, 1494, 1501, 1510,
        6332, 23381, 40966, 40975, 40982, 40994
    ]

    # Split the ID list into contiguous batches, one per machine
    chunk_size = math.ceil(len(cc18_ids_full) / TOTAL_MACHINES)
    start_index = (MACHINE_ID - 1) * chunk_size
    end_index = start_index + chunk_size
    ids_to_process = cc18_ids_full[start_index:end_index]

    print(f"--- MÁQUINA {MACHINE_ID}/{TOTAL_MACHINES} ---")
    print(f"Processando os seguintes datasets: {ids_to_process}")

    # Per-machine file name so results from different machines never collide
    output_filename = f"resultados_autogluon_maquina_{MACHINE_ID}.csv"
    todos_resultados = []

    for oid in ids_to_process:
        try:
            res = run_autogluon_experiment(oid)
        except Exception as e:
            # Best effort: report the failure and move on to the next dataset
            print(f"!!!!!! ERRO AO PROCESSAR O DATASET {oid}: {e} !!!!!!")
            traceback.print_exc()
            continue

        todos_resultados.append(res)
        # Checkpoint after every finished dataset: each run can take up to an
        # hour, so a crash later in the loop must not discard earlier results.
        pd.DataFrame(todos_resultados).to_csv(output_filename, index=False)

    # Final save of everything collected on this machine
    if todos_resultados:
        df_results = pd.DataFrame(todos_resultados)
        df_results.to_csv(output_filename, index=False)
        print(f"\nArquivo '{output_filename}' salvo com sucesso!")
    else:
        print("Nenhuma tarefa foi concluída com sucesso nesta máquina.")


--- MÁQUINA 3/10 ---
Processando os seguintes datasets: [37, 50, 54]
--- Processando Dataset ID: 37 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       29.98 GB / 31.36 GB (95.6%)
Disk Space Avail:   94.43 GB / 97.87 GB (96.5%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


	Running DyStack sub-fit in a ray process to avoid memory leakage. Enabling ray logging (enable_ray_logging=True). Specify `ds_args={'enable_ray_logging': False}` if you experience logging issues.
2025-06-14 10:22:29,954	INFO worker.py:1843 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
		Context path: "/home/jupyter/autogluon_models/ds_37/ds_sub_fit/sub_fit_ho"
[36m(_dystack pid=4312)[0m Running DyStack sub-fit ...
[36m(_dystack pid=4312)[0m Beginning AutoGluon training ... Time limit = 896s
[36m(_dystack pid=4312)[0m AutoGluon will save models to "/home/jupyter/autogluon_models/ds_37/ds_sub_fit/sub_fit_ho"
[36m(_dystack pid=4312)[0m Train Data Rows:    477
[36m(_dystack pid=4312)[0m Train Data Columns: 8
[36m(_dystack pid=4312)[0m Label Column:       target
[36m(_dystack pid=4312)[0m Problem Type:       binary
[36m(_dystack pid=4312)[0m Preprocessing data ...
[36m(_dystack pid=4312)[0m Selected class <--> label mapping:  cl

[36m(_ray_fit pid=4725)[0m [1000]	valid_set's binary_logloss: 0.247291


[36m(_dystack pid=4312)[0m 	0.8344	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	0.85s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.01s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: LightGBM_BAG_L1 ... Training model for up to 592.09s of the 890.76s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.02%)
[36m(_dystack pid=4312)[0m 	0.8006	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	0.99s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.01s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 588.63s of the 887.30s of remaining time.
[36m(_dystack pid=4312)[0m 	0.8002	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	0.96s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.08s	 = Validation runtime
[36m(_dystac

[36m(_ray_fit pid=7919)[0m [1000]	valid_set's binary_logloss: 0.251508


[36m(_dystack pid=4312)[0m 	0.8027	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.15s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.01s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetFastAI_r191_BAG_L1 ... Training model for up to 537.99s of the 836.66s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_ray_fit pid=8237)[0m No improvement since epoch 5: early stopping
[36m(_dystack pid=4312)[0m 	0.8167	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	5.0s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r9_BAG_L1 ... Training model for up to 530.36s of the 829.03s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 

[36m(_ray_fit pid=8981)[0m [1000]	valid_set's binary_logloss: 0.423348


[36m(_dystack pid=4312)[0m 	0.8389	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.86s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.04s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 516.93s of the 815.60s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_dystack pid=4312)[0m 	0.8196	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	6.39s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.09s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 508.27s of the 806.93s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.13%)
[36m(_dystack pid=4312)[0m 	0

[36m(_ray_fit pid=12482)[0m [1000]	valid_set's binary_logloss: 0.22855[32m [repeated 12x across cluster][0m


[36m(_dystack pid=4312)[0m 	0.8278	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.54s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.01s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetTorch_r86_BAG_L1 ... Training model for up to 446.12s of the 744.78s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_dystack pid=4312)[0m 	0.8109	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	7.64s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r50_BAG_L1 ... Training model for up to 435.88s of the 734.54s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.35%)
[36m(_dystack pid=4312)[0m 	

[36m(_ray_fit pid=16797)[0m [1000]	valid_set's binary_logloss: 0.284498[32m [repeated 2x across cluster][0m


[36m(_dystack pid=4312)[0m 	0.8375	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.6s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.05s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: RandomForest_r39_BAG_L1 ... Training model for up to 362.63s of the 661.29s of remaining time.
[36m(_ray_fit pid=16469)[0m No improvement since epoch 22: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=4312)[0m 	0.8011	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.03s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r167_BAG_L1 ... Training model for up to 361.50s of the 660.16s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.33%)
[36m(_dystack pid=4312)[0m 	0.8199	 = Validation score   (roc_auc)
[36m(_dyst

[36m(_ray_fit pid=21506)[0m [1000]	valid_set's binary_logloss: 0.249699[32m [repeated 4x across cluster][0m


[36m(_dystack pid=4312)[0m 	0.8412	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.47s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.03s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetTorch_r143_BAG_L1 ... Training model for up to 224.10s of the 522.76s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_ray_fit pid=21171)[0m No improvement since epoch 22: early stopping[32m [repeated 7x across cluster][0m
[36m(_dystack pid=4312)[0m 	0.839	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	10.33s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.1s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r128_BAG_L1 ... Training model for up to 211.38s of the 510.05s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting 

[36m(_ray_fit pid=23949)[0m [1000]	valid_set's binary_logloss: 0.411871[32m [repeated 7x across cluster][0m


[36m(_dystack pid=4312)[0m 	0.8357	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	2.53s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.05s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: XGBoost_r49_BAG_L1 ... Training model for up to 152.15s of the 450.82s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.04%)
[36m(_ray_fit pid=23637)[0m No improvement since epoch 23: early stopping[32m [repeated 4x across cluster][0m
[36m(_dystack pid=4312)[0m 	0.8107	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	0.99s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.03s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r5_BAG_L1 ... Training model for up to 148.81s of the 447.47s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with Para

[36m(_ray_fit pid=27573)[0m [1000]	valid_set's binary_logloss: 0.230857[32m [repeated 9x across cluster][0m


[36m(_dystack pid=4312)[0m 	0.8228	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.22s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.01s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: XGBoost_r22_BAG_L1 ... Training model for up to 83.96s of the 382.62s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.03%)
[36m(_dystack pid=4312)[0m 	0.8142	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.22s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.03s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetFastAI_r69_BAG_L1 ... Training model for up to 80.12s of the 378.79s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_dystack pid=4312)[0m 	0.

[36m(_ray_fit pid=29359)[0m [1000]	valid_set's binary_logloss: 0.266354


[36m(_dystack pid=4312)[0m 	0.8058	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.65s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.02s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetFastAI_r172_BAG_L1 ... Training model for up to 51.16s of the 349.82s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_ray_fit pid=29040)[0m No improvement since epoch 16: early stopping[32m [repeated 7x across cluster][0m
[36m(_ray_fit pid=29668)[0m No improvement since epoch 0: early stopping
[36m(_ray_fit pid=29674)[0m No improvement since epoch 1: early stopping
[36m(_dystack pid=4312)[0m 	0.8277	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	8.23s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.1s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r180_BAG_L

[36m(_ray_fit pid=35764)[0m [1000]	valid_set's binary_logloss: 0.392436


[36m(_dystack pid=4312)[0m 	0.8429	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.07s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.02s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: NeuralNetTorch_r22_BAG_L2 ... Training model for up to 207.65s of the 207.39s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_dystack pid=4312)[0m 	0.8367	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	9.06s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.11s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: XGBoost_r33_BAG_L2 ... Training model for up to 195.70s of the 195.44s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.11%)
[36m(_dystack pid=4312)[0m 	0

[36m(_ray_fit pid=43624)[0m [1000]	valid_set's binary_logloss: 0.403261[32m [repeated 2x across cluster][0m


[36m(_dystack pid=4312)[0m 	0.8365	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.9s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.05s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: RandomForest_r39_BAG_L2 ... Training model for up to 31.49s of the 31.23s of remaining time.
[36m(_ray_fit pid=43309)[0m No improvement since epoch 3: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=4312)[0m 	0.8307	 = Validation score   (roc_auc)
[36m(_dystack pid=4312)[0m 	1.28s	 = Training   runtime
[36m(_dystack pid=4312)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=4312)[0m Fitting model: CatBoost_r167_BAG_L2 ... Training model for up to 30.09s of the 29.84s of remaining time.
[36m(_dystack pid=4312)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.62%)
[36m(_dystack pid=4312)[0m 	0.8434	 = Validation score   (roc_auc)
[36m(_dystack p

Resultados para 37: AUC=0.8388, ACC=0.7532, Time=1702.80s
--- Processando Dataset ID: 50 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       18.11 GB / 31.36 GB (57.7%)
Disk Space Avail:   94.08 GB / 97.87 GB (96.1%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r22_BAG_L1       1.000000   0.996456     roc_auc        0.070445       0.078875   13.238182                 0.070445                0.078875          13.238182            1       True         18
1          CatBoost_r137_BAG_L1       1.000000   0.999289     roc_auc        0.081630       0.059408   64.831224                 0.081630                0.059408          64.831224            1       True         21
2     NeuralNetTorch_r79_BAG_L1       1.000000   0.998839     roc_auc        0.083338       0.080904    9.346550                 0.083338                0.080904           9.346550            1       True         13
3               LightGBM_BAG_L1       1.000000   1.000000     roc_auc        0.140116       0.179

Resultados para 50: AUC=1.0000, ACC=0.9965, Time=3601.50s
--- Processando Dataset ID: 54 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       28.43 GB / 31.36 GB (90.7%)
Disk Space Avail:   91.13 GB / 97.87 GB (93.1%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema multiclasse detectado (4 classes). Usando a métrica: roc_auc_ovo


Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val  eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r22_BAG_L1       0.991888   0.967916  roc_auc_ovo        0.086650       0.090820   22.848148                 0.086650                0.090820          22.848148            1       True         20
1           WeightedEnsemble_L2       0.991311   0.968963  roc_auc_ovo        0.185605       0.219414   33.210417                 0.001872                0.015057           3.581880            2       True         39
2     NeuralNetTorch_r30_BAG_L1       0.990734   0.965795  roc_auc_ovo        0.102631       0.090686   30.704119                 0.102631                0.090686          30.704119            1       True         30
3             LightGBMXT_BAG_L2       0.990484   0.966515  roc_auc_ovo        0.899250       

Resultados para 54: AUC=0.9654, ACC=0.8583, Time=3320.02s

Arquivo 'resultados_autogluon_maquina_3.csv' salvo com sucesso!


In [5]:
 # --- CONFIGURAÇÃO DA EXECUÇÃO DISTRIBUÍDA ---
    # Altere esta variável em cada máquina de 1 a 10
MACHINE_ID = 7
TOTAL_MACHINES = 10
# --- END OF CONFIGURATION ---

# NOTE(review): this cell repeats the previous driver cell with a different
# MACHINE_ID; consider keeping a single driver and changing only the ID.

# An out-of-range ID would otherwise select an empty slice and silently
# process nothing.
if not 1 <= MACHINE_ID <= TOTAL_MACHINES:
    raise ValueError(
        f"MACHINE_ID deve estar entre 1 e {TOTAL_MACHINES}, recebido: {MACHINE_ID}"
    )

# The 30 OpenML dataset IDs used in this benchmark
cc18_ids_full = [
    11, 15, 18, 23, 29, 31, 37, 50, 54, 188, 307, 458, 469, 1049,
    1050, 1063, 1068, 1462, 1464, 1468, 1480, 1494, 1501, 1510,
    6332, 23381, 40966, 40975, 40982, 40994
]

# Split the ID list into contiguous batches, one per machine
chunk_size = math.ceil(len(cc18_ids_full) / TOTAL_MACHINES)
start_index = (MACHINE_ID - 1) * chunk_size
end_index = start_index + chunk_size
ids_to_process = cc18_ids_full[start_index:end_index]

print(f"--- MÁQUINA {MACHINE_ID}/{TOTAL_MACHINES} ---")
print(f"Processando os seguintes datasets: {ids_to_process}")

import traceback  # imported once here instead of inside the except handler

# Per-machine file name so results from different machines never collide
output_filename = f"resultados_autogluon_maquina_{MACHINE_ID}.csv"
todos_resultados = []

for oid in ids_to_process:
    try:
        res = run_autogluon_experiment(oid)
    except Exception as e:
        # Best effort: report the failure and move on to the next dataset
        print(f"!!!!!! ERRO AO PROCESSAR O DATASET {oid}: {e} !!!!!!")
        traceback.print_exc()
        continue

    todos_resultados.append(res)
    # Checkpoint after every finished dataset: each run can take up to an
    # hour, so a crash later in the loop must not discard earlier results.
    pd.DataFrame(todos_resultados).to_csv(output_filename, index=False)

# Final save of everything collected on this machine
if todos_resultados:
    df_results = pd.DataFrame(todos_resultados)
    df_results.to_csv(output_filename, index=False)
    print(f"\nArquivo '{output_filename}' salvo com sucesso!")
else:
    print("Nenhuma tarefa foi concluída com sucesso nesta máquina.")


--- MÁQUINA 7/10 ---
Processando os seguintes datasets: [1464, 1468, 1480]
--- Processando Dataset ID: 1464 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       27.98 GB / 31.36 GB (89.2%)
Disk Space Avail:   89.30 GB / 97.87 GB (91.2%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


Leaderboard on holdout data (DyStack):
                           model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetTorch_r22_BAG_L1       0.828571   0.765691     roc_auc        0.089951       0.097732  15.501757                 0.089951                0.097732          15.501757            1       True         20
1      NeuralNetTorch_r31_BAG_L1       0.826984   0.781061     roc_auc        0.083367       0.087179  15.132389                 0.083367                0.087179          15.132389            1       True         65
2      NeuralNetTorch_r22_BAG_L2       0.823810   0.805727     roc_auc        1.086852       0.812057  54.145880                 0.118919                0.351232          23.135412            2       True         98
3    NeuralNetFastAI_r102_BAG_L2       0.823810   0.794376     roc_auc        1.114805       0.71

Resultados para 1464: AUC=0.7600, ACC=0.8089, Time=1863.97s
--- Processando Dataset ID: 1468 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       27.85 GB / 31.36 GB (88.8%)
Disk Space Avail:   88.95 GB / 97.87 GB (90.9%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema multiclasse detectado (9 classes). Usando a métrica: roc_auc_ovo


Leaderboard on holdout data (DyStack):
                        model  score_holdout  score_val  eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetFastAI_BAG_L2       0.998148   0.997324  roc_auc_ovo        2.734730       1.533044  192.649257                 0.116493                0.138332           7.642628            2       True         17
1             CatBoost_BAG_L2       0.997805   0.998411  roc_auc_ovo        2.903659       2.194687  328.681769                 0.285422                0.799975         143.675140            2       True         22
2     RandomForestGini_BAG_L2       0.997497   0.998203  roc_auc_ovo        2.799627       1.549579  186.537241                 0.181390                0.154867           1.530612            2       True         20
3       ExtraTreesGini_BAG_L2       0.997497   0.998594  roc_auc_ovo        2.814493       1.536379  

Resultados para 1468: AUC=0.9959, ACC=0.9599, Time=3609.94s
--- Processando Dataset ID: 1480 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       27.54 GB / 31.36 GB (87.8%)
Disk Space Avail:   87.10 GB / 97.87 GB (89.0%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


Leaderboard on holdout data (DyStack):
                           model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetTorch_r22_BAG_L1       0.727273   0.770759     roc_auc        0.108823       0.131834  12.739066                 0.108823                0.131834          12.739066            1       True         20
1      NeuralNetTorch_r30_BAG_L1       0.722611   0.786822     roc_auc        0.133987       0.185655  23.288300                 0.133987                0.185655          23.288300            1       True         30
2     NeuralNetTorch_r197_BAG_L1       0.715618   0.783617     roc_auc        0.150098       0.174481  12.264035                 0.150098                0.174481          12.264035            1       True         54
3          ExtraTreesEntr_BAG_L1       0.713287   0.771206     roc_auc        0.131471       0.11

Resultados para 1480: AUC=0.7304, ACC=0.6686, Time=2906.45s

Arquivo 'resultados_autogluon_maquina_7.csv' salvo com sucesso!




In [3]:
# --- DISTRIBUTED RUN CONFIGURATION ---
# Set this to a distinct value on each machine, from 1 to TOTAL_MACHINES.
MACHINE_ID = 10
TOTAL_MACHINES = 10
# --- END OF CONFIGURATION ---

# The 30 CC18 dataset IDs covered by this benchmark (order matters: batches
# are contiguous slices of this list).
cc18_ids_full = [
    11, 15, 18, 23, 29, 31, 37, 50, 54, 188, 307, 458, 469, 1049,
    1050, 1063, 1068, 1462, 1464, 1468, 1480, 1494, 1501, 1510,
    6332, 23381, 40966, 40975, 40982, 40994
]


def compute_batch_bounds(n_items, machine_id, total_machines):
    """Return the ``(start, end)`` slice bounds of one machine's batch.

    The ``n_items`` positions are split into ``total_machines`` contiguous
    batches whose sizes differ by at most one; the first
    ``n_items % total_machines`` machines receive the extra item. When
    ``n_items`` is divisible by ``total_machines`` this is identical to
    fixed-size chunking.

    Args:
        n_items: Number of items to distribute (>= 0).
        machine_id: 1-based index of the machine requesting its batch.
        total_machines: Total number of machines sharing the work (>= 1).

    Returns:
        Tuple ``(start, end)`` usable as ``items[start:end]``. The range is
        empty when there are more machines than items and this machine is
        past the last item.

    Raises:
        ValueError: If ``total_machines`` < 1 or ``machine_id`` is outside
            ``1..total_machines``.
    """
    if total_machines < 1:
        raise ValueError(
            f"TOTAL_MACHINES deve ser >= 1, recebido: {total_machines}"
        )
    # Without this check an out-of-range id silently produces an empty
    # (or wrong) slice and the machine "succeeds" while doing nothing.
    if not 1 <= machine_id <= total_machines:
        raise ValueError(
            f"MACHINE_ID deve estar entre 1 e {total_machines}, recebido: {machine_id}"
        )

    base, extra = divmod(n_items, total_machines)
    position = machine_id - 1
    # Every earlier machine took `base` items, and the first `extra`
    # machines took one more each.
    start = position * base + min(position, extra)
    end = start + base + (1 if position < extra else 0)
    return start, end


# Select this machine's contiguous batch of dataset IDs.
start_index, end_index = compute_batch_bounds(
    len(cc18_ids_full), MACHINE_ID, TOTAL_MACHINES
)
chunk_size = end_index - start_index
ids_to_process = cc18_ids_full[start_index:end_index]

print(f"--- MÁQUINA {MACHINE_ID}/{TOTAL_MACHINES} ---")
print(f"Processando os seguintes datasets: {ids_to_process}")

# Imported once here rather than inside the `except` branch of the loop.
import traceback

# Per-machine output file, so results from different machines never collide.
output_filename = f"resultados_autogluon_maquina_{MACHINE_ID}.csv"

todos_resultados = []  # one result record per successfully processed dataset
failed_ids = []        # dataset IDs whose experiment raised an exception

for oid in ids_to_process:
    try:
        res = run_autogluon_experiment(oid)
    except Exception as e:
        # Best-effort batch: log the failure and move on to the next dataset
        # instead of aborting the remaining (long-running) experiments.
        print(f"!!!!!! ERRO AO PROCESSAR O DATASET {oid}: {e} !!!!!!")
        traceback.print_exc()
        failed_ids.append(oid)
        continue

    todos_resultados.append(res)
    # Checkpoint after every dataset: each run can take on the order of an
    # hour, so a later crash or kernel restart must not discard finished work.
    pd.DataFrame(todos_resultados).to_csv(output_filename, index=False)

# Final write of the complete result table for this machine.
if todos_resultados:
    df_results = pd.DataFrame(todos_resultados)
    df_results.to_csv(output_filename, index=False)
    print(f"\nArquivo '{output_filename}' salvo com sucesso!")
else:
    print("Nenhuma tarefa foi concluída com sucesso nesta máquina.")

# Make partial failures visible at the end of the (very long) log output.
if failed_ids:
    print(f"Datasets que falharam nesta máquina: {failed_ids}")

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       29.99 GB / 31.36 GB (95.6%)
Disk Space Avail:   83.36 GB / 97.87 GB (85.2%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

--- MÁQUINA 10/10 ---
Processando os seguintes datasets: [40975, 40982, 40994]
--- Processando Dataset ID: 40975 com AutoGluon ---
Problema multiclasse detectado (4 classes). Usando a métrica: roc_auc_ovo


	Running DyStack sub-fit in a ray process to avoid memory leakage. Enabling ray logging (enable_ray_logging=True). Specify `ds_args={'enable_ray_logging': False}` if you experience logging issues.
2025-06-14 16:42:07,691	INFO worker.py:1843 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
		Context path: "/home/jupyter/autogluon_models/ds_40975/ds_sub_fit/sub_fit_ho"
[36m(_dystack pid=420311)[0m Running DyStack sub-fit ...
[36m(_dystack pid=420311)[0m Beginning AutoGluon training ... Time limit = 895s
[36m(_dystack pid=420311)[0m AutoGluon will save models to "/home/jupyter/autogluon_models/ds_40975/ds_sub_fit/sub_fit_ho"
[36m(_dystack pid=420311)[0m Train Data Rows:    1074
[36m(_dystack pid=420311)[0m Train Data Columns: 6
[36m(_dystack pid=420311)[0m Label Column:       target
[36m(_dystack pid=420311)[0m Problem Type:       multiclass
[36m(_dystack pid=420311)[0m Preprocessing data ...
[36m(_dystack pid=420311)[0m Train Dat

[36m(_ray_fit pid=421035)[0m [1000]	valid_set's multi_logloss: 0.065854	valid_set's roc_auc_ovo: 0.994878


[36m(_dystack pid=420311)[0m 	0.9944	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=420311)[0m 	17.22s	 = Training   runtime
[36m(_dystack pid=420311)[0m 	0.29s	 = Validation runtime
[36m(_dystack pid=420311)[0m Fitting model: LightGBM_BAG_L1 ... Training model for up to 562.68s of the 861.15s of remaining time.
[36m(_ray_fit pid=420711)[0m Metric roc_auc_ovo is not supported by this model - using log_loss instead[32m [repeated 7x across cluster] (Ray deduplicates logs by default. Set RAY_DEDUP_LOGS=0 to disable log deduplication, or see https://docs.ray.io/en/master/ray-observability/user-guides/configure-logging.html#log-deduplication for more options.)[0m
[36m(_dystack pid=420311)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.04%)


[36m(_ray_fit pid=421371)[0m [1000]	valid_set's multi_logloss: 0.0399475	valid_set's roc_auc_ovo: 0.997778[32m [repeated 2x across cluster][0m


[36m(_dystack pid=420311)[0m 	0.9953	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=420311)[0m 	15.59s	 = Training   runtime
[36m(_dystack pid=420311)[0m 	0.22s	 = Validation runtime
[36m(_dystack pid=420311)[0m Fitting model: RandomForestGini_BAG_L1 ... Training model for up to 544.11s of the 842.58s of remaining time.
[36m(_dystack pid=420311)[0m 	0.995	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=420311)[0m 	1.39s	 = Training   runtime
[36m(_dystack pid=420311)[0m 	0.1s	 = Validation runtime
[36m(_dystack pid=420311)[0m Fitting model: RandomForestEntr_BAG_L1 ... Training model for up to 542.56s of the 841.03s of remaining time.
[36m(_dystack pid=420311)[0m 	0.9956	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=420311)[0m 	1.18s	 = Training   runtime
[36m(_dystack pid=420311)[0m 	0.1s	 = Validation runtime
[36m(_dystack pid=420311)[0m Fitting model: CatBoost_BAG_L1 ... Training model for up to 541.24s of the 839.71s of remaining time.
[

[36m(_ray_fit pid=424074)[0m [1000]	valid_set's multi_logloss: 0.0483115	valid_set's roc_auc_ovo: 0.995[32m [repeated 2x across cluster][0m


[36m(_dystack pid=420311)[0m 	0.9959	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=420311)[0m 	32.31s	 = Training   runtime
[36m(_dystack pid=420311)[0m 	0.54s	 = Validation runtime
[36m(_dystack pid=420311)[0m Fitting model: CatBoost_r177_BAG_L1 ... Training model for up to 108.60s of the 407.07s of remaining time.
[36m(_dystack pid=420311)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.65%)
[36m(_ray_fit pid=424491)[0m 	Ran out of time, early stopping on iteration 8811.
[36m(_dystack pid=420311)[0m 	0.9955	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=420311)[0m 	87.74s	 = Training   runtime
[36m(_dystack pid=420311)[0m 	0.09s	 = Validation runtime
[36m(_dystack pid=420311)[0m Fitting model: NeuralNetTorch_r79_BAG_L1 ... Training model for up to 16.21s of the 314.68s of remaining time.
[36m(_dystack pid=420311)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting wi

Resultados para 40975: AUC=1.0000, ACC=1.0000, Time=3599.54s
--- Processando Dataset ID: 40982 com AutoGluon ---
Problema multiclasse detectado (7 classes). Usando a métrica: roc_auc_ovo


Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val  eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0         ExtraTreesEntr_BAG_L2       0.982277   0.964436  roc_auc_ovo        4.889431       2.167187  316.642660                 0.281083                0.181140           1.950616            2       True         28
1         ExtraTreesGini_BAG_L2       0.982241   0.968151  roc_auc_ovo        4.888690       2.264861  316.730546                 0.280343                0.278815           2.038501            2       True         27
2               CatBoost_BAG_L2       0.981495   0.967929  roc_auc_ovo        4.669730       2.093802  485.286662                 0.061383                0.107756         170.594618            2       True         26
3           WeightedEnsemble_L2       0.981340   0.973337  roc_auc_ovo        2.525835       

Resultados para 40982: AUC=0.9739, ACC=0.8148, Time=3618.12s
--- Processando Dataset ID: 40994 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       28.31 GB / 31.36 GB (90.3%)
Disk Space Avail:   81.28 GB / 97.87 GB (83.0%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   NeuralNetFastAI_r111_BAG_L1       0.993421   0.985737     roc_auc        0.094423       0.117567    6.794905                 0.094423                0.117567           6.794905            1       True         64
1         ExtraTrees_r49_BAG_L1       0.993421   0.942544     roc_auc        0.103599       0.083164    1.330878                 0.103599                0.083164           1.330878            1       True         56
2    NeuralNetFastAI_r88_BAG_L1       0.993421   0.997565     roc_auc        0.109182       0.109418    9.101110                 0.109182                0.109418           9.101110            1       True         68
3    NeuralNetFastAI_r37_BAG_L1       0.993421   0.993275     roc_auc        0.115455       0.113

Resultados para 40994: AUC=0.9228, ACC=0.9383, Time=2820.29s

Arquivo 'resultados_autogluon_maquina_10.csv' salvo com sucesso!


