In [1]:
!pip install autogluon openml

Collecting autogluon
  Downloading autogluon-1.3.1-py3-none-any.whl.metadata (11 kB)
Collecting openml
  Downloading openml-0.15.1-py3-none-any.whl.metadata (10 kB)
Collecting autogluon.core==1.3.1 (from autogluon.core[all]==1.3.1->autogluon)
  Downloading autogluon.core-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting autogluon.features==1.3.1 (from autogluon)
  Downloading autogluon.features-1.3.1-py3-none-any.whl.metadata (11 kB)
Collecting autogluon.tabular==1.3.1 (from autogluon.tabular[all]==1.3.1->autogluon)
  Downloading autogluon.tabular-1.3.1-py3-none-any.whl.metadata (14 kB)
Collecting autogluon.multimodal==1.3.1 (from autogluon)
  Downloading autogluon.multimodal-1.3.1-py3-none-any.whl.metadata (13 kB)
Collecting autogluon.timeseries==1.3.1 (from autogluon.timeseries[all]==1.3.1->autogluon)
  Downloading autogluon.timeseries-1.3.1-py3-none-any.whl.metadata (12 kB)
Collecting scikit-learn<1.7.0,>=1.4.0 (from autogluon.core==1.3.1->autogluon.core[all]==1.3.1->autogluon)
  D

In [2]:
import time
import pandas as pd
import numpy as np
import openml
import os
import math # Importa a biblioteca de matemática para o cálculo do teto

from autogluon.tabular import TabularPredictor

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.impute import SimpleImputer
from sklearn.metrics import roc_auc_score, accuracy_score, log_loss

# Configuration
# Random seed passed to train_test_split so the train/test partition is reproducible.
SEED = 42

def carregar_base_openml(openml_id):
    """Load an OpenML dataset and normalise the dtypes of its nominal columns.

    Parameters
    ----------
    openml_id : int
        OpenML dataset identifier.

    Returns
    -------
    X : pandas.DataFrame
        Feature frame (target excluded). Every column that OpenML describes as
        ``'nominal'`` is cast to the pandas ``category`` dtype.
    y : array-like
        Values of the dataset's default target attribute, taken out of the
        pandas Series wrapper when a Series is returned.
    """
    dataset = openml.datasets.get_dataset(
        openml_id,
        download_data=True,
        download_qualities=True,
        download_features_meta_data=True,
    )
    X, y, _, attribute_names = dataset.get_data(
        dataset_format="dataframe", target=dataset.default_target_attribute
    )

    if isinstance(y, pd.Series):
        y = y.values.ravel()

    # `dataset.features` is keyed by the dataset-wide feature index, which also counts
    # the target (and any column dropped by get_data), whereas `attribute_names` only
    # lists the columns of X. Positional lookup can therefore read the data type of a
    # different column, so resolve the type by feature *name* instead.
    data_type_by_name = {
        feature.name: feature.data_type for feature in dataset.features.values()
    }
    nominal_cols = [
        name for name in attribute_names if data_type_by_name.get(name) == 'nominal'
    ]
    for col_name in nominal_cols:
        if col_name in X.columns:
            X[col_name] = X[col_name].astype('category')

    return X, y


In [3]:
def _categorical_to_str(series, missing_token="__MISSING__"):
    """Return ``series`` as strings, with missing entries replaced by ``missing_token``."""
    # Convert through object dtype first: calling astype(str) directly turns NaN into
    # the literal string 'nan', after which a fillna() has nothing left to fill.
    as_object = series.astype(object)
    return as_object.where(as_object.notna(), missing_token).astype(str)


def run_autogluon_experiment(openml_id, time_limit=3600, presets='best_quality'):
    """Run the full AutoGluon evaluation pipeline on one OpenML dataset.

    Steps: load the dataset, make a 70/30 train/test split (stratified when the
    target has more than one distinct value), label-encode the target, impute
    numeric columns with the training median, stringify categorical columns,
    fit a ``TabularPredictor`` and score it on the held-out test set.

    Parameters
    ----------
    openml_id : int
        OpenML dataset identifier.
    time_limit : int, optional
        Time budget in seconds handed to ``TabularPredictor.fit`` (default 3600).
    presets : str, optional
        AutoGluon preset handed to ``TabularPredictor.fit`` (default 'best_quality').

    Returns
    -------
    dict
        Keys: ``openml_id``, ``modelo``, ``total_time_sec`` (fit plus test-set
        prediction wall time), ``mean_auc_ovo``, ``mean_accuracy``,
        ``mean_cross_entropy``, ``best_model_info`` and ``best_score_cv`` (model name
        and validation score from the first leaderboard row).
    """
    print(f"--- Processando Dataset ID: {openml_id} com AutoGluon ---")

    # 1. Data loading and pre-processing
    X_df, y_orig = carregar_base_openml(openml_id)

    X_train_df, X_test_df, y_train_orig, y_test_orig = train_test_split(
        X_df, y_orig, test_size=0.3, random_state=SEED,
        stratify=(y_orig if len(np.unique(y_orig)) > 1 else None)
    )
    # Work on independent copies so the column rewrites below neither touch the
    # original frame nor trigger pandas' SettingWithCopyWarning.
    X_train_df = X_train_df.copy()
    X_test_df = X_test_df.copy()

    le = LabelEncoder()
    y_train_enc = le.fit_transform(y_train_orig)
    y_test_enc = le.transform(y_test_orig)

    # Pick the AutoGluon evaluation metric according to the number of classes.
    n_classes = len(le.classes_)
    class_ids = np.arange(n_classes)  # encoded labels are 0..n_classes-1
    if n_classes > 2:
        metric_to_use = 'roc_auc_ovo'
        print(f"Problema multiclasse detectado ({n_classes} classes). Usando a métrica: {metric_to_use}")
    else:
        metric_to_use = 'roc_auc'
        print(f"Problema binário detectado. Usando a métrica: {metric_to_use}")

    # Median-impute numeric columns; the imputer is fitted on the training part only.
    num_cols = X_train_df.select_dtypes(include=np.number).columns
    if len(num_cols) > 0 and X_train_df[num_cols].isnull().sum().sum() > 0:
        imp = SimpleImputer(strategy='median')
        X_train_df.loc[:, num_cols] = imp.fit_transform(X_train_df[num_cols])
        X_test_df.loc[:, num_cols] = imp.transform(X_test_df[num_cols])

    # Categorical/object columns become plain strings with an explicit missing token.
    # Whole-column assignment replaces the column (and its dtype) instead of trying
    # to write new string values into an existing categorical in place.
    cat_cols = X_train_df.select_dtypes(include=['category', 'object']).columns
    for col in cat_cols:
        X_train_df[col] = _categorical_to_str(X_train_df[col])
        X_test_df[col] = _categorical_to_str(X_test_df[col])

    # 2. Training with AutoGluon
    t0_total = time.time()

    train_df_ag = X_train_df.copy()
    target_col = 'target'
    train_df_ag[target_col] = y_train_enc

    save_path = f'autogluon_models/ds_{openml_id}'

    predictor = TabularPredictor(
        label=target_col,
        path=save_path,
        eval_metric=metric_to_use
    ).fit(
        train_data=train_df_ag,
        time_limit=time_limit,
        presets=presets
    )

    leaderboard = predictor.leaderboard(silent=True)
    best_model_name = leaderboard.iloc[0]['model']
    best_model_score_val = leaderboard.iloc[0]['score_val']

    # 3. Prediction and metrics
    y_proba = predictor.predict_proba(X_test_df, as_multiclass=True).values
    y_pred = predictor.predict(X_test_df).values

    total_time = time.time() - t0_total

    if n_classes == 2:
        auc_ovo = roc_auc_score(y_test_enc, y_proba[:, 1])
    else:
        # Pass the label set explicitly so scoring still works if some class
        # happens to be absent from the test fold.
        auc_ovo = roc_auc_score(y_test_enc, y_proba, multi_class='ovo', labels=class_ids)

    acc = accuracy_score(y_test_enc, y_pred)
    ce = log_loss(y_test_enc, y_proba, labels=class_ids)

    print(f"Resultados para {openml_id}: AUC={auc_ovo:.4f}, ACC={acc:.4f}, Time={total_time:.2f}s")

    return {
        'openml_id': openml_id, 'modelo': 'autogluon', 'total_time_sec': total_time,
        'mean_auc_ovo': auc_ovo, 'mean_accuracy': acc, 'mean_cross_entropy': ce,
        'best_model_info': best_model_name,
        'best_score_cv': best_model_score_val
    }

In [4]:
if __name__ == "__main__":
    import traceback

    # --- DISTRIBUTED RUN CONFIGURATION ---
    # Set MACHINE_ID to a different value (1..TOTAL_MACHINES) on each machine.
    MACHINE_ID = 4
    TOTAL_MACHINES = 10
    # --- END OF CONFIGURATION ---

    # An out-of-range id would silently select an empty (or wrong) slice below.
    if not 1 <= MACHINE_ID <= TOTAL_MACHINES:
        raise ValueError(
            f"MACHINE_ID must be between 1 and {TOTAL_MACHINES}, got {MACHINE_ID}"
        )

    # The 30 OpenML dataset IDs of this benchmark (described by the author as CC18 datasets).
    cc18_ids_full = [
        11, 15, 18, 23, 29, 31, 37, 50, 54, 188, 307, 458, 469, 1049,
        1050, 1063, 1068, 1462, 1464, 1468, 1480, 1494, 1501, 1510,
        6332, 23381, 40966, 40975, 40982, 40994
    ]

    # Split the ID list into contiguous batches, one batch per machine.
    chunk_size = math.ceil(len(cc18_ids_full) / TOTAL_MACHINES)
    start_index = (MACHINE_ID - 1) * chunk_size
    end_index = start_index + chunk_size
    ids_to_process = cc18_ids_full[start_index:end_index]

    print(f"--- MÁQUINA {MACHINE_ID}/{TOTAL_MACHINES} ---")
    print(f"Processando os seguintes datasets: {ids_to_process}")

    # Output file name is unique per machine.
    output_filename = f"resultados_autogluon_maquina_{MACHINE_ID}.csv"
    todos_resultados = []

    for oid in ids_to_process:
        try:
            res = run_autogluon_experiment(oid)
        except Exception as e:
            # Best effort: report the failure and move on to the next dataset.
            print(f"!!!!!! ERRO AO PROCESSAR O DATASET {oid}: {e} !!!!!!")
            traceback.print_exc()
            continue

        todos_resultados.append(res)
        # Each experiment can take about an hour, so rewrite the CSV after every
        # finished dataset; an interrupt or crash then loses only the run in progress.
        df_results = pd.DataFrame(todos_resultados)
        df_results.to_csv(output_filename, index=False)

    if todos_resultados:
        print(f"\nArquivo '{output_filename}' salvo com sucesso!")
    else:
        print("Nenhuma tarefa foi concluída com sucesso nesta máquina.")


--- MÁQUINA 4/10 ---
Processando os seguintes datasets: [188, 307, 458]
--- Processando Dataset ID: 188 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       29.98 GB / 31.36 GB (95.6%)
Disk Space Avail:   94.43 GB / 97.87 GB (96.5%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema multiclasse detectado (5 classes). Usando a métrica: roc_auc_ovo


	Running DyStack sub-fit in a ray process to avoid memory leakage. Enabling ray logging (enable_ray_logging=True). Specify `ds_args={'enable_ray_logging': False}` if you experience logging issues.
2025-06-14 10:27:09,305	INFO worker.py:1843 -- Started a local Ray instance. View the dashboard at [1m[32m127.0.0.1:8265 [39m[22m
		Context path: "/home/jupyter/autogluon_models/ds_188/ds_sub_fit/sub_fit_ho"
[36m(_dystack pid=4395)[0m Running DyStack sub-fit ...
[36m(_dystack pid=4395)[0m Beginning AutoGluon training ... Time limit = 896s
[36m(_dystack pid=4395)[0m AutoGluon will save models to "/home/jupyter/autogluon_models/ds_188/ds_sub_fit/sub_fit_ho"
[36m(_dystack pid=4395)[0m Train Data Rows:    457
[36m(_dystack pid=4395)[0m Train Data Columns: 19
[36m(_dystack pid=4395)[0m Label Column:       target
[36m(_dystack pid=4395)[0m Problem Type:       multiclass
[36m(_dystack pid=4395)[0m Preprocessing data ...
[36m(_dystack pid=4395)[0m Train Data Class Count: 5
[36m

[36m(_ray_fit pid=9521)[0m [1000]	valid_set's multi_logloss: 1.02177	valid_set's roc_auc_ovo: 0.829851
[36m(_ray_fit pid=9520)[0m [1000]	valid_set's multi_logloss: 0.733588	valid_set's roc_auc_ovo: 0.922241[32m [repeated 6x across cluster][0m
[36m(_ray_fit pid=9519)[0m [2000]	valid_set's multi_logloss: 0.855809	valid_set's roc_auc_ovo: 0.897656
[36m(_ray_fit pid=9519)[0m [3000]	valid_set's multi_logloss: 0.902595	valid_set's roc_auc_ovo: 0.895671


[36m(_dystack pid=4395)[0m 	0.8967	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=4395)[0m 	72.72s	 = Training   runtime
[36m(_dystack pid=4395)[0m 	0.26s	 = Validation runtime
[36m(_dystack pid=4395)[0m Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 249.72s of the 548.37s of remaining time.
[36m(_dystack pid=4395)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.00%)
[36m(_dystack pid=4395)[0m 	0.9097	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=4395)[0m 	12.12s	 = Training   runtime
[36m(_dystack pid=4395)[0m 	0.13s	 = Validation runtime
[36m(_dystack pid=4395)[0m Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 234.30s of the 532.96s of remaining time.
[36m(_dystack pid=4395)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.29%)
[36m(_dystack pid=43

[36m(_ray_fit pid=15666)[0m [1000]	valid_set's multi_logloss: 1.31857	valid_set's roc_auc_ovo: 0.940784
[36m(_ray_fit pid=15669)[0m [1000]	valid_set's multi_logloss: 2.09857	valid_set's roc_auc_ovo: 0.868046
[36m(_ray_fit pid=15669)[0m [2000]	valid_set's multi_logloss: 2.1174	valid_set's roc_auc_ovo: 0.870303


[36m(_dystack pid=4395)[0m 	0.8567	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=4395)[0m 	35.37s	 = Training   runtime
[36m(_dystack pid=4395)[0m 	0.14s	 = Validation runtime
[36m(_dystack pid=4395)[0m Fitting model: CatBoost_r177_BAG_L2 ... Training model for up to 108.89s of the 108.83s of remaining time.
[36m(_dystack pid=4395)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.83%)
[36m(_dystack pid=4395)[0m 	0.9253	 = Validation score   (roc_auc_ovo)
[36m(_dystack pid=4395)[0m 	46.82s	 = Training   runtime
[36m(_dystack pid=4395)[0m 	0.06s	 = Validation runtime
[36m(_dystack pid=4395)[0m Fitting model: NeuralNetTorch_r79_BAG_L2 ... Training model for up to 59.66s of the 59.61s of remaining time.
[36m(_dystack pid=4395)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.01%)
[36m(_dystack pid=43

Resultados para 188: AUC=0.9375, ACC=0.7511, Time=3606.05s
--- Processando Dataset ID: 307 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       28.63 GB / 31.36 GB (91.3%)
Disk Space Avail:   92.61 GB / 97.87 GB (94.6%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema multiclasse detectado (11 classes). Usando a métrica: roc_auc_ovo


Leaderboard on holdout data (DyStack):
                      model  score_holdout  score_val  eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0   RandomForestEntr_BAG_L1       1.000000   0.996410  roc_auc_ovo        0.200445       0.128125    2.302545                 0.200445                0.128125           2.302545            1       True          7
1     ExtraTreesGini_BAG_L1       1.000000   0.998184  roc_auc_ovo        0.306215       0.095422    1.422919                 0.306215                0.095422           1.422919            1       True          9
2    NeuralNetFastAI_BAG_L1       1.000000   0.999188  roc_auc_ovo        0.755716       0.102414    5.023182                 0.755716                0.102414           5.023182            1       True          3
3       WeightedEnsemble_L2       1.000000   0.999455  roc_auc_ovo        1.194713       0.338786   29.559257

Resultados para 307: AUC=0.9988, ACC=0.9697, Time=3614.29s
--- Processando Dataset ID: 458 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       28.35 GB / 31.36 GB (90.4%)
Disk Space Avail:   90.30 GB / 97.87 GB (92.3%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema multiclasse detectado (4 classes). Usando a métrica: roc_auc_ovo


Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val  eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0         KNeighborsDist_BAG_L1       1.000000   0.995011  roc_auc_ovo        0.002972       0.004242    0.005702                 0.002972                0.004242           0.005702            1       True          2
1         KNeighborsUnif_BAG_L1       1.000000   0.995002  roc_auc_ovo        0.003153       0.053174    0.008101                 0.003153                0.053174           0.008101            1       True          1
2           LightGBM_r96_BAG_L1       1.000000   0.992691  roc_auc_ovo        0.017228       0.020366   10.095660                 0.017228                0.020366          10.095660            1       True         19
3               LightGBM_BAG_L1       1.000000   0.996340  roc_auc_ovo        0.062202       

Resultados para 458: AUC=0.9999, ACC=0.9960, Time=3601.29s

Arquivo 'resultados_autogluon_maquina_4.csv' salvo com sucesso!


In [5]:
import traceback

# NOTE(review): this cell repeats the batch-run logic of the previous cell with a
# different MACHINE_ID; consider wrapping it in a function taking the id as argument.

# --- DISTRIBUTED RUN CONFIGURATION ---
# Set MACHINE_ID to a different value (1..TOTAL_MACHINES) on each machine.
MACHINE_ID = 8
TOTAL_MACHINES = 10
# --- END OF CONFIGURATION ---

# An out-of-range id would silently select an empty (or wrong) slice below.
if not 1 <= MACHINE_ID <= TOTAL_MACHINES:
    raise ValueError(
        f"MACHINE_ID must be between 1 and {TOTAL_MACHINES}, got {MACHINE_ID}"
    )

# The 30 OpenML dataset IDs of this benchmark (described by the author as CC18 datasets).
cc18_ids_full = [
    11, 15, 18, 23, 29, 31, 37, 50, 54, 188, 307, 458, 469, 1049,
    1050, 1063, 1068, 1462, 1464, 1468, 1480, 1494, 1501, 1510,
    6332, 23381, 40966, 40975, 40982, 40994
]

# Split the ID list into contiguous batches, one batch per machine.
chunk_size = math.ceil(len(cc18_ids_full) / TOTAL_MACHINES)
start_index = (MACHINE_ID - 1) * chunk_size
end_index = start_index + chunk_size
ids_to_process = cc18_ids_full[start_index:end_index]

print(f"--- MÁQUINA {MACHINE_ID}/{TOTAL_MACHINES} ---")
print(f"Processando os seguintes datasets: {ids_to_process}")

# Output file name is unique per machine.
output_filename = f"resultados_autogluon_maquina_{MACHINE_ID}.csv"
todos_resultados = []

for oid in ids_to_process:
    try:
        res = run_autogluon_experiment(oid)
    except Exception as e:
        # Best effort: report the failure and move on to the next dataset.
        print(f"!!!!!! ERRO AO PROCESSAR O DATASET {oid}: {e} !!!!!!")
        traceback.print_exc()
        continue

    todos_resultados.append(res)
    # Each experiment can take about an hour, so rewrite the CSV after every
    # finished dataset; an interrupt or crash then loses only the run in progress.
    df_results = pd.DataFrame(todos_resultados)
    df_results.to_csv(output_filename, index=False)

if todos_resultados:
    print(f"\nArquivo '{output_filename}' salvo com sucesso!")
else:
    print("Nenhuma tarefa foi concluída com sucesso nesta máquina.")

--- MÁQUINA 8/10 ---
Processando os seguintes datasets: [1494, 1501, 1510]
--- Processando Dataset ID: 1494 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       28.21 GB / 31.36 GB (89.9%)
Disk Space Avail:   88.28 GB / 97.87 GB (90.2%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


Leaderboard on holdout data (DyStack):
                           model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_r134_BAG_L1       0.971561   0.941946     roc_auc        0.250526       0.134060  15.943922                 0.250526                0.134060          15.943922            1       True         59
1         NeuralNetFastAI_BAG_L1       0.971561   0.926614     roc_auc        0.997080       0.089839   5.415656                 0.997080                0.089839           5.415656            1       True         10
2     NeuralNetFastAI_r11_BAG_L1       0.970238   0.926842     roc_auc        0.239033       0.100415   7.375288                 0.239033                0.100415           7.375288            1       True         34
3    NeuralNetFastAI_r145_BAG_L2       0.970238   0.933874     roc_auc        1.609415       0.91

Resultados para 1494: AUC=0.9309, ACC=0.8833, Time=3029.16s
--- Processando Dataset ID: 1501 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       27.95 GB / 31.36 GB (89.1%)
Disk Space Avail:   87.32 GB / 97.87 GB (89.2%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema multiclasse detectado (10 classes). Usando a métrica: roc_auc_ovo


Leaderboard on holdout data (DyStack):
                      model  score_holdout  score_val  eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_BAG_L1       0.998742   0.996910  roc_auc_ovo        0.680124       0.091121    5.986186                 0.680124                0.091121           5.986186            1       True          1
1       WeightedEnsemble_L3       0.998439   0.998032  roc_auc_ovo        2.562426       1.037072  514.330477                 0.004589                0.047280           8.139638            3       True         21
2           LightGBM_BAG_L2       0.998386   0.992719  roc_auc_ovo        2.450694       0.920775  545.055697                 0.074636                0.052301          40.112937            2       True         14
3    NeuralNetFastAI_BAG_L2       0.998350   0.996582  roc_auc_ovo        2.474250       0.979180  511.100840

Resultados para 1501: AUC=0.9976, ACC=0.9414, Time=3623.56s
--- Processando Dataset ID: 1510 com AutoGluon ---


Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.3.1
Python Version:     3.10.18
Operating System:   Linux
Platform Machine:   x86_64
Platform Version:   #1 SMP Debian 5.10.237-1 (2025-05-19)
CPU Count:          8
Memory Avail:       27.66 GB / 31.36 GB (88.2%)
Disk Space Avail:   86.46 GB / 97.87 GB (88.3%)
Presets specified: ['best_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This is used to identify the optimal `num_stack_levels` value. Copies of AutoGluon will be fit on subsets of the data. Then holdout validation data is used to detect stacked overfitting.
	Running DyStack for up to 900s of the 3600s of rem

Problema binário detectado. Usando a métrica: roc_auc


Leaderboard on holdout data (DyStack):
                          model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val    fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0          CatBoost_r177_BAG_L1       1.000000   0.993776     roc_auc        0.018525       0.010391    8.505074                 0.018525                0.010391           8.505074            1       True         14
1               LightGBM_BAG_L1       1.000000   0.991782     roc_auc        0.020610       0.013180    1.623518                 0.020610                0.013180           1.623518            1       True          4
2          CatBoost_r137_BAG_L1       1.000000   0.994017     roc_auc        0.044106       0.013664   59.799386                 0.044106                0.013664          59.799386            1       True         23
3          LightGBM_r131_BAG_L1       1.000000   0.991404     roc_auc        0.053740       0.027

Resultados para 1510: AUC=0.9977, ACC=0.9766, Time=3599.92s

Arquivo 'resultados_autogluon_maquina_8.csv' salvo com sucesso!


