In [1]:
from pathlib import Path

import pandas as pd

from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Training budget per dataset split, forwarded as `time_limit` to
# `TabularPredictor.fit` (AutoGluon reads it in seconds): 2 hours.
TIME_LIMIT = 2 * 60 * 60

In [3]:
# Additional metrics reported next to the main eval metric when the
# leaderboard is computed on the test data (passed as `extra_metrics`).
auxiliary_metrics = [
    'accuracy',
    'balanced_accuracy',
    'f1',
    'f1_macro',
    'f1_micro',
    'roc_auc',
    'average_precision',
    'precision',
    'recall',
    'log_loss',
    'pac_score',
]

In [4]:
# Create the report folder before any training starts, so a multi-hour fit is
# never lost to a missing directory when the Excel files are written.
Path('../../../results/cg').mkdir(parents=True, exist_ok=True)

# Train and evaluate one independent AutoGluon predictor per dataset split
# (set_0 .. set_9): fit on the synthetic set, score on the matching test set.
for i in range(0, 10):
    print(f'Fitting model {i}')

    # Load both files up front: a missing or malformed test file should fail
    # here, not after the full TIME_LIMIT has been spent on training.
    df_train = pd.read_csv(
        f'../../../data/synthetic/cg/set_{i}.csv'
    )
    df_test = pd.read_csv(
        f'../../../data/test/set_{i}.csv'
    )

    # Binary classifier for the 'ED_2Clases' label, optimised for ROC AUC.
    # 'balance_weight' is AutoGluon's predefined class-balancing sample
    # weighting; evaluation metrics still ignore the weights by default.
    predictor = TabularPredictor(
        label='ED_2Clases',
        problem_type='binary',
        eval_metric='roc_auc',
        sample_weight='balance_weight',
        path=f'AutogluonModels/synthetic/cg/v{i}'
    )

    predictor.fit(
        train_data=df_train,
        presets=['high_quality'],
        time_limit=TIME_LIMIT,
        auto_stack=True,
        verbosity=2
    )

    # Score every trained model on the test set, including the extra metrics.
    predictors = predictor.leaderboard(
        df_test,
        extra_metrics=auxiliary_metrics,
        extra_info=True,
        silent=True
    )
    predictors.to_excel(
        f'../../../results/cg/predictors_set_{i}.xlsx',
        index=False
    )

    # Permutation importance must be computed on data the model did not see
    # during fit: AutoGluon warns that passing the training data yields scores
    # biased by overfitting, so the held-out test set is used instead.
    feature_importance = predictor.feature_importance(df_test)
    feature_importance.to_excel(
        f'../../../results/cg/feature_importance_set_{i}.xlsx',
        index=True
    )

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
Memory Avail:       8.29 GB / 15.94 GB (52.0%)
Disk Space Avail:   68.73 GB / 446.36 GB (15.4%)
Presets specified: ['high_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This 

Fitting model 0


2025-03-19 21:10:38,099	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
	Running DyStack sub-fit in a ray process to avoid memory leakage. Enabling ray logging (enable_ray_logging=True). Specify `ds_args={'enable_ray_logging': False}` if you experience logging issues.
2025-03-19 21:10:41,156	INFO worker.py:1762 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
		Context path: "c:\Users\jgala\uned\tfm\synthetic-data\autogluon\synthetic\cg\AutogluonModels\synthetic\cg\v0\ds_sub_fit\sub_fit_ho"
[36m(_dystack pid=16676)[0m Running DyStack sub-fit ...
[36m(_dystack pid=16676)[0m Using predefined sample weighting strategy: balance_weight. Evaluation metrics will ignore sample weights, specify weight_evaluation=True to instead report weighted metrics.
[36m(_dystack pid=16676)[0m Beginning AutoGluon training ... Time limit = 1795s
[36m(_dystack

[36m(_ray_fit pid=8460)[0m [1000]	valid_set's binary_logloss: 0.440126


[36m(_dystack pid=16676)[0m 	0.8433	 = Validation score   (roc_auc)
[36m(_dystack pid=16676)[0m 	2.47s	 = Training   runtime
[36m(_dystack pid=16676)[0m 	0.07s	 = Validation runtime
[36m(_dystack pid=16676)[0m Fitting model: RandomForest_r39_BAG_L1 ... Training model for up to 753.44s of the 1351.98s of remaining time.
[36m(_ray_fit pid=16560)[0m No improvement since epoch 2: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=16676)[0m 	0.8383	 = Validation score   (roc_auc)
[36m(_dystack pid=16676)[0m 	0.72s	 = Training   runtime
[36m(_dystack pid=16676)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=16676)[0m Fitting model: CatBoost_r167_BAG_L1 ... Training model for up to 752.60s of the 1351.15s of remaining time.
[36m(_dystack pid=16676)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.03%)
[36m(_dystack pid=16676)[0m 	0.8337	 = Validation score   (roc_auc)

[36m(_ray_fit pid=15744)[0m [1000]	valid_set's binary_logloss: 0.419493[32m [repeated 2x across cluster][0m


[36m(_dystack pid=16676)[0m 	0.8472	 = Validation score   (roc_auc)
[36m(_dystack pid=16676)[0m 	1.66s	 = Training   runtime
[36m(_dystack pid=16676)[0m 	0.04s	 = Validation runtime
[36m(_dystack pid=16676)[0m Fitting model: XGBoost_r49_BAG_L1 ... Training model for up to 451.80s of the 1050.34s of remaining time.
[36m(_ray_fit pid=16592)[0m No improvement since epoch 0: early stopping[32m [repeated 7x across cluster][0m
[36m(_dystack pid=16676)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.09%)
[36m(_dystack pid=16676)[0m 	0.8391	 = Validation score   (roc_auc)
[36m(_dystack pid=16676)[0m 	2.73s	 = Training   runtime
[36m(_dystack pid=16676)[0m 	0.04s	 = Validation runtime
[36m(_dystack pid=16676)[0m Fitting model: CatBoost_r5_BAG_L1 ... Training model for up to 445.30s of the 1043.85s of remaining time.
[36m(_dystack pid=16676)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fittin

Fitting model 1


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetTorch_r76_BAG_L1_FULL       0.776667   0.709345     roc_auc        0.039250            NaN  0.408229                 0.039250                     NaN           0.408229            1       True         90
1     NeuralNetFastAI_r37_BAG_L1_FULL       0.776667   0.690064     roc_auc        0.042241            NaN  0.197021                 0.042241                     NaN           0.197021            1       True         53
2             XGBoost_r33_BAG_L1_FULL       0.770000   0.756315     roc_auc        0.037761            NaN  0.282015                 0.037761                     NaN           0.282015            1       True         21
3           LightGBM_r131_BAG_L2_FULL       0.766667   0.775421     roc_auc      

Fitting model 2


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0             CatBoost_r5_BAG_L1_FULL       0.830000   0.757448     roc_auc        0.029233            NaN   0.248099                 0.029233                     NaN           0.248099            1       True         71
1           CatBoost_r137_BAG_L1_FULL       0.806667   0.761766     roc_auc        0.023124            NaN   0.250947                 0.023124                     NaN           0.250947            1       True         23
2           LightGBM_r143_BAG_L1_FULL       0.805000   0.769295     roc_auc        0.020119            NaN   0.178148                 0.020119                     NaN           0.178148            1       True         57
3           LightGBM_r130_BAG_L1_FULL       0.805000   0.758932     roc_auc  

Fitting model 3


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           ExtraTrees_r4_BAG_L1_FULL       0.783333   0.681455     roc_auc        0.070080       0.085991   0.606533                 0.070080                0.085991           0.606533            1       True         66
1           CatBoost_r163_BAG_L1_FULL       0.776667   0.732324     roc_auc        0.022116            NaN   0.631565                 0.022116                     NaN           0.631565            1       True        102
2           CatBoost_r177_BAG_L1_FULL       0.775000   0.710060     roc_auc        0.030117            NaN   0.170650                 0.030117                     NaN           0.170650            1       True         14
3         ExtraTrees_r178_BAG_L1_FULL       0.770000   0.674790     roc_auc  

Fitting model 4


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           LightGBM_r135_BAG_L1_FULL       0.836667   0.792557     roc_auc        0.021130            NaN  0.153901                 0.021130                     NaN           0.153901            1       True         82
1            CatBoost_r12_BAG_L1_FULL       0.835000   0.803028     roc_auc        0.021169            NaN  0.830396                 0.021169                     NaN           0.830396            1       True         96
2            LightGBM_r42_BAG_L1_FULL       0.833333   0.783733     roc_auc        0.024134            NaN  0.144773                 0.024134                     NaN           0.144773            1       True        108
3           LightGBM_r130_BAG_L1_FULL       0.833333   0.795944     roc_auc      

Fitting model 5


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetFastAI_r11_BAG_L2_FULL       0.918333   0.852736     roc_auc        0.380509            NaN   6.523126                 0.041209                     NaN           0.191500            2       True        136
1        RandomForestGini_BAG_L1_FULL       0.915833   0.849714     roc_auc        0.089029       0.088612   0.783637                 0.089029                0.088612           0.783637            1       True          5
2           CatBoost_r143_BAG_L1_FULL       0.911667   0.869144     roc_auc        0.022101            NaN   0.401111                 0.022101                     NaN           0.401111            1       True         74
3     NeuralNetFastAI_r65_BAG_L1_FULL       0.906667   0.854598     roc_auc  

Fitting model 6


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           LightGBM_r143_BAG_L1_FULL       0.820000   0.745817     roc_auc        0.022224            NaN  0.186403                 0.022224                     NaN           0.186403            1       True         57
1        RandomForest_r34_BAG_L1_FULL       0.813333   0.731002     roc_auc        0.067036       0.097821  0.832275                 0.067036                0.097821           0.832275            1       True         60
2        RandomForest_r39_BAG_L1_FULL       0.813333   0.748786     roc_auc        0.093197       0.086421  0.744408                 0.093197                0.086421           0.744408            1       True         45
3        RandomForest_r15_BAG_L1_FULL       0.811667   0.752294     roc_auc      

Fitting model 7


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           CatBoost_r180_BAG_L1_FULL       0.858333   0.851063     roc_auc        0.032257            NaN  1.051960                 0.032257                     NaN           1.051960            1       True         89
1    NeuralNetFastAI_r187_BAG_L1_FULL       0.850000   0.827315     roc_auc        0.039283            NaN  0.282247                 0.039283                     NaN           0.282247            1       True        104
2          NeuralNetTorch_BAG_L1_FULL       0.848333   0.807966     roc_auc        0.051371            NaN  1.604027                 0.051371                     NaN           1.604027            1       True         12
3      NeuralNetFastAI_r4_BAG_L1_FULL       0.846667   0.845801     roc_auc      

Fitting model 8


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0            CatBoost_r70_BAG_L1_FULL       0.716667   0.818626     roc_auc        0.029587            NaN  2.665514                 0.029587                     NaN           2.665514            1       True         42
1        RandomForestGini_BAG_L1_FULL       0.715000   0.809289     roc_auc        0.089737       0.086651  0.785499                 0.089737                0.086651           0.785499            1       True          5
2        RandomForestEntr_BAG_L1_FULL       0.712500   0.804310     roc_auc        0.062374       0.087926  0.529443                 0.062374                0.087926           0.529443            1       True          6
3            CatBoost_r60_BAG_L1_FULL       0.708333   0.826668     roc_auc      

Fitting model 9


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_r102_BAG_L2_FULL       0.916667   0.623570     roc_auc        0.358453            NaN  5.100538                 0.040225                     NaN           0.181896            2       True        130
1     NeuralNetFastAI_r11_BAG_L1_FULL       0.906667   0.709170     roc_auc        0.041352            NaN  0.145718                 0.041352                     NaN           0.145718            1       True         34
2         NeuralNetFastAI_BAG_L2_FULL       0.900000   0.705176     roc_auc        0.357499            NaN  5.114607                 0.039271                     NaN           0.195966            2       True        116
3    NeuralNetFastAI_r187_BAG_L1_FULL       0.896667   0.717725     roc_auc      