In [1]:
from pathlib import Path

import numpy as np
import pandas as pd
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Wall-clock budget, in seconds, passed to each `predictor.fit` call (2 hours).
TIME_LIMIT = 2 * 60 * 60

In [3]:
# Additional metrics requested from the leaderboard (via `extra_metrics`)
# alongside the predictor's own evaluation metric.
auxiliary_metrics = [
    'accuracy',
    'balanced_accuracy',
    'f1',
    'f1_macro',
    'f1_micro',
    'roc_auc',
    'average_precision',
    'precision',
    'recall',
    'log_loss',
    'pac_score',
]

In [4]:
# For each synthetic CTGAN training set: fit an AutoGluon predictor, score every
# trained model on the matching test set, and export the leaderboard and the
# feature importances to Excel workbooks.
N_SETS = 10
RESULTS_DIR = Path('../../../results/ctgan')

# Create the output folder up front so a multi-hour fit cannot be lost to a
# missing directory when the first workbook is written.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

for i in range(N_SETS):
    print(f'Fitting model {i}')

    # Load both splits before fitting so a missing or misnamed CSV fails
    # immediately instead of after the whole time budget has been spent.
    df_train = pd.read_csv(f'../../../data/synthetic/ctgan/set_{i}.csv')
    df_test = pd.read_csv(f'../../../data/test/set_{i}.csv')

    predictor = TabularPredictor(
        label='ED_2Clases',
        problem_type='binary',
        eval_metric='roc_auc',
        sample_weight='balance_weight',
        path=f'AutogluonModels/synthetic/ctgan/v{i}',
    )
    predictor.fit(
        train_data=df_train,
        presets=['high_quality'],
        time_limit=TIME_LIMIT,
        auto_stack=True,
        verbosity=2,
    )

    # Per-model scores on the test split, including the auxiliary metrics.
    predictors = predictor.leaderboard(
        df_test,
        extra_metrics=auxiliary_metrics,
        extra_info=True,
        silent=True,
    )
    predictors.to_excel(
        RESULTS_DIR / f'predictors_set_{i}.xlsx',
        index=False,
    )

    # Importance is computed on the held-out test split rather than on the
    # training data: AutoGluon's documentation warns that passing the training
    # data here yields scores biased by overfitting.
    feature_importance = predictor.feature_importance(df_test)
    feature_importance.to_excel(
        RESULTS_DIR / f'feature_importance_set_{i}.xlsx',
        index=True,  # keep the index in the exported workbook
    )

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
Memory Avail:       8.12 GB / 15.94 GB (50.9%)
Disk Space Avail:   73.85 GB / 446.36 GB (16.5%)
Presets specified: ['high_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This 

Fitting model 0


2025-03-17 23:03:59,287	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
	Running DyStack sub-fit in a ray process to avoid memory leakage. Enabling ray logging (enable_ray_logging=True). Specify `ds_args={'enable_ray_logging': False}` if you experience logging issues.
2025-03-17 23:04:03,893	INFO worker.py:1762 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
		Context path: "c:\Users\jgala\uned\tfm\synthetic-data\autogluon\synthetic\ctgan\AutogluonModels\synthetic\ctgan\v0\ds_sub_fit\sub_fit_ho"
[36m(_dystack pid=20576)[0m Running DyStack sub-fit ...
[36m(_dystack pid=20576)[0m Using predefined sample weighting strategy: balance_weight. Evaluation metrics will ignore sample weights, specify weight_evaluation=True to instead report weighted metrics.
[36m(_dystack pid=20576)[0m Beginning AutoGluon training ... Time limit = 1793s
[36m(_d

[36m(_ray_fit pid=11016)[0m [1000]	valid_set's binary_logloss: 0.563084


[36m(_dystack pid=20576)[0m 	0.6755	 = Validation score   (roc_auc)
[36m(_dystack pid=20576)[0m 	1.22s	 = Training   runtime
[36m(_dystack pid=20576)[0m 	0.04s	 = Validation runtime
[36m(_dystack pid=20576)[0m Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 993.15s of the 1590.98s of remaining time.
[36m(_dystack pid=20576)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.02%)
[36m(_dystack pid=20576)[0m 	0.6837	 = Validation score   (roc_auc)
[36m(_dystack pid=20576)[0m 	10.96s	 = Training   runtime
[36m(_dystack pid=20576)[0m 	0.16s	 = Validation runtime
[36m(_dystack pid=20576)[0m Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 978.39s of the 1576.22s of remaining time.
[36m(_dystack pid=20576)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=4.34%)
[36m(_dystack pid

[36m(_ray_fit pid=17048)[0m [1000]	valid_set's binary_logloss: 0.571157
[36m(_ray_fit pid=16580)[0m [1000]	valid_set's binary_logloss: 0.552262


[36m(_dystack pid=20576)[0m 	0.6744	 = Validation score   (roc_auc)
[36m(_dystack pid=20576)[0m 	2.97s	 = Training   runtime
[36m(_dystack pid=20576)[0m 	0.05s	 = Validation runtime
[36m(_dystack pid=20576)[0m Fitting model: RandomForest_r39_BAG_L1 ... Training model for up to 683.44s of the 1281.27s of remaining time.
[36m(_ray_fit pid=13332)[0m No improvement since epoch 1: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=20576)[0m 	0.7102	 = Validation score   (roc_auc)
[36m(_dystack pid=20576)[0m 	0.71s	 = Training   runtime
[36m(_dystack pid=20576)[0m 	0.09s	 = Validation runtime
[36m(_dystack pid=20576)[0m Fitting model: CatBoost_r167_BAG_L1 ... Training model for up to 682.61s of the 1280.44s of remaining time.
[36m(_dystack pid=20576)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=1.04%)
[36m(_dystack pid=20576)[0m 	0.7186	 = Validation score   (roc_auc)

[36m(_ray_fit pid=17444)[0m [1000]	valid_set's binary_logloss: 0.56162[32m [repeated 2x across cluster][0m


[36m(_dystack pid=20576)[0m 	0.6922	 = Validation score   (roc_auc)
[36m(_dystack pid=20576)[0m 	2.12s	 = Training   runtime
[36m(_dystack pid=20576)[0m 	0.06s	 = Validation runtime
[36m(_dystack pid=20576)[0m Fitting model: RandomForest_r39_BAG_L2 ... Training model for up to 105.26s of the 104.88s of remaining time.
[36m(_ray_fit pid=13928)[0m No improvement since epoch 4: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=20576)[0m 	0.7108	 = Validation score   (roc_auc)
[36m(_dystack pid=20576)[0m 	0.72s	 = Training   runtime
[36m(_dystack pid=20576)[0m 	0.09s	 = Validation runtime
[36m(_dystack pid=20576)[0m Fitting model: CatBoost_r167_BAG_L2 ... Training model for up to 104.41s of the 104.03s of remaining time.
[36m(_dystack pid=20576)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=2.01%)
[36m(_dystack pid=20576)[0m 	0.7092	 = Validation score   (roc_auc)


Fitting model 1


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0       RandomForest_r127_BAG_L1_FULL       0.855000   0.684963     roc_auc        0.089365       0.090271   0.943566                 0.089365                0.090271           0.943566            1       True         58
1        RandomForest_r39_BAG_L1_FULL       0.851667   0.702531     roc_auc        0.080932       0.088078   0.689998                 0.080932                0.088078           0.689998            1       True         45
2            CatBoost_r13_BAG_L1_FULL       0.841667   0.740312     roc_auc        0.027243            NaN  10.103669                 0.027243                     NaN          10.103669            1       True         25
3             CatBoost_r9_BAG_L1_FULL       0.836667   0.757340     roc_auc  

Fitting model 2


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           CatBoost_r163_BAG_L1_FULL       0.873333   0.728600     roc_auc        0.021113            NaN   0.906693                 0.021113                     NaN           0.906693            1       True        102
1           CatBoost_r143_BAG_L1_FULL       0.860000   0.744171     roc_auc        0.022133            NaN   0.334062                 0.022133                     NaN           0.334062            1       True         74
2            CatBoost_r60_BAG_L1_FULL       0.845000   0.729194     roc_auc        0.022113            NaN   1.425822                 0.022113                     NaN           1.425822            1       True         80
3             XGBoost_r31_BAG_L1_FULL       0.833333   0.717994     roc_auc  

Fitting model 3


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0            CatBoost_r49_BAG_L1_FULL       0.825000   0.731622     roc_auc        0.023998            NaN  0.134318                 0.023998                     NaN           0.134318            1       True         55
1           CatBoost_r137_BAG_L1_FULL       0.825000   0.744414     roc_auc        0.026278            NaN  1.530879                 0.026278                     NaN           1.530879            1       True         23
2           CatBoost_r177_BAG_L1_FULL       0.816667   0.748003     roc_auc        0.033255            NaN  0.199502                 0.033255                     NaN           0.199502            1       True         14
3           CatBoost_r167_BAG_L1_FULL       0.811667   0.740501     roc_auc      

Fitting model 4


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_r127_BAG_L1_FULL       0.750000   0.659488     roc_auc        0.040236            NaN   0.205858                 0.040236                     NaN           0.205858            1       True         93
1                 XGBoost_BAG_L1_FULL       0.746667   0.749244     roc_auc        0.027140            NaN   0.244188                 0.027140                     NaN           0.244188            1       True         11
2          KNeighborsDist_BAG_L1_FULL       0.738333   0.628832     roc_auc        0.018908       0.008134   0.009998                 0.018908                0.008134           0.009998            1       True          2
3    NeuralNetFastAI_r102_BAG_L1_FULL       0.720000   0.651770     roc_auc  

Fitting model 5


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0             XGBoost_r31_BAG_L1_FULL       0.866667   0.724633     roc_auc        0.039250            NaN  0.238083                 0.039250                     NaN           0.238083            1       True         77
1                CatBoost_BAG_L1_FULL       0.861667   0.748948     roc_auc        0.022137            NaN  3.282080                 0.022137                     NaN           3.282080            1       True          7
2      NeuralNetTorch_r86_BAG_L1_FULL       0.845000   0.756045     roc_auc        0.049392            NaN  0.847147                 0.049392                     NaN           0.847147            1       True         32
3                LightGBM_BAG_L1_FULL       0.843333   0.737991     roc_auc      

Fitting model 6


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_r103_BAG_L1_FULL       0.803333   0.704204     roc_auc        0.034236            NaN  0.149779                 0.034236                     NaN           0.149779            1       True         38
1             XGBoost_r49_BAG_L1_FULL       0.801667   0.703665     roc_auc        0.035311            NaN  0.124880                 0.035311                     NaN           0.124880            1       True         70
2             XGBoost_r22_BAG_L1_FULL       0.801667   0.714486     roc_auc        0.149409            NaN  0.065987                 0.149409                     NaN           0.065987            1       True         83
3    NeuralNetFastAI_r127_BAG_L1_FULL       0.786667   0.675707     roc_auc      

Fitting model 7


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0             XGBoost_r89_BAG_L1_FULL       0.813333   0.732891     roc_auc        0.034214            NaN  0.066879                 0.034214                     NaN           0.066879            1       True         29
1             XGBoost_r33_BAG_L1_FULL       0.795000   0.723338     roc_auc        0.038835            NaN  0.257176                 0.038835                     NaN           0.257176            1       True         21
2            XGBoost_r194_BAG_L1_FULL       0.793333   0.750972     roc_auc        0.064835            NaN  0.117513                 0.064835                     NaN           0.117513            1       True         35
3           CatBoost_r180_BAG_L1_FULL       0.778333   0.746546     roc_auc      

Fitting model 8


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0                 XGBoost_BAG_L2_FULL       0.800000   0.761887     roc_auc        0.408913            NaN  4.393949                 0.029124                     NaN           0.079525            2       True        114
1             XGBoost_r33_BAG_L2_FULL       0.785000   0.744144     roc_auc        0.420047            NaN  4.610761                 0.040259                     NaN           0.296338            2       True        124
2             XGBoost_r89_BAG_L2_FULL       0.780000   0.757057     roc_auc        0.417312            NaN  4.382015                 0.037524                     NaN           0.067591            2       True        132
3            XGBoost_r194_BAG_L2_FULL       0.778333   0.749703     roc_auc      

Fitting model 9


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0           CatBoost_r180_BAG_L1_FULL       0.825000   0.725173     roc_auc        0.033245            NaN  0.946160                 0.033245                     NaN           0.946160            1       True         89
1           CatBoost_r177_BAG_L1_FULL       0.816667   0.743901     roc_auc        0.027604            NaN  0.274407                 0.027604                     NaN           0.274407            1       True         14
2        RandomForest_r34_BAG_L1_FULL       0.806667   0.668313     roc_auc        0.060344       0.098415  0.833457                 0.060344                0.098415           0.833457            1       True         60
3            CatBoost_r49_BAG_L1_FULL       0.806667   0.721098     roc_auc      