In [None]:
from pathlib import Path

import pandas as pd
import numpy as np

from autogluon.core.metrics import make_scorer
from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_recall_curve, auc

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
def pr_auc_Metric(y_true, y_pred):
    """Return the area under the precision-recall curve.

    Args:
        y_true: Ground-truth labels, forwarded unchanged as the first
            argument of ``precision_recall_curve``.
        y_pred: Prediction scores, forwarded unchanged as the second
            argument of ``precision_recall_curve``.

    Returns:
        The value of ``auc`` computed with recall on the x-axis and
        precision on the y-axis.
    """
    curve = precision_recall_curve(y_true, y_pred)
    # The third element (decision thresholds) is not needed for the area.
    precision_values, recall_values = curve[0], curve[1]
    return auc(recall_values, precision_values)

# Wrap `pr_auc_Metric` as an AutoGluon scorer object.
# `make_scorer` is AutoGluon's factory (autogluon.core.metrics), imported in
# the notebook's import cell; it is not sklearn's `make_scorer`, which does
# not accept `name` / `optimum`.
ag_pr_auc_scorer = make_scorer(
    name='pr_auc',
    score_func=pr_auc_Metric,
    optimum=1,               # best achievable value of the metric
    greater_is_better=True,  # higher PR-AUC means a better model
    # The score function is fed predicted probabilities rather than labels.
    # NOTE(review): AutoGluon's own ranking metrics are registered with
    # `needs_threshold=True`; confirm `needs_proba=True` is the intended mode.
    needs_proba=True,
)

In [None]:
# Training budget handed to `predictor.fit(time_limit=...)`: 2 hours, in seconds.
TIME_LIMIT = 2 * 60 * 60

# Metric object selected for model evaluation (the custom PR-AUC scorer).
EVAL_METRIC = ag_pr_auc_scorer

In [3]:
# Additional metric names requested from `predictor.leaderboard(extra_metrics=...)`.
auxiliary_metrics = [
    'accuracy',
    'balanced_accuracy',
    'f1',
    'f1_macro',
    'f1_micro',
    'roc_auc',
    'average_precision',
    'precision',
    'recall',
    'log_loss',
    'pac_score',
]

In [4]:
# Train one AutoGluon predictor per synthetic dataset (set_0 .. set_9),
# evaluate it on the matching test set, and export the leaderboard and
# feature importances to Excel.

# Create the output folder before any long-running fit starts, so a missing
# directory cannot make `to_excel` fail after hours of training.
results_dir = Path('../../../results/gc')
results_dir.mkdir(parents=True, exist_ok=True)

for i in range(10):
    print(f'Fitting model {i}')

    # Read both splits up front: a wrong path or malformed CSV then fails
    # immediately instead of after the full training budget has been spent.
    df_train = pd.read_csv(f'../../../data/synthetic/gc/set_{i}.csv')
    df_test = pd.read_csv(f'../../../data/test/set_{i}.csv')

    predictor = TabularPredictor(
        label='ED_2Clases',
        problem_type='binary',
        # NOTE(review): the config cell defines EVAL_METRIC (custom PR-AUC
        # scorer) but training is driven by 'roc_auc' — confirm which one
        # is intended before switching.
        eval_metric='roc_auc',
        sample_weight='balance_weight',
        path=f'AutogluonModels/synthetic/gc/v{i}',
    )

    predictor.fit(
        train_data=df_train,
        presets=['high_quality'],
        time_limit=TIME_LIMIT,
        auto_stack=True,
        verbosity=2,
    )

    # Leaderboard of every trained model scored on the test data, including
    # the auxiliary metrics and extended model info.
    predictors = predictor.leaderboard(
        df_test,
        extra_metrics=auxiliary_metrics,
        extra_info=True,
        silent=True,
    )
    predictors.to_excel(
        results_dir / f'predictors_set_{i}.xlsx',
        index=False,
    )

    # Permutation importance is computed on the held-out test data: the
    # AutoGluon docs warn against passing the training data here because the
    # resulting scores are inaccurate.
    feature_importance = predictor.feature_importance(df_test)
    feature_importance.to_excel(
        results_dir / f'feature_importance_set_{i}.xlsx',
        index=True,  # the index holds the feature names
    )

Verbosity: 2 (Standard Logging)
AutoGluon Version:  1.2
Python Version:     3.11.11
Operating System:   Windows
Platform Machine:   AMD64
Platform Version:   10.0.26100
CPU Count:          12
Memory Avail:       9.07 GB / 15.94 GB (56.9%)
Disk Space Avail:   66.82 GB / 446.36 GB (15.0%)
Presets specified: ['high_quality']
Setting dynamic_stacking from 'auto' to True. Reason: Enable dynamic_stacking when use_bag_holdout is disabled. (use_bag_holdout=False)
Stack configuration (auto_stack=True): num_stack_levels=1, num_bag_folds=8, num_bag_sets=1
Note: `save_bag_folds=False`! This will greatly reduce peak disk usage during fit (by ~8x), but runs the risk of an out-of-memory error during model refit if memory is small relative to the data size.
	You can avoid this risk by setting `save_bag_folds=True`.
DyStack is enabled (dynamic_stacking=True). AutoGluon will try to determine whether the input data is affected by stacked overfitting and enable or disable stacking as a consequence.
	This 

Fitting model 0


2025-03-16 22:40:26,555	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
	Running DyStack sub-fit in a ray process to avoid memory leakage. Enabling ray logging (enable_ray_logging=True). Specify `ds_args={'enable_ray_logging': False}` if you experience logging issues.
2025-03-16 22:40:29,570	INFO worker.py:1762 -- Started a local Ray instance. View the dashboard at [1m[32mhttp://127.0.0.1:8265 [39m[22m
		Context path: "c:\Users\jgala\uned\tfm\synthetic-data\autogluon\synthetic\gc\AutogluonModels\synthetic\gc\v0\ds_sub_fit\sub_fit_ho"
[36m(_dystack pid=14216)[0m Running DyStack sub-fit ...
[36m(_dystack pid=14216)[0m Using predefined sample weighting strategy: balance_weight. Evaluation metrics will ignore sample weights, specify weight_evaluation=True to instead report weighted metrics.
[36m(_dystack pid=14216)[0m Beginning AutoGluon training ... Time limit = 1795s
[36m(_dystack

[36m(_ray_fit pid=8528)[0m [1000]	valid_set's binary_logloss: 0.184845


[36m(_dystack pid=14216)[0m 	0.9021	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	2.19s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.05s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: NeuralNetFastAI_r191_BAG_L1 ... Training model for up to 1069.63s of the 1668.20s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.03%)
[36m(_ray_fit pid=18600)[0m No improvement since epoch 5: early stopping
[36m(_dystack pid=14216)[0m 	0.9219	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	7.43s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.15s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: CatBoost_r9_BAG_L1 ... Training model for up to 1058.56s of the 1657.14s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFit

[36m(_ray_fit pid=4272)[0m [1000]	valid_set's binary_logloss: 0.356288
[36m(_ray_fit pid=9856)[0m [1000]	valid_set's binary_logloss: 0.327107


[36m(_dystack pid=14216)[0m 	0.9344	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	3.79s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.07s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: NeuralNetTorch_r22_BAG_L1 ... Training model for up to 783.23s of the 1381.80s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.02%)
[36m(_dystack pid=14216)[0m 	0.9384	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	10.15s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.2s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: XGBoost_r33_BAG_L1 ... Training model for up to 769.69s of the 1368.26s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=3.81%)
[36m(_dystack pid=

[36m(_ray_fit pid=20516)[0m [1000]	valid_set's binary_logloss: 0.206232[32m [repeated 21x across cluster][0m


[36m(_dystack pid=14216)[0m 	0.8999	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	2.16s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.05s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: NeuralNetFastAI_r143_BAG_L1 ... Training model for up to 303.97s of the 902.54s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.03%)
[36m(_ray_fit pid=21416)[0m No improvement since epoch 2: early stopping
[36m(_dystack pid=14216)[0m 	0.9456	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	9.51s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.13s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: CatBoost_r70_BAG_L1 ... Training model for up to 290.92s of the 889.49s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittin

[36m(_ray_fit pid=9460)[0m [1000]	valid_set's binary_logloss: 0.251972
[36m(_ray_fit pid=11056)[0m [1000]	valid_set's binary_logloss: 0.432034


[36m(_dystack pid=14216)[0m 	0.9132	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	5.51s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.13s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: RandomForest_r39_BAG_L1 ... Training model for up to 227.84s of the 826.41s of remaining time.
[36m(_ray_fit pid=21436)[0m No improvement since epoch 15: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=14216)[0m 	0.8795	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	0.69s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.08s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: CatBoost_r167_BAG_L1 ... Training model for up to 227.00s of the 825.58s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.91%)
[36m(_dystack pid=14216)[0m 	0.9001	 = Validation score   (roc_auc)


[36m(_ray_fit pid=18812)[0m [1000]	valid_set's binary_logloss: 0.210511[32m [repeated 11x across cluster][0m


[36m(_dystack pid=14216)[0m 	0.9598	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	2.07s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.04s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: NeuralNetTorch_r22_BAG_L2 ... Training model for up to 352.41s of the 352.21s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=0.02%)
[36m(_dystack pid=14216)[0m 	0.9213	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	9.12s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.18s	 = Validation runtime
[36m(_dystack pid=14216)[0m Fitting model: XGBoost_r33_BAG_L2 ... Training model for up to 339.84s of the 339.64s of remaining time.
[36m(_dystack pid=14216)[0m 	Fitting 8 child models (S1F1 - S1F8) | Fitting with ParallelLocalFoldFittingStrategy (8 workers, per: cpus=1, gpus=0, memory=5.80%)
[36m(_dystack pid=14

[36m(_ray_fit pid=19100)[0m [1000]	valid_set's binary_logloss: 0.234353[32m [repeated 3x across cluster][0m


[36m(_dystack pid=14216)[0m 	0.9566	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	2.93s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.06s	 = Validation runtime
[36m(_ray_fit pid=21324)[0m No improvement since epoch 4: early stopping[32m [repeated 6x across cluster][0m
[36m(_dystack pid=14216)[0m Fitting model: WeightedEnsemble_L3 ... Training model for up to 360.00s of the 7.26s of remaining time.
[36m(_dystack pid=14216)[0m 	Ensemble Weights: {'NeuralNetTorch_r30_BAG_L1': 0.542, 'CatBoost_BAG_L2': 0.167, 'XGBoost_r89_BAG_L2': 0.167, 'LightGBM_r96_BAG_L2': 0.083, 'NeuralNetFastAI_r143_BAG_L2': 0.042}
[36m(_dystack pid=14216)[0m 	0.9682	 = Validation score   (roc_auc)
[36m(_dystack pid=14216)[0m 	0.05s	 = Training   runtime
[36m(_dystack pid=14216)[0m 	0.0s	 = Validation runtime
[36m(_dystack pid=14216)[0m AutoGluon training complete, total runtime = 1787.83s ... Best model: WeightedEnsemble_L2 | Estimated inference throughput: 129.3 rows/s (4

Fitting model 1


Leaderboard on holdout data (DyStack):
                               model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0               CatBoost_BAG_L1_FULL       1.000000   0.964351     roc_auc        0.023450            NaN   9.368102                 0.023450                     NaN           9.368102            1       True          7
1           CatBoost_r13_BAG_L1_FULL       1.000000   0.960222     roc_auc        0.023601            NaN   6.685487                 0.023601                     NaN           6.685487            1       True         25
2          CatBoost_r137_BAG_L1_FULL       1.000000   0.964810     roc_auc        0.026111            NaN   4.324311                 0.026111                     NaN           4.324311            1       True         23
3          LightGBM_r130_BAG_L1_FULL       1.000000   0.966780     roc_auc       

Fitting model 2


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetTorch_r79_BAG_L2_FULL       0.898333   0.839081     roc_auc        0.416283            NaN   7.719038                 0.060343                     NaN           0.541816            2       True         97
1      NeuralNetTorch_r14_BAG_L2_FULL       0.895000   0.842563     roc_auc        0.393064            NaN   7.307182                 0.037123                     NaN           0.129960            2       True        121
2          ExtraTrees_r42_BAG_L2_FULL       0.895000   0.833104     roc_auc        0.458230            NaN   7.872607                 0.102289                0.097192           0.695385            2       True        104
3     NeuralNetTorch_r143_BAG_L1_FULL       0.895000   0.839594     roc_auc  

Fitting model 3


Leaderboard on holdout data (DyStack):
                               model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r22_BAG_L1_FULL       0.951667   0.894727     roc_auc        0.047378            NaN   0.635646                 0.047378                     NaN           0.635646            1       True         20
1     NeuralNetTorch_r79_BAG_L1_FULL       0.951667   0.887737     roc_auc        0.051824            NaN   0.467729                 0.051824                     NaN           0.467729            1       True         15
2     NeuralNetTorch_r30_BAG_L1_FULL       0.950000   0.890935     roc_auc        0.057390            NaN   1.013287                 0.057390                     NaN           1.013287            1       True         30
3     NeuralNetTorch_r41_BAG_L1_FULL       0.946667   0.867336     roc_auc       

Fitting model 4


Leaderboard on holdout data (DyStack):
                               model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetTorch_r41_BAG_L1_FULL       0.955000   0.968750     roc_auc        0.039396            NaN   0.829456                 0.039396                     NaN           0.829456            1       True         48
1     NeuralNetTorch_r14_BAG_L1_FULL       0.951667   0.969398     roc_auc        0.043230            NaN   0.794805                 0.043230                     NaN           0.794805            1       True         39
2     NeuralNetTorch_r79_BAG_L1_FULL       0.938333   0.969209     roc_auc        0.047839            NaN   0.522580                 0.047839                     NaN           0.522580            1       True         15
3           WeightedEnsemble_L2_FULL       0.935000   0.984510     roc_auc       

Fitting model 5


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetTorch_r30_BAG_L2_FULL       0.898333   0.842752     roc_auc        0.484099            NaN  12.615671                 0.050728                     NaN           0.631337            2       True        103
1     NeuralNetFastAI_r37_BAG_L1_FULL       0.893333   0.832362     roc_auc        0.042235            NaN   0.426768                 0.042235                     NaN           0.426768            1       True         53
2      NeuralNetTorch_r14_BAG_L2_FULL       0.890000   0.831795     roc_auc        0.469635            NaN  12.292916                 0.036264                     NaN           0.308581            2       True        112
3      NeuralNetTorch_r22_BAG_L2_FULL       0.886667   0.828381     roc_auc  

Fitting model 6


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0      NeuralNetTorch_r30_BAG_L2_FULL       0.866667   0.750243     roc_auc        0.462286            NaN   8.051685                 0.050888                     NaN           0.705544            2       True        130
1             XGBoost_r22_BAG_L1_FULL       0.855000   0.713704     roc_auc        0.102059            NaN   0.072967                 0.102059                     NaN           0.072967            1       True         83
2           LightGBM_r161_BAG_L1_FULL       0.851667   0.713110     roc_auc        0.021099            NaN   0.228033                 0.021099                     NaN           0.228033            1       True         40
3            XGBoost_r194_BAG_L1_FULL       0.850000   0.693828     roc_auc  

Fitting model 7


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0     NeuralNetFastAI_r37_BAG_L1_FULL       0.931667   0.901419     roc_auc        0.041819            NaN   0.846815                 0.041819                     NaN           0.846815            1       True         53
1              LightGBMXT_BAG_L2_FULL       0.910000   0.902499     roc_auc        0.414632            NaN   6.262298                 0.027757                     NaN           0.155953            2       True         91
2            WeightedEnsemble_L3_FULL       0.908333   0.924196     roc_auc        0.479046            NaN   7.550977                 0.011573                     NaN           0.050675            3       True        135
3           LightGBM_r130_BAG_L2_FULL       0.906667   0.913199     roc_auc  

Fitting model 8


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val   fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_r102_BAG_L2_FULL       0.870000   0.861048     roc_auc        0.489048            NaN   7.663120                 0.034366                     NaN           0.300905            2       True        119
1      NeuralNetTorch_r79_BAG_L1_FULL       0.835000   0.869927     roc_auc        0.055256            NaN   0.489909                 0.055256                     NaN           0.489909            1       True         15
2      NeuralNetTorch_r14_BAG_L1_FULL       0.833333   0.854571     roc_auc        0.034377            NaN   0.369304                 0.034377                     NaN           0.369304            1       True         39
3     NeuralNetTorch_r143_BAG_L1_FULL       0.831667   0.873813     roc_auc  

Fitting model 9


Leaderboard on holdout data (DyStack):
                                model  score_holdout  score_val eval_metric  pred_time_test  pred_time_val  fit_time  pred_time_test_marginal  pred_time_val_marginal  fit_time_marginal  stack_level  can_infer  fit_order
0    NeuralNetFastAI_r102_BAG_L2_FULL       0.926667   0.846745     roc_auc        0.482571            NaN  4.315053                 0.070406                     NaN           0.186628            2       True        112
1      NeuralNetTorch_r14_BAG_L2_FULL       0.923333   0.844802     roc_auc        0.458462            NaN  4.440302                 0.046296                     NaN           0.311878            2       True        127
2      NeuralNetTorch_r79_BAG_L2_FULL       0.923333   0.843615     roc_auc        0.491429            NaN  4.599829                 0.079264                     NaN           0.471405            2       True        103
3      NeuralNetTorch_r14_BAG_L1_FULL       0.916667   0.850793     roc_auc      