In [None]:
from pathlib import Path

import pandas as pd

from autogluon.tabular import TabularPredictor
from sklearn.model_selection import train_test_split

In [None]:
# Training budget handed to each predictor.fit() call, in seconds (2 hours).
TIME_LIMIT = 2 * 60 * 60

In [None]:
# Extra metrics requested from predictor.leaderboard() in addition to the
# predictor's own eval_metric.
auxiliary_metrics = [
    'accuracy',
    'balanced_accuracy',
    'f1',
    'f1_macro',
    'f1_micro',
    'roc_auc',
    'average_precision',
    'precision',
    'recall',
    'log_loss',
    'pac_score',
]

In [None]:
# Train one AutoGluon predictor per synthetic TVAE training set (indices 0-9),
# score every trained model on the test set with the same index, and export
# the leaderboard and the feature importances to Excel workbooks.

# DataFrame.to_excel does not create missing parent directories, so make sure
# the output folder exists before any time is spent on training.
results_dir = Path('../../../results/tvaes')
results_dir.mkdir(parents=True, exist_ok=True)

for i in range(0, 10):
    print(f'Fitting model {i}')

    # Read both files before fitting so that a missing or malformed CSV is
    # reported immediately rather than after the training budget is used up.
    df_train = pd.read_csv(
        f'../../../data/synthetic/tvaes/set_{i}.csv'
    )
    df_test = pd.read_csv(
        f'../../../data/test/set_{i}.csv'
    )

    predictor = TabularPredictor(
        label='ED_2Clases',
        problem_type='binary',
        eval_metric='roc_auc',
        sample_weight='balance_weight',
        path=f'AutogluonModels/synthetic/tvaes/v{i}'
    )

    predictor.fit(
        train_data=df_train,
        presets=['high_quality'],
        time_limit=TIME_LIMIT,
        auto_stack=True,
        verbosity=2
    )

    # Leaderboard of all trained models, scored on the test set with the
    # additional metrics listed in auxiliary_metrics.
    predictors = predictor.leaderboard(
        df_test,
        extra_metrics=auxiliary_metrics,
        extra_info=True,
        silent=True
    )
    predictors.to_excel(
        results_dir / f'predictors_set_{i}.xlsx',
        index=False
    )

    # Importance is computed on the test rows, not on df_train: AutoGluon's
    # documentation warns that scores computed on the data used for fit()
    # are biased/overfit and recommends data held out from training.
    feature_importance = predictor.feature_importance(df_test)
    feature_importance.to_excel(
        results_dir / f'feature_importance_set_{i}.xlsx',
        index=True  # the index is written so each row stays identifiable
    )