In [1]:
import pandas as pd
import numpy as np
import re

In [2]:
# Load the three result tables and the metafeature table from the working directory.
# The paths are read in the same order as the target names on the left.
results, results_22_05, results_tan, metafeatures = (
    pd.read_csv(csv_path)
    for csv_path in (
        'results.csv',
        'results_22-05.csv',
        'results_tan.csv',
        'metafeatures.csv',
    )
)

## Datasets

In [3]:
# Dataset identifiers kept from the metafeature table.
# Each entry has the form "openml__<name>__<numeric id>".
datasets = [
    'openml__credit-g__31',
    'openml__electricity__219',
    'openml__elevators__3711',
    'openml__MiniBooNE__168335',
    'openml__guillermo__168337',
    'openml__higgs__146606',
    'openml__nomao__9977',
    'openml__profb__3561',
    'openml__socmob__3797',
    'openml__Australian__146818',
    'openml__Bioresponse__9910',
    'openml__SpeedDating__146607',
    'openml__ada_agnostic__3896',
    'openml__airlines__189354',
    'openml__colic__25',
    'openml__colic__27',
    'openml__credit-approval__29',
    'openml__heart-h__50',
    'openml__jasmine__168911',
    'openml__kc1__3917',
    'openml__phoneme__9952',
    'openml__qsar-biodeg__9957',
]

In [4]:
# Columns kept from the metafeature table: the dataset identifier plus six
# summary statistics of the pymfe "best_node" landmarking feature.
meta_cols = [
    'dataset_name',
    'f__pymfe.landmarking.best_node.mean',
    'f__pymfe.landmarking.best_node.sd',
    'f__pymfe.landmarking.best_node.max',
    'f__pymfe.landmarking.best_node.skewness',
    'f__pymfe.landmarking.best_node.kurtosis',
    'f__pymfe.landmarking.best_node.iq_range',
]

# Narrow the table to those columns (rebinds `metafeatures`).
metafeatures = metafeatures.loc[:, meta_cols]

In [5]:
# Remove "__fold" and everything after it from each name so that every fold of a
# dataset shares a single identifier.
metafeatures['dataset_name'] = metafeatures['dataset_name'].map(
    lambda raw_name: re.sub(r'(__fold.*)', '', raw_name)
)

In [6]:
# Keep only the rows whose identifier is listed in `datasets`.
metafeatures_filtered = metafeatures.loc[
    metafeatures['dataset_name'].isin(datasets)
]

In [7]:
# Average each metafeature over all rows of a dataset, then turn the group key
# back into an ordinary column.
metafeatures_grouped = (
    metafeatures_filtered
    .groupby('dataset_name')
    .mean()
    .reset_index()
)

In [8]:
# Drop the second colic entry: once the prefix and numeric id are stripped,
# 'openml__colic__25' and 'openml__colic__27' would both become 'colic'.
metafeatures_grouped = metafeatures_grouped.loc[
    metafeatures_grouped['dataset_name'].ne('openml__colic__27')
]

In [9]:
# Reduce "openml__<name>__<id>" to "<name>": first remove the "openml__" prefix,
# then remove the first remaining "__" and everything after it.
metafeatures_grouped['dataset_name'] = metafeatures_grouped['dataset_name'].map(
    lambda full_id: re.sub(r'__.*', '', re.sub(r'openml__', '', full_id))
)

In [10]:
# Replace the long pymfe column names with short "best_node_*" labels.
metafeatures_grouped = metafeatures_grouped.rename(
    columns={
        'f__pymfe.landmarking.best_node.mean': 'best_node_mean',
        'f__pymfe.landmarking.best_node.sd': 'best_node_std',
        'f__pymfe.landmarking.best_node.max': 'best_node_max',
        'f__pymfe.landmarking.best_node.skewness': 'best_node_skew',
        'f__pymfe.landmarking.best_node.kurtosis': 'best_node_kurt',
        'f__pymfe.landmarking.best_node.iq_range': 'best_node_iq_range',
    }
)

In [11]:
def get_n_instances_and_n_features(dataset_name):
    """Look up the size of a dataset in the global ``results`` table.

    Parameters
    ----------
    dataset_name : str
        Value to match against ``results['dataset_name']``.

    Returns
    -------
    tuple
        ``(n_instances, n_features)`` taken from the first matching row's
        ``dataset_num_instances`` and ``dataset_num_features`` columns, or
        ``(None, None)`` when no row matches ``dataset_name``.

    Raises
    ------
    KeyError
        If ``results`` lacks one of the required columns. Only the
        "dataset not present" case is mapped to ``(None, None)``; schema
        problems are surfaced instead of being silently swallowed.
    """
    matching_rows = results.loc[results['dataset_name'] == dataset_name]

    # An unknown dataset is an expected situation, so report it with a sentinel.
    if matching_rows.empty:
        return None, None

    n_instances = matching_rows['dataset_num_instances'].iloc[0]
    n_features = matching_rows['dataset_num_features'].iloc[0]
    return n_instances, n_features

In [12]:
# Add the dataset size columns: each name is looked up in `results`, and the
# returned pair is expanded into two columns through a pd.Series.
metafeatures_grouped[['num_instances', 'num_features']] = (
    metafeatures_grouped['dataset_name']
    .apply(lambda short_name: pd.Series(get_n_instances_and_n_features(short_name)))
)

In [13]:
# Final column order: identifier, size columns, then the best_node statistics.
cols_order = [
    'dataset_name',
    'num_instances',
    'num_features',
    'best_node_skew',
    'best_node_kurt',
    'best_node_iq_range',
    'best_node_mean',
    'best_node_std',
    'best_node_max',
]
metafeatures_grouped = metafeatures_grouped.loc[:, cols_order]

In [14]:
# Keep only datasets whose size was found in `results`, renumber the rows, then
# sort case-insensitively by name. The sort runs after the renumbering, so the
# index labels are not sequential in the final order.
metafeatures_grouped = (
    metafeatures_grouped
    .loc[metafeatures_grouped['num_instances'].notna()]
    .reset_index(drop=True)
    .sort_values(by='dataset_name', key=lambda names: names.str.lower())
)

In [15]:
# Cast both size columns to int now that rows without a size have been removed.
metafeatures_grouped = metafeatures_grouped.astype(
    {'num_instances': int, 'num_features': int}
)

In [16]:
# Display the assembled per-dataset table (sizes plus best_node statistics).
metafeatures_grouped

Unnamed: 0,dataset_name,num_instances,num_features,best_node_skew,best_node_kurt,best_node_iq_range,best_node_mean,best_node_std,best_node_max
1,ada_agnostic,4562,48,,,0.0,0.5,0.0,0.5
2,colic,368,26,0.284631,-1.173049,0.105702,0.59974,0.071867,0.714094
3,credit-approval,690,15,-0.529374,-0.979194,0.038305,0.719909,0.040102,0.771242
4,credit-g,1000,20,1.368262,1.624127,0.012128,0.507649,0.012861,0.532143
5,electricity,45312,8,-0.403566,-0.579314,0.009547,0.732736,0.009072,0.746601
6,elevators,16599,18,-0.428104,-0.773457,0.014233,0.697342,0.013756,0.717408
7,heart-h,294,13,0.126263,-0.931349,0.075955,0.765722,0.072736,0.88375
8,jasmine,2984,144,-0.244946,-1.321576,0.030983,0.762767,0.022844,0.792864
9,kc1,2109,21,,,0.0,0.5,0.0,0.5
10,nomao,34465,118,0.38072,-0.476791,0.00795,0.833644,0.007289,0.847559


In [17]:
# Write the table to 'metafeatures_grouped.tex': no index column, floats with
# three decimals, first column left-aligned and all others right-aligned.
# NOTE(review): escape=False leaves LaTeX special characters unescaped; the
# column names contain underscores — confirm the consuming document copes with that.
metafeatures_grouped.to_latex(
    'metafeatures_grouped.tex',
    column_format='l' + 'r' * (metafeatures_grouped.shape[1] - 1),
    float_format='%.3f',
    index=False,
    escape=False,
    caption='Grouped metafeatures for the datasets used in the experiments.',
    label='tab:metafeatures_grouped',
)

## Results

In [18]:
# Display the table loaded from results.csv (one row per recorded run).
results

Unnamed: 0,dataset_name,dataset_num_features,dataset_num_instances,model_name,train_time,val_time,test_time,train_log_loss,train_auc,train_accuracy,train_f1,val_log_loss,val_auc,val_accuracy,val_f1,test_log_loss,test_auc,test_accuracy,test_f1
0,ada_agnostic,48,4562,CatBoost,2.803,0.002,0.000,0.262,0.943,0.887,0.887,0.316,0.907,0.859,0.859,0.317,0.907,0.857,0.857
1,ada_agnostic,48,4562,CatBoost,2.067,0.000,0.000,0.247,0.954,0.896,0.896,0.321,0.904,0.856,0.856,0.326,0.901,0.852,0.852
2,ada_agnostic,48,4562,CatBoost,2.655,0.003,0.000,0.300,0.920,0.867,0.867,0.322,0.905,0.858,0.858,0.324,0.904,0.854,0.854
3,ada_agnostic,48,4562,CatBoost,1.747,0.000,0.002,0.253,0.950,0.892,0.892,0.322,0.904,0.853,0.853,0.325,0.902,0.852,0.852
4,ada_agnostic,48,4562,CatBoost,1.347,0.002,0.002,0.278,0.933,0.879,0.879,0.319,0.906,0.854,0.854,0.322,0.904,0.856,0.856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
790,SpeedDating,120,8378,XGBoost,8.820,0.062,0.064,0.211,0.960,0.916,0.916,0.302,0.875,0.870,0.870,0.304,0.875,0.868,0.868
791,SpeedDating,120,8378,XGBoost,6.445,0.098,0.014,0.210,0.961,0.916,0.916,0.302,0.875,0.870,0.870,0.302,0.876,0.869,0.869
792,SpeedDating,120,8378,XGBoost,7.156,0.069,0.066,0.152,0.989,0.953,0.953,0.302,0.876,0.871,0.871,0.301,0.877,0.871,0.871
793,SpeedDating,120,8378,XGBoost,17.695,0.100,0.036,0.009,1.000,1.000,1.000,0.383,0.875,0.868,0.868,0.379,0.875,0.871,0.871


In [19]:
# Display the table loaded from results_22-05.csv; the recorded preview shows
# EBMModel and NAMModel runs.
results_22_05

Unnamed: 0,dataset_name,dataset_num_features,dataset_num_instances,model_name,train_time,val_time,test_time,train_log_loss,train_auc,train_accuracy,train_f1,val_log_loss,val_auc,val_accuracy,val_f1,test_log_loss,test_auc,test_accuracy,test_f1
0,ada_agnostic,48,4562,EBMModel,11.040,0.001,0.001,0.285,0.927,0.869,0.869,0.321,0.905,0.853,0.853,0.321,0.906,0.853,0.853
1,ada_agnostic,48,4562,EBMModel,15.943,0.002,0.002,0.264,0.939,0.879,0.879,0.325,0.904,0.855,0.855,0.325,0.904,0.853,0.853
2,ada_agnostic,48,4562,EBMModel,27.447,0.001,0.001,0.269,0.937,0.877,0.877,0.321,0.906,0.853,0.853,0.320,0.906,0.855,0.855
3,ada_agnostic,48,4562,EBMModel,4.398,0.001,0.001,0.287,0.926,0.868,0.868,0.321,0.906,0.852,0.852,0.322,0.906,0.851,0.851
4,ada_agnostic,48,4562,EBMModel,11.892,0.001,0.002,0.271,0.935,0.875,0.875,0.324,0.904,0.852,0.852,0.325,0.904,0.851,0.851
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
175,SpeedDating,120,8378,NAMModel,43.489,0.057,0.059,0.333,0.848,0.852,0.852,0.340,0.839,0.850,0.850,0.339,0.840,0.848,0.848
176,SpeedDating,120,8378,NAMModel,40.852,0.209,0.220,0.337,0.843,0.849,0.849,0.341,0.838,0.846,0.846,0.341,0.838,0.850,0.850
177,SpeedDating,120,8378,NAMModel,55.375,0.048,0.046,0.329,0.852,0.852,0.852,0.337,0.842,0.847,0.847,0.337,0.842,0.850,0.850
178,SpeedDating,120,8378,NAMModel,141.860,0.555,0.490,0.324,0.858,0.855,0.855,0.332,0.848,0.852,0.852,0.332,0.847,0.851,0.851


In [20]:
# Display the table loaded from results_tan.csv; the recorded preview shows
# TANModel runs.
results_tan

Unnamed: 0,dataset_name,dataset_num_features,dataset_num_instances,model_name,train_time,val_time,test_time,train_log_loss,train_auc,train_accuracy,train_f1,val_log_loss,val_auc,val_accuracy,val_f1,test_log_loss,test_auc,test_accuracy,test_f1
0,colic,26,368,TANModel,0.078,0.001,0.001,0.372,0.895,0.857,0.857,0.407,0.888,0.856,0.856,0.416,0.877,0.845,0.845
1,colic,26,368,TANModel,0.086,0.001,0.001,0.367,0.896,0.857,0.857,0.392,0.883,0.851,0.851,0.407,0.869,0.842,0.842
2,colic,26,368,TANModel,0.087,0.001,0.001,0.365,0.893,0.848,0.848,0.406,0.87,0.837,0.837,0.392,0.864,0.843,0.843
3,colic,26,368,TANModel,0.089,0.001,0.001,0.372,0.895,0.857,0.857,0.407,0.888,0.856,0.856,0.416,0.877,0.845,0.845
4,colic,26,368,TANModel,0.077,0.001,0.001,0.379,0.89,0.857,0.857,0.409,0.875,0.856,0.856,0.429,0.868,0.851,0.851
5,colic,26,368,TANModel,0.074,0.001,0.001,0.37,0.891,0.853,0.853,0.394,0.883,0.853,0.853,0.404,0.87,0.845,0.845
6,credit-approval,15,690,TANModel,0.084,0.004,0.003,0.307,0.939,0.877,0.877,0.345,0.931,0.868,0.868,0.348,0.927,0.857,0.857
7,credit-approval,15,690,TANModel,0.074,0.003,0.002,0.323,0.931,0.866,0.866,0.381,0.917,0.861,0.861,0.36,0.917,0.851,0.851
8,credit-approval,15,690,TANModel,0.074,0.003,0.002,0.327,0.928,0.864,0.864,0.344,0.925,0.862,0.862,0.356,0.916,0.851,0.851
9,credit-approval,15,690,TANModel,0.093,0.004,0.004,0.307,0.939,0.877,0.877,0.345,0.931,0.868,0.868,0.348,0.927,0.857,0.857


In [21]:
# Remove the NAMModel and EBMModel rows from the main table.
# NOTE(review): presumably these are superseded by the runs in results_22_05,
# which is concatenated in the next cell — confirm.
results = results.loc[
    ~results['model_name'].isin(['NAMModel', 'EBMModel'])
]

In [22]:
# Stack the three result tables row-wise. Each frame keeps its own index
# labels, so labels repeat in the combined frame.
results = pd.concat(
    [
        results,
        results_22_05,
        results_tan,
    ],
    axis=0,
)

### Grouping by model

In [23]:
# Count the recorded runs per model.
results.groupby('model_name').size()

model_name
CatBoost                   90
EBMModel                   90
GaussianNaiveBayesModel    90
LDAModel                   77
LightGBM                   90
LinearModelInterpret       90
NAMModel                   90
RandomForest               90
TANModel                   60
XGBoost                    90
dtype: int64

In [24]:
# Display the combined results table after the concatenation above.
results

Unnamed: 0,dataset_name,dataset_num_features,dataset_num_instances,model_name,train_time,val_time,test_time,train_log_loss,train_auc,train_accuracy,train_f1,val_log_loss,val_auc,val_accuracy,val_f1,test_log_loss,test_auc,test_accuracy,test_f1
0,ada_agnostic,48,4562,CatBoost,2.803,0.002,0.000,0.262,0.943,0.887,0.887,0.316,0.907,0.859,0.859,0.317,0.907,0.857,0.857
1,ada_agnostic,48,4562,CatBoost,2.067,0.000,0.000,0.247,0.954,0.896,0.896,0.321,0.904,0.856,0.856,0.326,0.901,0.852,0.852
2,ada_agnostic,48,4562,CatBoost,2.655,0.003,0.000,0.300,0.920,0.867,0.867,0.322,0.905,0.858,0.858,0.324,0.904,0.854,0.854
3,ada_agnostic,48,4562,CatBoost,1.747,0.000,0.002,0.253,0.950,0.892,0.892,0.322,0.904,0.853,0.853,0.325,0.902,0.852,0.852
4,ada_agnostic,48,4562,CatBoost,1.347,0.002,0.002,0.278,0.933,0.879,0.879,0.319,0.906,0.854,0.854,0.322,0.904,0.856,0.856
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
55,socmob,5,1156,TANModel,0.065,0.003,0.003,0.198,0.963,0.927,0.927,0.227,0.955,0.926,0.926,0.226,0.958,0.925,0.925
56,socmob,5,1156,TANModel,0.057,0.003,0.003,0.195,0.964,0.922,0.922,0.214,0.960,0.922,0.922,0.221,0.959,0.923,0.923
57,socmob,5,1156,TANModel,0.056,0.003,0.003,0.206,0.958,0.920,0.920,0.214,0.956,0.926,0.926,0.217,0.956,0.920,0.920
58,socmob,5,1156,TANModel,0.052,0.003,0.003,0.211,0.959,0.916,0.916,0.222,0.953,0.918,0.918,0.221,0.955,0.918,0.918


In [25]:
# Per-model mean of the timing and test-quality columns, rounded to 3 decimals.
# Each column maps to a list of aggregations, so the result has two-level
# (column, 'mean') column labels.
results_grouped = (
    results
    .groupby('model_name')
    .agg({
        metric: ['mean']
        for metric in ('train_time', 'test_time', 'test_accuracy', 'test_log_loss')
    })
    .round(3)
)

In [26]:
# Collapse the two-level column labels into single names such as "train_time_mean".
results_grouped.columns = [
    '_'.join(level_labels).strip()
    for level_labels in results_grouped.columns.tolist()
]
# Display only: the reset frame is not assigned back here.
results_grouped.reset_index()

Unnamed: 0,model_name,train_time_mean,test_time_mean,test_accuracy_mean,test_log_loss_mean
0,CatBoost,29.598,0.011,0.856,0.33
1,EBMModel,36.846,0.003,0.85,0.338
2,GaussianNaiveBayesModel,0.074,0.004,0.751,2.152
3,LDAModel,0.406,0.004,0.816,0.415
4,LightGBM,2.271,0.046,0.833,0.642
5,LinearModelInterpret,1.506,0.004,0.799,0.445
6,NAMModel,30.562,0.084,0.795,0.451
7,RandomForest,0.436,0.016,0.827,0.403
8,TANModel,6.61,0.017,0.797,0.441
9,XGBoost,12.072,0.034,0.854,0.34


In [27]:
# Rank the models on each aggregated metric (rank 1 = best). Accuracy ranks
# descending because higher is better; log loss and both timings rank ascending.
# method='min' gives tied models the lowest rank of their group.
results_grouped = results_grouped.assign(**{
    'test_accuracy_rank': results_grouped['test_accuracy_mean'].rank(method='min', ascending=False),
    'test_log_loss_rank': results_grouped['test_log_loss_mean'].rank(method='min', ascending=True),
    'train_time_rank': results_grouped['train_time_mean'].rank(method='min', ascending=True),
    'test_time_rank': results_grouped['test_time_mean'].rank(method='min', ascending=True),
})

In [28]:
# Move model_name from the index into a regular column.
# Not idempotent: running this cell again would add an extra 'index' column.
results_grouped = results_grouped.reset_index()

In [29]:
# Show the quality metrics and their ranks per model.
results_grouped.loc[:, [
    'model_name',
    'test_accuracy_mean',
    'test_log_loss_mean',
    'test_accuracy_rank',
    'test_log_loss_rank',
]]

Unnamed: 0,model_name,test_accuracy_mean,test_log_loss_mean,test_accuracy_rank,test_log_loss_rank
0,CatBoost,0.856,0.33,1.0,1.0
1,EBMModel,0.85,0.338,3.0,2.0
2,GaussianNaiveBayesModel,0.751,2.152,10.0,10.0
3,LDAModel,0.816,0.415,6.0,5.0
4,LightGBM,0.833,0.642,4.0,9.0
5,LinearModelInterpret,0.799,0.445,7.0,7.0
6,NAMModel,0.795,0.451,9.0,8.0
7,RandomForest,0.827,0.403,5.0,4.0
8,TANModel,0.797,0.441,8.0,6.0
9,XGBoost,0.854,0.34,2.0,3.0


In [30]:
# Show the timing metrics and their ranks per model.
results_grouped.loc[:, [
    'model_name',
    'train_time_mean',
    'test_time_mean',
    'train_time_rank',
    'test_time_rank',
]]

Unnamed: 0,model_name,train_time_mean,test_time_mean,train_time_rank,test_time_rank
0,CatBoost,29.598,0.011,8.0,5.0
1,EBMModel,36.846,0.003,10.0,1.0
2,GaussianNaiveBayesModel,0.074,0.004,1.0,2.0
3,LDAModel,0.406,0.004,2.0,2.0
4,LightGBM,2.271,0.046,5.0,9.0
5,LinearModelInterpret,1.506,0.004,4.0,2.0
6,NAMModel,30.562,0.084,9.0,10.0
7,RandomForest,0.436,0.016,3.0,6.0
8,TANModel,6.61,0.017,6.0,7.0
9,XGBoost,12.072,0.034,7.0,8.0


### Grouping by dataset

In [31]:
# Give every row a unique label: the concatenated frame kept each source's own
# index, and the idxmax / .loc selection in the next cell works by label.
results = results.reset_index(drop=True)

In [32]:
# Label of the row with the highest test_accuracy in each (model, dataset) group.
idx = (
    results
    .groupby(['model_name', 'dataset_name'])['test_accuracy']
    .idxmax()
)

# Select those rows, round to 5 decimals and renumber them from zero.
best_results = results.loc[idx].round(5).reset_index(drop=True)

In [33]:
# Spot-check: best test accuracy of every model on the kc1 dataset.
best_results.loc[
    best_results['dataset_name'].eq('kc1'),
    ['dataset_name', 'model_name', 'test_accuracy'],
]

Unnamed: 0,dataset_name,model_name,test_accuracy
9,kc1,CatBoost,0.859
24,kc1,EBMModel,0.859
39,kc1,GaussianNaiveBayesModel,0.831
54,kc1,LDAModel,0.858
69,kc1,LightGBM,0.859
84,kc1,LinearModelInterpret,0.856
99,kc1,NAMModel,0.851
114,kc1,RandomForest,0.859
126,kc1,TANModel,0.853
139,kc1,XGBoost,0.859


In [34]:
def get_best_model_and_accuracy_old(dataset_name):
    """Legacy top-3 lookup over the global ``best_results`` table.

    Returns a 6-tuple ``(best_model, best_accuracy, second_best_model,
    second_best_accuracy, third_best_model, third_best_accuracy)`` for the rows
    whose ``dataset_name`` matches, or six ``None`` values if anything raises.

    NOTE(review): the second and third model names are taken with
    ``nlargest(k).idxmin()``. When the top accuracies are tied, ``idxmin``
    returns the first label holding the minimum, which can be the same row as
    the best model, so a name may repeat. ``get_best_model_and_accuracy``
    (defined right below) sorts the rows instead; this version is not called
    elsewhere in the visible notebook.
    """
    dataset = best_results[best_results['dataset_name'] == dataset_name]
    try:
        # Row label of the maximum accuracy -> name of the best model.
        best_model = dataset.loc[dataset['test_accuracy'].idxmax()]['model_name']
        # Label of the smallest value among the top-2 / top-3 accuracies.
        second_best_model = dataset.loc[dataset['test_accuracy'].nlargest(2).idxmin()]['model_name']
        third_best_model = dataset.loc[dataset['test_accuracy'].nlargest(3).idxmin()]['model_name']
        best_accuracy = dataset['test_accuracy'].max()
        second_best_accuracy = dataset['test_accuracy'].nlargest(2).min()
        third_best_accuracy = dataset['test_accuracy'].nlargest(3).min()
        return best_model, best_accuracy, second_best_model, second_best_accuracy, third_best_model, third_best_accuracy
    except Exception as e:
        # Any failure (e.g. no rows for this dataset) is reported as all-None.
        return None, None, None, None, None, None

def get_best_model_and_accuracy(dataset_name):
    """Return the three best models for a dataset from the global ``best_results``.

    Parameters
    ----------
    dataset_name : str
        Value to match against ``best_results['dataset_name']``.

    Returns
    -------
    tuple
        ``(best_model, best_accuracy, second_best_model, second_best_accuracy,
        third_best_model, third_best_accuracy)``, ordered by descending
        ``test_accuracy``. If fewer than three distinct models are available
        for the dataset, six ``None`` values are returned.

    Raises
    ------
    KeyError
        If ``best_results`` lacks a required column. Only the "not enough
        models" case is mapped to ``None`` values; schema problems are
        surfaced instead of being silently swallowed.
    """
    dataset = best_results[best_results['dataset_name'] == dataset_name]

    # Sort by descending accuracy and keep one row per model. The stable sort
    # keeps tied accuracies in their original row order, so the result is
    # deterministic.
    top_unique_models = (
        dataset
        .sort_values(by='test_accuracy', ascending=False, kind='mergesort')
        .drop_duplicates(subset='model_name')
    )
    top3 = top_unique_models.head(3)

    # Not enough models to fill all three places.
    if len(top3) < 3:
        return None, None, None, None, None, None

    ranked = []
    for position in range(3):
        ranked.append(top3['model_name'].iloc[position])
        ranked.append(top3['test_accuracy'].iloc[position])
    return tuple(ranked)

In [35]:
# Add the top-3 models and their accuracies for each dataset: the 6-tuple
# returned by the lookup is expanded into six columns through a pd.Series.
metafeatures_grouped[[
    'best_model', 'best_accuracy',
    'second_best_model', 'second_best_accuracy',
    'third_best_model', 'third_best_accuracy',
]] = metafeatures_grouped['dataset_name'].apply(
    lambda short_name: pd.Series(get_best_model_and_accuracy(short_name))
)

In [36]:
# Show the dataset sizes next to the three best models per dataset.
metafeatures_grouped.loc[:, [
    'dataset_name', 'num_instances', 'num_features',
    'best_model', 'best_accuracy',
    'second_best_model', 'second_best_accuracy',
    'third_best_model', 'third_best_accuracy',
]]

Unnamed: 0,dataset_name,num_instances,num_features,best_model,best_accuracy,second_best_model,second_best_accuracy,third_best_model,third_best_accuracy
1,ada_agnostic,4562,48,XGBoost,0.858,CatBoost,0.857,EBMModel,0.855
2,colic,368,26,XGBoost,0.875,CatBoost,0.867,RandomForest,0.858
3,credit-approval,690,15,EBMModel,0.88,CatBoost,0.877,RandomForest,0.871
4,credit-g,1000,20,XGBoost,0.769,EBMModel,0.76,CatBoost,0.746
5,electricity,45312,8,XGBoost,0.944,LightGBM,0.914,CatBoost,0.905
6,elevators,16599,18,EBMModel,0.905,LinearModelInterpret,0.899,CatBoost,0.898
7,heart-h,294,13,CatBoost,0.837,EBMModel,0.83,RandomForest,0.827
8,jasmine,2984,144,CatBoost,0.817,XGBoost,0.815,LightGBM,0.812
9,kc1,2109,21,CatBoost,0.859,EBMModel,0.859,LightGBM,0.859
10,nomao,34465,118,LightGBM,0.974,XGBoost,0.973,CatBoost,0.972


### Accuracy rank vs. training-time rank plot

In [39]:
import plotly.express as px
import pandas as pd
import plotly

# Data: take the ranks from `results_grouped` (computed above) instead of
# hand-copying them, so the plot cannot drift from the tables it summarises.

# Short display labels for the plot; models without an entry keep their name.
display_names = {
    "EBMModel": "EBM",
    "GaussianNaiveBayesModel": "GaussianNB",
    "LDAModel": "LDA",
    "LinearModelInterpret": "LogisticReg",
    "NAMModel": "NAM",
    "TANModel": "TAN",
}

data = {
    "model": [
        display_names.get(model_name, model_name)
        for model_name in results_grouped["model_name"]
    ],
    # .rank() yields floats; cast to int for clean axis values.
    "accuracy_rank": results_grouped["test_accuracy_rank"].astype(int).tolist(),
    "train_time_rank": results_grouped["train_time_rank"].astype(int).tolist(),
}

df = pd.DataFrame(data)

# Add the interpretability flag used for colouring the points.
interpretable_models = {"EBM", "GaussianNB", "LDA", "LogisticReg", "NAM", "TAN"}
df["Interpretable"] = df["model"].apply(lambda x: "Sí" if x in interpretable_models else "No")

# Scatter plot of accuracy rank against training-time rank, one labelled point per model.
fig = px.scatter(
    df,
    x="train_time_rank",
    y="accuracy_rank",
    text="model",
    color="Interpretable",
    color_discrete_map={"Sí": "green", "No": "red"},
    labels={
        "train_time_rank": "Ranking de Tiempo de Entrenamiento",
        "accuracy_rank": "Ranking de Accuracy",
        "Interpretable": "¿Interpretable?"
    }
)

# Reverse both axes (rank 1 is better than rank 10).
fig.update_layout(
    xaxis=dict(autorange="reversed"),
    yaxis=dict(autorange="reversed"),
    template="seaborn",
    width=700,
    height=600,
    font=dict(color="#000000", size=18),
    xaxis_title_font=dict(size=20),
    yaxis_title_font=dict(size=20),
    legend_title_font=dict(size=16),
)

# Put each label above its marker and allow labels to extend past the plot area.
fig.update_traces(
    textposition='top center',
    cliponaxis=False,
)

fig.show()






In [42]:
# Render the figure through plotly's offline mode; the call returns the name of
# the generated HTML file ('temp-plot.html' in the recorded output).
# NOTE(review): image='svg' and image_filename presumably make the opened page
# offer an SVG download of the figure — confirm against the plotly docs.
plotly.offline.plot(fig, image_filename="plot.svg", image='svg')

'temp-plot.html'

In [41]:
# Save the figure as a PNG file (the format is passed explicitly below).
# NOTE(review): static image export presumably needs plotly's image-export
# backend installed — confirm in the target environment.
fig.write_image("trade_off_accuracy_vs_train_time.png", format="png")