In [1]:
import pandas as pd

In [2]:
# Load the leaderboard and keep two decimals. The rounded values are the ones
# ranked further down, so scores that agree to two decimals end up tied.
performances = pd.read_csv('resource/d3m_leaderboard.csv').round(2)
performances.head()

Unnamed: 0,Dataset,Task,Metric,AlphaD3M,AutonML,Ensemble,Aika,Distil AutoML,Autoflow,Axolotl,Drori
0,124_120_mnist_8747,IMAGE_CLASSIFICATION,accuracy,0.98,0.94,0.46,0.18,0.94,0.11,,
1,124_138_cifar100_1858,IMAGE_CLASSIFICATION,accuracy,0.67,0.48,0.42,0.12,0.48,0.01,,
2,124_16_fashion_mnist,IMAGE_CLASSIFICATION,accuracy,0.9,0.83,0.84,0.12,0.85,0.1,,
3,124_174_cifar10_MIN_METADATA,IMAGE_CLASSIFICATION,accuracy,0.88,0.82,0.84,0.27,0.8,0.1,,
4,124_188_usps_MIN_METADATA,IMAGE_CLASSIFICATION,accuracy,0.96,0.95,0.94,0.26,0.92,0.18,0.11,


In [10]:
# Utils

def calculate_rank(performances, ascending):
    """Rank the AutoML systems against each other on every dataset.

    Parameters
    ----------
    performances : pandas.DataFrame
        One row per dataset. Must contain the columns 'Dataset', 'Task' and
        'Metric'; every remaining column is treated as the scores of one
        system. The frame is not modified.
    ascending : bool
        Forwarded to ``DataFrame.rank``: True gives rank 1 to the smallest
        score, False gives rank 1 to the largest score.

    Returns
    -------
    pandas.DataFrame
        Float ranks with one row per system and one column per dataset.
        Tied scores share the lowest rank of the tie (``method='min'``).
        Missing scores receive the worst possible rank, i.e. the number of
        systems.

    Raises
    ------
    KeyError
        If 'Dataset', 'Task' or 'Metric' is not a column of ``performances``.
    """
    scores = (
        performances
        .set_index('Dataset')              # dataset names become the column labels after the transpose
        .drop(columns=['Task', 'Metric'])  # only the per-system score columns remain
        # Rank real numbers, not Python objects: transposing a frame that still
        # holds text columns would leave everything as object dtype, and string
        # scores would then be ordered lexicographically without any error.
        .astype(float)
        .T
    )
    ranks = scores.rank(ascending=ascending, method='min')
    worst_rank = float(ranks.shape[0])  # Number of AutoML systems

    # Systems that did not produce a pipeline (NaN score) get the worst rank
    return ranks.fillna(worst_rank)

In [11]:
# Rows whose metric is an error measure are split off from all other rows,
# because the two groups are ranked in opposite directions further down.
filter_by_metric = performances['Metric'].isin([
    'mean_absolute_error',
    'mean_squared_error',
    'root_mean_squared_error',
])
error_based_tasks = performances.loc[filter_by_metric]
nonerror_based_tasks = performances.loc[~filter_by_metric]

In [12]:
# Preview the first rows of the tasks that are not scored with an error metric
nonerror_based_tasks.head(n=5)

Unnamed: 0,Dataset,Task,Metric,AlphaD3M,AutonML,Ensemble,Aika,Distil AutoML,Autoflow,Axolotl,Drori
0,124_120_mnist_8747,IMAGE_CLASSIFICATION,accuracy,0.98,0.94,0.46,0.18,0.94,0.11,,
1,124_138_cifar100_1858,IMAGE_CLASSIFICATION,accuracy,0.67,0.48,0.42,0.12,0.48,0.01,,
2,124_16_fashion_mnist,IMAGE_CLASSIFICATION,accuracy,0.9,0.83,0.84,0.12,0.85,0.1,,
3,124_174_cifar10_MIN_METADATA,IMAGE_CLASSIFICATION,accuracy,0.88,0.82,0.84,0.27,0.8,0.1,,
4,124_188_usps_MIN_METADATA,IMAGE_CLASSIFICATION,accuracy,0.96,0.95,0.94,0.26,0.92,0.18,0.11,


In [13]:
# Preview the first rows of the tasks that are scored with an error metric
error_based_tasks.head(n=5)

Unnamed: 0,Dataset,Task,Metric,AlphaD3M,AutonML,Ensemble,Aika,Distil AutoML,Autoflow,Axolotl,Drori
10,196_autoMpg_MIN_METADATA,TABULAR_REGRESSION,mean_squared_error,6.57,9.12,5.74,11.95,7.49,6.01,15.36,7.03
11,22_handgeometry_MIN_METADATA,IMAGE_REGRESSION,mean_squared_error,0.24,0.23,0.23,0.14,0.8,0.36,0.36,
12,26_radon_seed_MIN_METADATA,TABULAR_REGRESSION,root_mean_squared_error,0.02,0.02,0.24,0.03,0.02,0.06,1.4,0.02
23,534_cps_85_wages_MIN_METADATA,TABULAR_REGRESSION,mean_squared_error,20.11,20.35,22.07,23.15,24.86,21.44,,20.7
24,56_sunspots_MIN_METADATA,TIME_SERIES_FORECASTING,root_mean_squared_error,34.55,11.82,8.64,8.45,58.3,9.4,90.6,


In [14]:
# Non-error metrics: rank in descending order, so the largest score gets rank 1
rank1 = calculate_rank(nonerror_based_tasks, ascending=False)
rank1

Dataset,124_120_mnist_8747,124_138_cifar100_1858,124_16_fashion_mnist,124_174_cifar10_MIN_METADATA,124_188_usps_MIN_METADATA,124_214_coil20_MIN_METADATA,124_95_uc_merced_land_use_MIN_METADATA,1491_one_hundred_plants_margin_MIN_METADATA,1567_poker_hand_MIN_METADATA,185_baseball_MIN_METADATA,...,SEMI_1459_artificial_characters_MIN_METADATA,SEMI_155_pokerhand_MIN_METADATA,loan_status_MIN_METADATA,political_instability_MIN_METADATA,uu1_datasmash_MIN_METADATA,uu4_SPECT_MIN_METADATA,uu5_heartstatlog_MIN_METADATA,uu6_hepatitis_MIN_METADATA,uu7_pima_diabetes_MIN_METADATA,uu_101_object_categories_MIN_METADATA
AlphaD3M,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,5.0,...,4.0,4.0,6.0,6.0,1.0,7.0,3.0,7.0,4.0,1.0
AutonML,2.0,2.0,4.0,3.0,2.0,1.0,8.0,7.0,2.0,1.0,...,1.0,1.0,2.0,1.0,1.0,1.0,4.0,2.0,5.0,2.0
Ensemble,4.0,4.0,3.0,2.0,3.0,1.0,2.0,2.0,4.0,7.0,...,3.0,3.0,1.0,1.0,1.0,1.0,1.0,1.0,2.0,3.0
Aika,5.0,5.0,5.0,5.0,5.0,5.0,3.0,1.0,3.0,2.0,...,1.0,7.0,5.0,1.0,1.0,3.0,6.0,4.0,5.0,4.0
Distil AutoML,2.0,2.0,2.0,4.0,4.0,4.0,8.0,8.0,7.0,2.0,...,5.0,2.0,7.0,1.0,7.0,5.0,7.0,6.0,2.0,8.0
Autoflow,6.0,6.0,6.0,6.0,6.0,8.0,5.0,3.0,6.0,4.0,...,6.0,6.0,8.0,8.0,1.0,3.0,1.0,5.0,1.0,5.0
Axolotl,8.0,8.0,8.0,8.0,7.0,8.0,4.0,5.0,8.0,5.0,...,7.0,5.0,3.0,5.0,1.0,6.0,5.0,3.0,5.0,8.0
Drori,8.0,8.0,8.0,8.0,8.0,8.0,8.0,3.0,5.0,8.0,...,8.0,8.0,4.0,8.0,8.0,8.0,8.0,8.0,8.0,8.0


In [15]:
# Error metrics: rank in ascending order, so the smallest error gets rank 1
rank2 = calculate_rank(error_based_tasks, ascending=True)
rank2

Dataset,196_autoMpg_MIN_METADATA,22_handgeometry_MIN_METADATA,26_radon_seed_MIN_METADATA,534_cps_85_wages_MIN_METADATA,56_sunspots_MIN_METADATA,56_sunspots_monthly_MIN_METADATA,60_jester_MIN_METADATA,LL0_207_autoPrice_MIN_METADATA,LL1_736_population_spawn_MIN_METADATA,LL1_736_population_spawn_simpler_MIN_METADATA,...,LL1_retail_sales_total_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_100_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_70_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_80_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_90_MIN_METADATA,LL1_terra_leaf_angle_mean_long_form_s4_MIN_METADATA,kaggle_music_hackathon_MIN_METADATA,uu2_gp_hyperparameter_estimation_MIN_METADATA,uu3_world_development_indicators_MIN_METADATA,uu3_world_development_indicators_raw
AlphaD3M,3.0,4.0,1.0,1.0,5.0,6.0,3.0,1.0,1.0,2.0,...,5.0,5.0,1.0,6.0,6.0,1.0,5.0,3.0,1.0,3.0
AutonML,6.0,2.0,1.0,2.0,4.0,3.0,8.0,4.0,4.0,4.0,...,1.0,3.0,6.0,4.0,4.0,5.0,1.0,2.0,4.0,2.0
Ensemble,1.0,2.0,7.0,5.0,2.0,4.0,2.0,6.0,3.0,6.0,...,2.0,1.0,2.0,1.0,1.0,3.0,2.0,1.0,2.0,1.0
Aika,7.0,1.0,5.0,6.0,1.0,2.0,1.0,7.0,2.0,3.0,...,3.0,2.0,3.0,3.0,2.0,1.0,6.0,3.0,8.0,8.0
Distil AutoML,5.0,7.0,1.0,7.0,6.0,8.0,8.0,2.0,8.0,1.0,...,6.0,8.0,8.0,8.0,8.0,8.0,3.0,8.0,3.0,8.0
Autoflow,2.0,5.0,6.0,4.0,3.0,5.0,4.0,3.0,5.0,5.0,...,8.0,4.0,5.0,5.0,5.0,8.0,8.0,8.0,8.0,8.0
Axolotl,8.0,5.0,8.0,8.0,7.0,1.0,8.0,8.0,8.0,7.0,...,4.0,6.0,4.0,2.0,3.0,3.0,8.0,8.0,8.0,8.0
Drori,4.0,8.0,1.0,3.0,8.0,8.0,8.0,5.0,8.0,8.0,...,7.0,8.0,8.0,8.0,8.0,8.0,4.0,3.0,8.0,8.0


In [16]:
# Place both rank tables side by side (one column per dataset);
# join='inner' keeps only the row labels present in both tables.
ranks = pd.concat(
    [rank1, rank2],
    axis='columns',
    join='inner',
)
ranks

Dataset,124_120_mnist_8747,124_138_cifar100_1858,124_16_fashion_mnist,124_174_cifar10_MIN_METADATA,124_188_usps_MIN_METADATA,124_214_coil20_MIN_METADATA,124_95_uc_merced_land_use_MIN_METADATA,1491_one_hundred_plants_margin_MIN_METADATA,1567_poker_hand_MIN_METADATA,185_baseball_MIN_METADATA,...,LL1_retail_sales_total_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_100_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_70_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_80_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_90_MIN_METADATA,LL1_terra_leaf_angle_mean_long_form_s4_MIN_METADATA,kaggle_music_hackathon_MIN_METADATA,uu2_gp_hyperparameter_estimation_MIN_METADATA,uu3_world_development_indicators_MIN_METADATA,uu3_world_development_indicators_raw
AlphaD3M,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,5.0,...,5.0,5.0,1.0,6.0,6.0,1.0,5.0,3.0,1.0,3.0
AutonML,2.0,2.0,4.0,3.0,2.0,1.0,8.0,7.0,2.0,1.0,...,1.0,3.0,6.0,4.0,4.0,5.0,1.0,2.0,4.0,2.0
Ensemble,4.0,4.0,3.0,2.0,3.0,1.0,2.0,2.0,4.0,7.0,...,2.0,1.0,2.0,1.0,1.0,3.0,2.0,1.0,2.0,1.0
Aika,5.0,5.0,5.0,5.0,5.0,5.0,3.0,1.0,3.0,2.0,...,3.0,2.0,3.0,3.0,2.0,1.0,6.0,3.0,8.0,8.0
Distil AutoML,2.0,2.0,2.0,4.0,4.0,4.0,8.0,8.0,7.0,2.0,...,6.0,8.0,8.0,8.0,8.0,8.0,3.0,8.0,3.0,8.0
Autoflow,6.0,6.0,6.0,6.0,6.0,8.0,5.0,3.0,6.0,4.0,...,8.0,4.0,5.0,5.0,5.0,8.0,8.0,8.0,8.0,8.0
Axolotl,8.0,8.0,8.0,8.0,7.0,8.0,4.0,5.0,8.0,5.0,...,4.0,6.0,4.0,2.0,3.0,3.0,8.0,8.0,8.0,8.0
Drori,8.0,8.0,8.0,8.0,8.0,8.0,8.0,3.0,5.0,8.0,...,7.0,8.0,8.0,8.0,8.0,8.0,4.0,3.0,8.0,8.0


In [17]:
# Both summaries are derived from the per-dataset rank columns only.
# Dropping the summary columns first (if they already exist) means that
#  - 'average_rank' can never be counted as a win when it equals exactly 1.0, and
#  - running this cell a second time does not fold the previous summary
#    columns into the new average.
dataset_ranks = ranks.drop(columns=['average_rank', 'winner_pipelines'], errors='ignore')

ranks = dataset_ranks.assign(
    average_rank=dataset_ranks.mean(axis=1).round(3),      # mean rank over all datasets
    winner_pipelines=dataset_ranks.eq(1.0).sum(axis=1),    # number of datasets with rank 1 (ties included)
)
ranks

Dataset,124_120_mnist_8747,124_138_cifar100_1858,124_16_fashion_mnist,124_174_cifar10_MIN_METADATA,124_188_usps_MIN_METADATA,124_214_coil20_MIN_METADATA,124_95_uc_merced_land_use_MIN_METADATA,1491_one_hundred_plants_margin_MIN_METADATA,1567_poker_hand_MIN_METADATA,185_baseball_MIN_METADATA,...,LL1_terra_canopy_height_long_form_s4_70_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_80_MIN_METADATA,LL1_terra_canopy_height_long_form_s4_90_MIN_METADATA,LL1_terra_leaf_angle_mean_long_form_s4_MIN_METADATA,kaggle_music_hackathon_MIN_METADATA,uu2_gp_hyperparameter_estimation_MIN_METADATA,uu3_world_development_indicators_MIN_METADATA,uu3_world_development_indicators_raw,average_rank,winner_pipelines
AlphaD3M,1.0,1.0,1.0,1.0,1.0,1.0,1.0,6.0,1.0,5.0,...,1.0,6.0,6.0,1.0,5.0,3.0,1.0,3.0,2.848,49
AutonML,2.0,2.0,4.0,3.0,2.0,1.0,8.0,7.0,2.0,1.0,...,6.0,4.0,4.0,5.0,1.0,2.0,4.0,2.0,2.893,39
Ensemble,4.0,4.0,3.0,2.0,3.0,1.0,2.0,2.0,4.0,7.0,...,2.0,1.0,1.0,3.0,2.0,1.0,2.0,1.0,2.902,30
Aika,5.0,5.0,5.0,5.0,5.0,5.0,3.0,1.0,3.0,2.0,...,3.0,3.0,2.0,1.0,6.0,3.0,8.0,8.0,3.991,21
Distil AutoML,2.0,2.0,2.0,4.0,4.0,4.0,8.0,8.0,7.0,2.0,...,8.0,8.0,8.0,8.0,3.0,8.0,3.0,8.0,4.679,20
Autoflow,6.0,6.0,6.0,6.0,6.0,8.0,5.0,3.0,6.0,4.0,...,5.0,5.0,5.0,8.0,8.0,8.0,8.0,8.0,5.321,11
Axolotl,8.0,8.0,8.0,8.0,7.0,8.0,4.0,5.0,8.0,5.0,...,4.0,2.0,3.0,3.0,8.0,8.0,8.0,8.0,5.732,10
Drori,8.0,8.0,8.0,8.0,8.0,8.0,8.0,3.0,5.0,8.0,...,8.0,8.0,8.0,8.0,4.0,3.0,8.0,8.0,6.848,7


In [18]:
# Keep only the two summary columns, number of wins first
ranks = ranks.loc[:, ['winner_pipelines', 'average_rank']]
ranks

Dataset,winner_pipelines,average_rank
AlphaD3M,49,2.848
AutonML,39,2.893
Ensemble,30,2.902
Aika,21,3.991
Distil AutoML,20,4.679
Autoflow,11,5.321
Axolotl,10,5.732
Drori,7,6.848
