# Results:

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

Table of contents:
1. Global performances
2. Time
3. Top1 Count
4. Per Dataset analysis
5. Per architecture analysis

In [2]:
# Directory fragment appended to `output_path` when exported tables are written;
# kept as its own variable so exports can be routed around the .gitignore rules.
save_repository = "results/"
# Directory the benchmark CSV files are read from.
output_path = "results/"

In [3]:
# A second experiment was run on a smaller set of datasets, bagging 10 weak
# learners for each method. Set the flag to 1 to analyse that experiment
# instead of the main benchmark (0).
use_bagging_results = 0
# Prefix inserted in exported file names so bagging exports do not overwrite the main ones.
suffix_save_path = "bagging_" if use_bagging_results else ""

#### We ran 8060 experiments in regression, corresponding to 26 datasets for 31 methods with 10 random seeds.  In global average RMSE; AdaCap+SNN is first, GLU is second, CAT is fifth. 

In [7]:
# Load the per-run results (one row per dataset / seed / method) for the selected experiment.
if use_bagging_results:
    regression_df_formated = pd.read_csv(output_path + "regression_bagging.csv", index_col=0)
    # New method name = method name + "_bagging" for the bagged runs.
    regression_df_formated["method"] = [
        value + "_bagging" * isbag
        for value, isbag in zip(
            regression_df_formated["method"].values,
            regression_df_formated["Use_bagging"].values,
        )
    ]
    # Keep only the first method seed to evaluate single learners.
    # .copy() makes the filtered frame independent, so the column assignments
    # below do not trigger SettingWithCopyWarning.
    regression_df_formated = regression_df_formated[regression_df_formated["method_seed"] == 0].copy()
else:
    regression_df_formated = pd.read_csv(output_path + "regression_benchmark.csv", index_col=0)

regression_df_formated["R2"] = regression_df_formated["R2"].values.astype(float)
# RMSE is derived from R2 as sqrt(1 - R2).
# NOTE(review): this presumably assumes a standardised target (unit variance) - confirm.
regression_df_formated["RMSE"] = np.sqrt(1. - regression_df_formated["R2"].values)

# 1. Global stats across all datasets averaged over 10 seeds


### 1.A Global stats: R2 score
In global average R2, CAT is first, AdaCap+SNN is second, AdaCap+ResBlock is third, GLU is fourth.

In [8]:
# Methods ranked from highest to lowest R2, averaged over every (dataset, seed) run.
# numeric_only=True keeps the mean from raising on the text columns
# (dataset_name, category) with pandas >= 2.0.
global_r2_order = (
    regression_df_formated.groupby("method")
    .mean(numeric_only=True)
    .sort_values("R2")
    .iloc[::-1]
    .index.values
)
# One row per (seed, dataset) run, one column per method.
r2_by_run = regression_df_formated.pivot_table(
    values="R2",
    index=["dataset_seed", "dataset_name"],
    columns=["method"],
    aggfunc="sum",
)
# Distribution of R2 across runs, columns ordered best method first.
r2_by_run[global_r2_order].describe().loc[["mean", "std", "min", "25%", "50%", "75%", "max"]]

method,CAT,adacapnetselu,adacapnetresblock,regularnetglu,regularnetstandard,adacapnetbatchresblock,adacapnetstandard,adacapnetfastselu,adacapnetfast,adacapnetglu,...,NuSVM,XRF,Enet,Lasso,regularnetbatchstandard,MARS,Ridge,CART,Kernel,Intercept
mean,0.772448,0.771019,0.766774,0.766223,0.7653,0.764078,0.763541,0.763255,0.761453,0.76133,...,0.72855,0.720501,0.702501,0.702078,0.701647,0.697824,0.661025,0.570802,0.542907,-0.01576047
std,0.223197,0.228893,0.22653,0.232569,0.239274,0.224077,0.240056,0.233555,0.231419,0.252776,...,0.220941,0.361331,0.248037,0.249883,0.216932,0.295101,0.374696,0.461553,0.916477,0.04387568
min,-0.402508,-0.267542,-0.100717,-0.391394,-0.698491,-0.100717,-0.264898,-0.356381,-0.228752,-0.386675,...,-0.174411,-1.65111,-0.360848,-0.441897,-0.21125,-1.096739,-1.792123,-2.800023,-6.440563,-0.4926711
25%,0.664486,0.621369,0.633756,0.631819,0.642985,0.635799,0.630829,0.614657,0.625586,0.632503,...,0.598709,0.607137,0.562069,0.563544,0.586803,0.584504,0.542258,0.399216,0.52288,-0.01141847
50%,0.822513,0.849194,0.839521,0.854901,0.849839,0.828021,0.839951,0.833515,0.828784,0.856584,...,0.790562,0.821593,0.777346,0.777697,0.728246,0.777522,0.760216,0.694567,0.754622,-0.001557771
75%,0.948972,0.958954,0.951684,0.952774,0.950078,0.954102,0.948954,0.959755,0.945448,0.953509,...,0.901779,0.922504,0.888873,0.889988,0.865729,0.894973,0.891403,0.837555,0.893591,-0.0001844964
max,0.996929,0.999133,0.999264,0.999099,0.999191,0.999524,0.999198,0.999572,0.998602,0.999556,...,0.997028,0.995957,0.986195,0.986017,0.978427,0.984711,0.98586,0.985416,0.985887,-1.54166e-07


### 1.B Global stats: RMSE score

In [9]:
# Methods ranked from lowest to highest RMSE, averaged over every (dataset, seed) run.
# Stored under its own name so the R2 ranking in `global_r2_order` is not overwritten.
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
global_rmse_order = (
    regression_df_formated.groupby("method")
    .mean(numeric_only=True)
    .sort_values("RMSE")
    .index.values
)
# One row per (seed, dataset) run, one column per method.
rmse_by_run = regression_df_formated.pivot_table(
    values="RMSE",
    index=["dataset_seed", "dataset_name"],
    columns=["method"],
    aggfunc="sum",
)
# Distribution of RMSE across runs, columns ordered best method first.
rmse_by_run[global_rmse_order].describe().loc[["mean", "std", "min", "25%", "50%", "75%", "max"]]

method,adacapnetselu,regularnetglu,adacapnetglu,adacapnetresblock,CAT,regularnetstandard,adacapnetstandard,adacapnetfastselu,adacapnetbatchresblock,regularnetselu,...,xgb_sklearn,NuSVM,MARS,Enet,Lasso,regularnetbatchstandard,Ridge,Kernel,CART,Intercept
mean,0.414782,0.420134,0.42063,0.421447,0.422151,0.423022,0.423317,0.424583,0.425708,0.426012,...,0.456943,0.474233,0.495034,0.500131,0.500362,0.508979,0.521812,0.559008,0.583542,1.007642
std,0.239075,0.239761,0.248954,0.236268,0.222555,0.236575,0.239755,0.238101,0.234318,0.240294,...,0.237744,0.216178,0.239454,0.218061,0.218502,0.198608,0.258737,0.381001,0.29836,0.020477
min,0.029453,0.030016,0.02108,0.027131,0.055419,0.02844,0.028313,0.020691,0.021808,0.027692,...,0.055007,0.054514,0.123648,0.117495,0.11825,0.146879,0.118912,0.1188,0.120765,1.0
25%,0.202551,0.217291,0.215582,0.219807,0.225883,0.223372,0.225895,0.200578,0.214189,0.204294,...,0.295625,0.313356,0.324079,0.333355,0.331681,0.366425,0.32954,0.326204,0.403044,1.000092
50%,0.388337,0.380917,0.378703,0.400598,0.421289,0.387505,0.400056,0.407997,0.414703,0.406079,...,0.42624,0.457644,0.471674,0.471862,0.471489,0.521297,0.489661,0.495354,0.552646,1.000779
75%,0.61533,0.606779,0.606215,0.605181,0.579236,0.597507,0.607595,0.620759,0.60349,0.62507,...,0.632234,0.633475,0.644561,0.661764,0.660648,0.642804,0.676565,0.690727,0.775101,1.005693
max,1.125852,1.179574,1.177572,1.049151,1.184275,1.303262,1.124677,1.164638,1.049151,1.112255,...,1.395383,1.083702,1.448012,1.166554,1.20079,1.100568,1.670965,2.72774,1.949365,1.221749


### 1.C RMSE ratio 
- RMSE measures an error, so it makes more sense to compare RMSE values in relative terms. If on dataset 1 the RMSE of the new method is 0.001 and the RMSE of the old method is 0.002, the new method cuts the error in half, which is massive. Meanwhile, if on dataset 2 the RMSE of the new method is 0.101 and the RMSE of the old method is 0.1, the new method increases the error by 1%, which is negligible. Yet when averaging raw RMSE values across the two datasets, both methods have the same average RMSE of 0.051, which would wrongly suggest that the new method is not useful.
- Note that when averaging over 10 seeds, the method which performs best can have an average "Ratio_Best_RMSE_seed" above 1, if it did not uniformly outperform all others over all seeds.

Metric "Ratio_Best_RMSE_seed" compares RMSE of a method on a dataset for a seed to minimal RMSE on same dataset with same seed : 
$$r_{best}(method_i, seed) = R\!M\!S\!E(method_i, seed) / min_{method_j}R\!M\!S\!E(method_j, seed)$$

In [10]:
# Lowest RMSE reached by any method on the same dataset with the same seed,
# broadcast back to every row of that (dataset, seed) group. transform("min")
# avoids the in-place index changes and only aggregates the RMSE column.
regression_df_formated["Best_RMSE_seed"] = (
    regression_df_formated.groupby(["dataset_name", "dataset_seed"])["RMSE"].transform("min")
)
# r_best: 1.0 for the winning method of the run, larger values mean a larger error.
regression_df_formated["Ratio_Best_RMSE_seed"] = (
    regression_df_formated["RMSE"] / regression_df_formated["Best_RMSE_seed"]
)

$\overline{r}_{best}$ is the average of the ratio $r_{best}(method, seed)$ over the 10 seeds.

In [9]:
# Methods ranked by their average ratio to the best RMSE of each run (lower is better).
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
ratio_best_order = (
    regression_df_formated.groupby("method")
    .mean(numeric_only=True)
    .sort_values("Ratio_Best_RMSE_seed")
    .index.values
)
# One row per (dataset, seed) run, one column per method.
ratio_best_by_run = regression_df_formated.pivot_table(
    values="Ratio_Best_RMSE_seed",
    index=["dataset_name", "dataset_seed"],
    columns=["method"],
    aggfunc="mean",
)
ratio_best_by_run[ratio_best_order].describe().loc[["mean", "std", "min", "25%", "50%", "75%", "max"]]

method,adacapnetselu,adacapnetglu,regularnetglu,regularnetselu,adacapnetresblock,adacapnetfastselu,adacapnetstandard,adacapnetbatchresblock,regularnetstandard,adacapnetfast,...,LGBM,MARS,NuSVM,Enet,Lasso,Ridge,Kernel,CART,regularnetbatchstandard,Intercept
mean,1.139175,1.152071,1.165654,1.179055,1.182268,1.186416,1.195867,1.200271,1.220299,1.22821,...,1.599467,1.688426,1.716447,1.834633,1.835863,1.863537,1.902652,2.007749,2.34344,5.369018
std,0.148134,0.203107,0.201748,0.177228,0.22682,0.184181,0.260941,0.21949,0.365312,0.299203,...,1.31563,1.078132,1.645271,1.530073,1.532184,1.504229,1.491512,1.594833,3.599478,7.234634
min,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.0,...,1.0,1.0,1.0,1.0,1.0,1.0,1.0,1.002347,1.0,1.031421
25%,1.036208,1.037692,1.044721,1.056265,1.064823,1.044513,1.068379,1.06797,1.053135,1.076666,...,1.04511,1.078865,1.071141,1.076575,1.076541,1.122867,1.126098,1.385358,1.067128,1.803875
50%,1.098299,1.094647,1.092601,1.13092,1.112487,1.131039,1.118322,1.121335,1.107551,1.140767,...,1.121756,1.284192,1.204499,1.265571,1.25951,1.326181,1.421718,1.488266,1.183701,2.917815
75%,1.186219,1.163815,1.205491,1.246737,1.203532,1.272601,1.20824,1.281662,1.215019,1.270311,...,1.50782,1.899551,1.495622,1.912961,1.912842,1.919075,2.108355,1.703126,1.779089,6.404077
max,2.129441,2.505845,2.295633,2.116076,2.742705,1.938783,2.78026,2.42261,4.322058,3.22866,...,8.539295,8.901962,12.126352,12.061967,12.114954,11.925094,11.938569,11.661189,30.555804,50.60558


Metric "Ratio_CAT_RMSE_seed" compares the RMSE of a method on a dataset for a seed to the CatBoost RMSE on the same dataset with the same seed. This measures each method's error relative to the CatBoost baseline:
$$r_{C\!A\!T}(method_i, seed) = R\!M\!S\!E(method_i, seed) / R\!M\!S\!E(C\!A\!T, seed)$$

In [12]:
# Attach to every row the CatBoost RMSE obtained on the same (dataset, seed) run.
run_keys = ["dataset_name", "dataset_seed"]
is_catboost = regression_df_formated["method"] == "CAT"
catboost_rmse = regression_df_formated.loc[is_catboost].set_index(run_keys)["RMSE"]

# Index by run so the CatBoost scores align on (dataset, seed), then restore the flat layout.
runs_indexed = regression_df_formated.set_index(run_keys)
runs_indexed["CAT_RMSE_seed"] = catboost_rmse
regression_df_formated = runs_indexed.reset_index()

# r_CAT: below 1 means the method beats CatBoost on that run.
regression_df_formated["Ratio_CAT_RMSE_seed"] = (
    regression_df_formated["RMSE"] / regression_df_formated["CAT_RMSE_seed"]
)

$\overline{r}_{C\!A\!T}$ is the average of the ratio $r_{C\!A\!T}(method_i, seed)$ over the 10 seeds.

In [13]:
# Methods ranked by their average RMSE ratio to CatBoost (lower is better).
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
ratio_cat_order = (
    regression_df_formated.groupby("method")
    .mean(numeric_only=True)
    .sort_values("Ratio_CAT_RMSE_seed")
    .index.values
)
# One row per (dataset, seed) run, one column per method.
ratio_cat_by_run = regression_df_formated.pivot_table(
    values="Ratio_CAT_RMSE_seed",
    index=["dataset_name", "dataset_seed"],
    columns=["method"],
    aggfunc="mean",
)
ratio_cat_by_run[ratio_cat_order].describe().loc[["mean", "std", "min", "25%", "50%", "75%", "max"]]

method,adacapnetselu,regularnetglu,adacapnetglu,adacapnetresblock,CAT,regularnetstandard,adacapnetstandard,regularnetselu,adacapnetbatchresblock,regularnetresblock,...,CATfast,NuSVM,MARS,Enet,Lasso,Ridge,Kernel,CART,regularnetbatchstandard,Intercept
mean,0.983867,0.991217,0.991892,0.999954,1.0,1.003704,1.007508,1.010595,1.016476,1.018829,...,1.139577,1.217576,1.31262,1.365432,1.365949,1.396717,1.435758,1.459175,1.467487,3.591573
std,0.258397,0.245082,0.266702,0.238916,0.0,0.212262,0.240388,0.26007,0.239299,0.25213,...,0.242351,0.409188,0.51051,0.585903,0.586973,0.577642,0.588434,0.376277,1.030454,2.921886
min,0.145729,0.187779,0.145356,0.134242,1.0,0.193143,0.140874,0.137017,0.107902,0.278597,...,0.688023,0.551066,0.356957,0.453914,0.455638,0.442467,0.437531,0.82314,0.853352,0.851708
25%,0.948626,0.942252,0.943057,0.958389,1.0,0.966055,0.960554,0.976444,0.970584,0.964934,...,0.997108,1.011812,1.017878,1.002067,1.004111,1.04102,1.052809,1.285265,1.01859,1.733133
50%,1.015272,1.027238,1.020433,1.01923,1.0,1.018989,1.0225,1.043607,1.029649,1.02995,...,1.047229,1.12176,1.112944,1.119348,1.128461,1.213158,1.255861,1.365491,1.115814,2.402214
75%,1.10328,1.107552,1.101933,1.10082,1.0,1.10649,1.099304,1.126986,1.126504,1.102198,...,1.291643,1.301138,1.499615,1.572579,1.545959,1.556393,1.637336,1.513899,1.446477,4.4346
max,1.677345,1.63857,1.770698,1.860777,1.0,1.789125,1.803003,2.116076,1.746896,3.163954,...,2.57509,4.07533,4.451063,4.476598,4.519085,4.417699,4.414622,2.779147,10.335458,18.386053


### 1.D Global Ranking: all metrics, sorted by Ratio_Best_RMSE_seed

In [12]:
# Per-method average of every metric over all datasets and seeds, ranked by the
# ratio to the best RMSE of each run. numeric_only=True keeps the mean from
# raising on text columns with pandas >= 2.0.
method_means = regression_df_formated.groupby("method").mean(numeric_only=True)
method_means.sort_values("Ratio_Best_RMSE_seed")[["RMSE", "R2", "Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed"]]

Unnamed: 0_level_0,RMSE,R2,Ratio_Best_RMSE_seed,Ratio_CAT_RMSE_seed
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adacapnetselu,0.414782,0.771019,1.139175,0.983867
adacapnetglu,0.42063,0.76133,1.152071,0.991892
regularnetglu,0.420134,0.766223,1.165654,0.991217
regularnetselu,0.426012,0.760994,1.179055,1.010595
adacapnetresblock,0.421447,0.766774,1.182268,0.999954
adacapnetfastselu,0.424583,0.763255,1.186416,1.023041
adacapnetstandard,0.423317,0.763541,1.195867,1.007508
adacapnetbatchresblock,0.425708,0.764078,1.200271,1.016476
regularnetstandard,0.423022,0.7653,1.220299,1.003704
adacapnetfast,0.428157,0.761453,1.22821,1.024992


- Note that when averaging over 10 seeds, the method which performs best can have an average "Ratio_Best_RMSE_seed" above 1, if it did not uniformly outperform all others over all seeds.

# 2. Time comparison of top k best methods
- Time in seconds averaged over all datasets

In [5]:
k = 10  # Number of methods to show, up to 31
# Average time of the k methods with the lowest average RMSE, best method first.
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
methods_by_rmse = regression_df_formated.groupby("method").mean(numeric_only=True).sort_values("RMSE")
methods_by_rmse["time"].iloc[:k]

method
adacapnetselu              19.798833
regularnetglu               9.891143
adacapnetglu               22.355343
adacapnetresblock          17.192343
CAT                        92.518085
regularnetstandard          4.058149
adacapnetstandard          17.208227
adacapnetfastselu           7.667071
adacapnetbatchresblock    194.728147
regularnetselu              7.189570
Name: time, dtype: float64

# 3. Top 1 performance count

In [14]:
metric_id = 0  # Value between 0 and 4
metric = ["RMSE", "R2", "Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed", "time"][metric_id]

granularity_id = 0  # Value between 0 and 1 (0: per method, 1: per category)
granularity = ["method", "category"][granularity_id]

# +1 when the lowest value wins, -1 when the highest value wins (R2).
metric_order = [1, -1, 1, 1, 1][metric_id]
count_label = "#Datasets with " + metric + " Top1 performance"

# Seed-averaged scores per (dataset, method), ordered so that the best entry of
# each dataset comes first. numeric_only=True keeps the mean from raising on
# text columns with pandas >= 2.0.
dataset_scores = (
    regression_df_formated.groupby(["dataset_name", "method", "category"])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(["dataset_name", metric])
    .iloc[::metric_order]
)
# Winner of each dataset, then number of datasets won per method / category.
dataset_winners = dataset_scores.groupby(["dataset_name"]).first()[[granularity, metric]]
win_counts = dataset_winners.groupby(granularity).count()[metric]

# Start every method / category at zero so those that never win are still listed.
# Series.items() replaces iteritems(), which was removed in pandas 2.0.
top1_counts = {key: 0 for key in regression_df_formated[granularity].unique()}
top1_counts.update(win_counts.items())

pd.DataFrame.from_dict(data=top1_counts, orient="index", columns=[count_label]).sort_values(count_label).iloc[::-1]

Unnamed: 0,#Datasets with RMSE Top1 performance
CAT,8
adacapnetglu,3
xgb_sklearn,3
XRF,2
Kernel,1
adacapnetselu,1
adacapnetfastselu,1
CATfast,1
Ridge,1
regularnetfastselu,1


# 4. Per Dataset Granularity

### 4.A Best performing method per dataset (averaged over 10 seeds)

In [15]:
# Seed-averaged metrics for every (dataset, method). numeric_only=True prevents
# a failure if the frame carries extra text columns (pandas >= 2.0).
dataset_method_means = (
    regression_df_formated.groupby(["dataset_name", "method", "category"])
    .mean(numeric_only=True)
    .reset_index()
)
# After sorting by ascending R2, the last row of each dataset is its best method.
best_per_dataset = dataset_method_means.sort_values(["dataset_name", "R2"]).groupby("dataset_name").last()
best_per_dataset[["category", "method", "n", "p", "RMSE", "R2", "Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed"]]

Unnamed: 0_level_0,category,method,n,p,RMSE,R2,Ratio_Best_RMSE_seed,Ratio_CAT_RMSE_seed
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
Abaloneabalone.data_task_R_target_-1,NN,regularnetfastselu,3341.0,10.0,0.649171,0.578289,1.003166,0.955582
Appliances energy predictionenergydata_complete.csv_task_R_target_2,RF,XRF,15788.0,25.0,0.552713,0.694424,1.0,0.932537
Beijing PM2.5 DataPRSA_data_2010.1.1-2014.12.31.csv_task_R_target_5,NN,regularnetglu,33405.0,31.0,0.346919,0.879557,1.005422,0.735132
Communities and Crimecommunities.data_task_R_target_-1,GLM,Ridge,1595.0,106.0,0.565659,0.67932,1.013607,0.99528
Computer Hardwaremachine.data_task_R_target_-1,ADACAP,adacapnetselu,167.0,7.0,0.035146,0.998727,1.217201,0.276781
Concrete Slump Testslump_test.data_task_R_target_-1,ADACAP,adacapnetselu,82.0,9.0,0.103122,0.988984,1.127952,0.246305
Concrete Slump Testslump_test.data_task_R_target_-2,ADACAP,adacapnetfastselu,82.0,9.0,0.302555,0.904868,1.099777,0.804671
Concrete Slump Testslump_test.data_task_R_target_-3,SVM,Kernel,82.0,9.0,0.354831,0.870956,1.253163,0.902549
Electrical Grid Stability Simulated Data Data_for_UCI_named.csv_task_R_target_-2,ADACAP,adacapnetglu,8000.0,13.0,0.150528,0.97733,1.000967,0.882269
Optical Interconnection Network optical_interconnection_network.csv_task_R_target_5,GBDT,CAT,512.0,26.0,0.196413,0.955883,1.041563,1.0


### 4.B Metric score for each method for each dataset averaged over 10 seeds

In [6]:
metric_id = 0  # Value between 0 and 4
metric = ["RMSE", "R2", "Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed", "time"][metric_id]
# +1 when lower is better, -1 when higher is better. Only R2 is "higher is
# better"; time now uses +1 like the Top1 count cells, so the fastest methods come first.
metric_order = [1, -1, 1, 1, 1][metric_id]
# Methods ordered best first on the selected metric, averaged over all runs.
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
global_order = (
    regression_df_formated.groupby("method")
    .mean(numeric_only=True)
    .sort_values(metric)
    .iloc[::metric_order]
    .index.values
)
# One row per dataset, one column per method, value averaged over the seeds.
# The name `RMSEs` is kept for the cells below even when another metric is selected.
RMSEs = regression_df_formated.pivot_table(
    values=metric, index=["dataset_name"], columns=["method"], aggfunc="mean"
)[global_order]
RMSEs

method,adacapnetselu,regularnetglu,adacapnetglu,adacapnetresblock,CAT,regularnetstandard,adacapnetstandard,adacapnetfastselu,adacapnetbatchresblock,regularnetselu,...,xgb_sklearn,NuSVM,MARS,Enet,Lasso,regularnetbatchstandard,Ridge,Kernel,CART,Intercept
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Abaloneabalone.data_task_R_target_-1,0.65105,0.657238,0.657996,0.654828,0.679428,0.660559,0.654895,0.650508,0.654287,0.654178,...,0.679379,0.671506,0.673291,0.692378,0.692459,0.657462,0.69255,0.692035,0.934662,1.000639
Appliances energy predictionenergydata_complete.csv_task_R_target_2,0.602415,0.586115,0.60826,0.604862,0.5928,0.583152,0.603077,0.612378,0.601225,0.627199,...,0.773532,0.813809,0.856833,0.873976,0.873976,0.656765,0.874,0.873993,0.783925,1.000054
Beijing PM2.5 DataPRSA_data_2010.1.1-2014.12.31.csv_task_R_target_5,0.397356,0.346919,0.374913,0.400682,0.471885,0.377963,0.411323,0.452292,0.461522,0.493298,...,0.691874,0.626109,0.7745,0.77637,0.776382,0.465838,0.776408,0.776379,0.540359,1.000112
Communities and Crimecommunities.data_task_R_target_-1,0.573783,0.57538,0.576933,0.569835,0.568329,0.574174,0.567072,0.573748,0.576958,0.577995,...,0.576708,0.578169,0.576165,0.567998,0.568225,0.58287,0.565659,0.566298,0.817342,1.001434
Computer Hardwaremachine.data_task_R_target_-1,0.035146,0.037094,0.039566,0.047885,0.137633,0.065551,0.05241,0.037502,0.045831,0.037332,...,0.133898,0.23556,0.169818,0.234473,0.234599,0.486901,0.231516,0.230188,0.203114,1.009458
Concrete Slump Testslump_test.data_task_R_target_-1,0.103122,0.141798,0.102961,0.143647,0.429482,0.186639,0.145935,0.106327,0.144081,0.123295,...,0.419894,0.408674,0.243656,0.312916,0.314441,0.604032,0.308412,0.306951,0.617375,1.044278
Concrete Slump Testslump_test.data_task_R_target_-2,0.307871,0.369433,0.313973,0.339846,0.376951,0.357551,0.327102,0.302555,0.340864,0.309741,...,0.34372,0.397004,0.318837,0.322895,0.326743,0.5782,0.318009,0.31271,0.437305,1.025038
Concrete Slump Testslump_test.data_task_R_target_-3,0.35923,0.427332,0.407884,0.427404,0.405045,0.413975,0.403988,0.388682,0.417552,0.399237,...,0.389802,0.493816,0.373258,0.364399,0.363313,0.590508,0.364309,0.354831,0.479356,1.034824
Electrical Grid Stability Simulated Data Data_for_UCI_named.csv_task_R_target_-2,0.168163,0.158926,0.150528,0.169019,0.170645,0.167175,0.17391,0.179011,0.166485,0.170603,...,0.292378,0.177782,0.36981,0.429073,0.429064,0.153466,0.429105,0.429039,0.450558,1.000401
Optical Interconnection Network optical_interconnection_network.csv_task_R_target_5,0.243593,0.223706,0.243341,0.245066,0.196413,0.230551,0.252638,0.256411,0.255645,0.245942,...,0.329422,0.317074,0.39094,0.464347,0.46441,0.383144,0.464367,0.4634,0.412027,1.006573


In [8]:
# Keep only the neural-network columns, arranged as (AdaCap, regular) pairs per architecture.
cols = RMSEs.columns
# Architecture names are whatever follows the "adacapnet" prefix.
net_methods = [name.replace("adacapnet", "") for name in cols if name.startswith("adacapnet")]
# Each AdaCap column is immediately followed by its regular-net counterpart.
new_columns = [
    prefix + architecture
    for architecture in net_methods
    for prefix in ("adacapnet", "regularnet")
]

In [9]:
# Per-dataset scores restricted to the paired AdaCap / regular-net columns.
RMSEs.loc[:, new_columns]

method,adacapnetselu,regularnetselu,adacapnetglu,regularnetglu,adacapnetresblock,regularnetresblock,adacapnetstandard,regularnetstandard,adacapnetfastselu,regularnetfastselu,adacapnetbatchresblock,regularnetbatchresblock,adacapnetfast,regularnetfast,adacapnetbatchstandard,regularnetbatchstandard
dataset_name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
Abaloneabalone.data_task_R_target_-1,0.65105,0.654178,0.657996,0.657238,0.654828,0.669875,0.654895,0.660559,0.650508,0.649171,0.654287,0.658111,0.653607,0.659251,0.669597,0.657462
Appliances energy predictionenergydata_complete.csv_task_R_target_2,0.602415,0.627199,0.60826,0.586115,0.604862,0.574776,0.603077,0.583152,0.612378,0.724921,0.601225,0.688639,0.600794,0.598097,0.699707,0.656765
Beijing PM2.5 DataPRSA_data_2010.1.1-2014.12.31.csv_task_R_target_5,0.397356,0.493298,0.374913,0.346919,0.400682,0.347352,0.411323,0.377963,0.452292,0.566605,0.461522,0.39832,0.445058,0.454772,0.564504,0.465838
Communities and Crimecommunities.data_task_R_target_-1,0.573783,0.577995,0.576933,0.57538,0.569835,0.57193,0.567072,0.574174,0.573748,0.582975,0.576958,0.58891,0.567806,0.587329,0.567837,0.58287
Computer Hardwaremachine.data_task_R_target_-1,0.035146,0.037332,0.039566,0.037094,0.047885,0.071855,0.05241,0.065551,0.037502,0.04835,0.045831,0.128122,0.058577,0.075801,0.07717,0.486901
Concrete Slump Testslump_test.data_task_R_target_-1,0.103122,0.123295,0.102961,0.141798,0.143647,0.201905,0.145935,0.186639,0.106327,0.144354,0.144081,0.239334,0.159923,0.193641,0.165302,0.604032
Concrete Slump Testslump_test.data_task_R_target_-2,0.307871,0.309741,0.313973,0.369433,0.339846,0.363445,0.327102,0.357551,0.302555,0.31785,0.340864,0.365015,0.326057,0.365166,0.325188,0.5782
Concrete Slump Testslump_test.data_task_R_target_-3,0.35923,0.399237,0.407884,0.427332,0.427404,0.41934,0.403988,0.413975,0.388682,0.391062,0.417552,0.450164,0.420966,0.437293,0.412151,0.590508
Electrical Grid Stability Simulated Data Data_for_UCI_named.csv_task_R_target_-2,0.168163,0.170603,0.150528,0.158926,0.169019,0.163134,0.17391,0.167175,0.179011,0.205344,0.166485,0.155102,0.173986,0.167869,0.245666,0.153466
Optical Interconnection Network optical_interconnection_network.csv_task_R_target_5,0.243593,0.245942,0.243341,0.223706,0.245066,0.224229,0.252638,0.230551,0.256411,0.252751,0.255645,0.26374,0.26018,0.24443,0.272858,0.383144


In [17]:
# Export one dataset-by-method table per metric as CSV.
export_metrics = ["RMSE", "R2", "Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed", "time"]
# +1 when lower is better, -1 when higher is better. Only R2 is "higher is
# better"; time uses +1 like the Top1 count cells, so the fastest methods come first.
export_orders = [1, -1, 1, 1, 1]
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
method_means = regression_df_formated.groupby("method").mean(numeric_only=True)

for metric_id, metric in enumerate(export_metrics):
    metric_order = export_orders[metric_id]
    # Columns ordered best method first for the current metric.
    global_order = method_means.sort_values(metric).iloc[::metric_order].index.values
    dataset_by_method = regression_df_formated.pivot_table(
        values=metric, index=["dataset_name"], columns=["method"], aggfunc="mean"
    )[global_order]
    # NOTE(review): output_path and save_repository are both "results/", so files
    # are written under "results/results/" - confirm this is the intended folder.
    dataset_by_method.to_csv(
        output_path + save_repository + "dataset_by_method_" + suffix_save_path + metric + ".csv"
    )

### 4.C Zoom on a specific dataset

In [18]:
dataset_index = 0  # between 0 and 25
dataset_name = regression_df_formated["dataset_name"].unique()[dataset_index]
print("DATASET : " + dataset_name)

zoom_columns = ['category', 'R2', 'RMSE', 'n', 'p', 'time']
runs_by_method = regression_df_formated.groupby(["dataset_name", "method", "category"])


def _dataset_table(aggregated):
    """Rows of the selected dataset from an aggregated frame, indexed by method and sorted by RMSE."""
    return (
        aggregated.reset_index()
        .set_index("dataset_name")
        .loc[dataset_name]
        .sort_values("RMSE")
        .set_index("method")[zoom_columns]
    )


# Statistics over the seeds. numeric_only=True keeps the aggregations from
# failing on text columns with pandas >= 2.0.
table_mean = _dataset_table(runs_by_method.mean(numeric_only=True))
table_std = _dataset_table(runs_by_method.std(numeric_only=True))
table_min = _dataset_table(runs_by_method.min(numeric_only=True))
table_max = _dataset_table(runs_by_method.max(numeric_only=True))

# Spread columns are aligned on the method index, so table_mean keeps its RMSE ordering.
for metric in ["R2", "RMSE"]:
    table_mean[metric + ":std"] = table_std[metric]
    table_mean[metric + ":min"] = table_min[metric]
    table_mean[metric + ":max"] = table_max[metric]
table_mean[['category'] + [metric + suffix for metric in ["RMSE", "R2"] for suffix in ["", ":std", ":min", ":max"]] + ['n', 'p', 'time']]

DATASET : mirichoi0218_insuranceinsurance.csv_task_R_target_-1


Unnamed: 0_level_0,category,RMSE,RMSE:std,RMSE:min,RMSE:max,R2,R2:std,R2:min,R2:max,n,p,time
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
NuSVM,SVM,0.957468,0.068535,0.855221,1.083702,0.079027,0.13328,-0.174411,0.268597,109.0,330.0,0.009157
adacapnetbatchresblock,ADACAP,0.977853,0.066826,0.81304,1.049151,0.039784,0.124371,-0.100717,0.338966,109.0,330.0,2.193559
adacapnetresblock,ADACAP,0.980095,0.067166,0.81304,1.049151,0.035354,0.124966,-0.100717,0.338966,109.0,330.0,3.429815
adacapnetselu,ADACAP,0.987899,0.082149,0.819413,1.125852,0.017981,0.160006,-0.267542,0.328562,109.0,330.0,16.690391
adacapnetfast,ADACAP,0.991107,0.086218,0.834141,1.108491,0.011017,0.167162,-0.228752,0.304209,109.0,330.0,1.436123
CATfast,GBDT,0.991513,0.117019,0.838159,1.200091,0.004578,0.235758,-0.440217,0.297489,109.0,330.0,0.815761
regularnetfast,NN,0.993271,0.085683,0.829759,1.112328,0.006805,0.166676,-0.237274,0.3115,109.0,330.0,0.838841
regularnetfastselu,NN,0.997177,0.064996,0.92017,1.148371,0.001835,0.134553,-0.318755,0.153287,109.0,330.0,15.621853
regularnetbatchstandard,NN,0.997442,0.064485,0.882504,1.100568,0.001366,0.128835,-0.21125,0.221187,109.0,330.0,1.590106
regularnetbatchresblock,NN,0.997497,0.103698,0.873633,1.205637,-0.004678,0.217799,-0.453561,0.236766,109.0,330.0,1.782917


# 5. Neural Networks with and without AdaCap

### 5.A Global performance

In [14]:
# Restrict the global ranking to neural networks trained with (ADACAP) and without (NN) AdaCap.
is_neural_net = regression_df_formated["category"].isin(["ADACAP", "NN"])
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
neural_net_means = regression_df_formated[is_neural_net].groupby("method").mean(numeric_only=True)
neural_net_means.sort_values("RMSE")[["RMSE", "R2", "Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed"]]

Unnamed: 0_level_0,RMSE,R2,Ratio_Best_RMSE_seed,Ratio_CAT_RMSE_seed
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
adacapnetselu,0.414782,0.771019,1.139175,0.983867
regularnetglu,0.420134,0.766223,1.165654,0.991217
adacapnetglu,0.42063,0.76133,1.152071,0.991892
adacapnetresblock,0.421447,0.766774,1.182268,0.999954
regularnetstandard,0.423022,0.7653,1.220299,1.003704
adacapnetstandard,0.423317,0.763541,1.195867,1.007508
adacapnetfastselu,0.424583,0.763255,1.186416,1.023041
adacapnetbatchresblock,0.425708,0.764078,1.200271,1.016476
regularnetselu,0.426012,0.760994,1.179055,1.010595
adacapnetfast,0.428157,0.761453,1.22821,1.024992


## Impact of using AdaCap during training on performance 
- For each architecture, each dataset and each seed, we compute the ratio of AdaCap over the regular net in terms of RMSE and time. Note that regular nets used Batch Norm and Dropout, whereas AdaCap nets did not.

### 5.B Global comparison over all datasets and all seeds

In [20]:
def _architecture_of(method_name):
    """Return the architecture suffix of an AdaCap / regular net method, '' for any other method."""
    if "adacap" in method_name or "regular" in method_name:
        return method_name.replace("adacapnet", "").replace("regularnet", "")
    return ""


regression_df_formated["architecture"] = [
    _architecture_of(value) for value in regression_df_formated["method"].values
]

# An AdaCap run is paired with the regular-net run of the same architecture on
# the same dataset and seed. Pairing on explicit keys replaces the former
# positional division, which silently depended on both subsets sharing the same row order.
pairing_keys = ["dataset_name", "dataset_seed", "architecture"]
regular_perf = regression_df_formated[regression_df_formated["category"] == "NN"].set_index("architecture")
regular_reference = regular_perf.reset_index()[pairing_keys + ["RMSE", "time"]].rename(
    columns={"RMSE": "RMSE_regular", "time": "time_regular"}
)
adacap_perf = (
    regression_df_formated[regression_df_formated["category"] == "ADACAP"]
    # many_to_one raises if a regular-net run is duplicated for a pairing key.
    .merge(regular_reference, on=pairing_keys, how="left", validate="many_to_one")
    .set_index("architecture")
)
# From here on, "RMSE" and "time" in adacap_perf hold AdaCap / regular ratios
# (the cells below rely on that); the same ratios are also exposed under explicit names.
for metric in ["RMSE", "time"]:
    adacap_perf[metric] = adacap_perf[metric] / adacap_perf.pop(metric + "_regular")
    adacap_perf["ratio : " + metric] = adacap_perf[metric]

### Best error reduction achieved by Adacap training over regular training for a given NN architecture when AdaCap ranked first against all methods

In [21]:
# Best method (highest seed-averaged R2) of each dataset, indexed by (dataset, method).
# numeric_only=True keeps the means from raising on text columns with pandas >= 2.0.
best_by_ds = (
    regression_df_formated.groupby(["dataset_name", "method", "category"])
    .mean(numeric_only=True)
    .reset_index()
    .sort_values(["dataset_name", "R2"])
    .groupby("dataset_name")
    .last()
    .reset_index()
    .set_index(["dataset_name", "method"])
)
# Keep the datasets whose overall winner is an AdaCap network.
best_by_ds = best_by_ds[best_by_ds["category"] == "ADACAP"]
# Seed-averaged AdaCap / regular ratios for those winners, largest error reduction first.
adacap_ratio_means = adacap_perf.groupby(["dataset_name", "method"]).mean(numeric_only=True)
adacap_ratio_means.loc[best_by_ds.index].sort_values("ratio : RMSE")[["ratio : RMSE", "ratio : time"]]

Unnamed: 0_level_0,Unnamed: 1_level_0,ratio : RMSE,ratio : time
dataset_name,method,Unnamed: 2_level_1,Unnamed: 3_level_1
Concrete Slump Testslump_test.data_task_R_target_-1,adacapnetselu,0.858558,1.50381
Yacht Hydrodynamicsyacht_hydrodynamics.data_task_R_target_-1,adacapnetglu,0.938864,1.300586
Electrical Grid Stability Simulated Data Data_for_UCI_named.csv_task_R_target_-2,adacapnetglu,0.947631,2.05658
Computer Hardwaremachine.data_task_R_target_-1,adacapnetselu,0.948213,1.536705
Concrete Slump Testslump_test.data_task_R_target_-2,adacapnetfastselu,0.957994,1.114593


### Average Gain over all datasets and seeds

In [22]:
# Average AdaCap / regular ratios per architecture over every run, largest RMSE ratio first.
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
architecture_means = adacap_perf.groupby("architecture").mean(numeric_only=True)
architecture_means.sort_values("RMSE").iloc[::-1][["ratio : RMSE", "ratio : time"]]

Unnamed: 0_level_0,ratio : RMSE,ratio : time
architecture,Unnamed: 1_level_1,Unnamed: 2_level_1
standard,1.001047,2.541383
glu,0.997156,1.685655
fast,0.982008,2.035864
resblock,0.978925,0.872823
selu,0.972981,1.95994
fastselu,0.957026,1.356103
batchresblock,0.952732,27.049304
batchstandard,0.895591,41.420039


### Average Gain on datasets where AdaCap won against NN

In [23]:
# Runs where AdaCap beat its regular counterpart ("RMSE" holds the AdaCap / regular ratio here).
adacap_beats_regular = adacap_perf[adacap_perf["RMSE"] < 1.]
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
architecture_means = adacap_beats_regular.groupby("architecture").mean(numeric_only=True)
architecture_means.sort_values("RMSE").iloc[::-1][["ratio : RMSE", "ratio : time"]]

Unnamed: 0_level_0,ratio : RMSE,ratio : time
architecture,Unnamed: 1_level_1,Unnamed: 2_level_1
selu,0.93897,2.183972
fast,0.936266,2.145614
standard,0.926621,2.246681
glu,0.92009,1.317918
fastselu,0.90798,1.320588
resblock,0.907092,0.868544
batchresblock,0.840737,54.060377
batchstandard,0.722039,68.356523


### Average Gain on datasets where AdaCap won against all methods

In [24]:
# Runs where an AdaCap network reached the best RMSE of all methods (ratio to best equals 1).
adacap_beats_all = adacap_perf[adacap_perf["Ratio_Best_RMSE_seed"] == 1.]
# numeric_only=True keeps the mean from raising on text columns with pandas >= 2.0.
architecture_means = adacap_beats_all.groupby("architecture").mean(numeric_only=True)
architecture_means.sort_values("RMSE").iloc[::-1][["ratio : RMSE", "ratio : time"]]

Unnamed: 0_level_0,ratio : RMSE,ratio : time
architecture,Unnamed: 1_level_1,Unnamed: 2_level_1
fast,0.930717,1.549389
resblock,0.882422,0.863456
selu,0.87405,1.543464
glu,0.866324,1.593488
batchresblock,0.812096,1.068475
fastselu,0.809114,1.109175
standard,0.768627,2.073468


### 5.C Zoom on a specific dataset

In [25]:
dataset_index = 0  # between 0 and 25
dataset_name = adacap_perf["dataset_name"].unique()[dataset_index]
print("DATASET : " + dataset_name)

ratio_columns = ['RMSE', 'time', "n", "p"]
runs_by_architecture = adacap_perf.groupby(["dataset_name", "architecture"])


def _architecture_table(aggregated):
    """Rows of the selected dataset from an aggregated frame, indexed by architecture and sorted by RMSE ratio."""
    return (
        aggregated.reset_index()
        .set_index("dataset_name")
        .loc[dataset_name]
        .sort_values("RMSE")
        .set_index("architecture")[ratio_columns]
    )


# Statistics over the seeds of the AdaCap / regular ratios. numeric_only=True
# keeps the aggregations from failing on text columns with pandas >= 2.0.
table_mean = _architecture_table(runs_by_architecture.mean(numeric_only=True))
table_std = _architecture_table(runs_by_architecture.std(numeric_only=True))
table_min = _architecture_table(runs_by_architecture.min(numeric_only=True))
table_max = _architecture_table(runs_by_architecture.max(numeric_only=True))

for metric in ["RMSE", "time"]:
    table_mean[metric + ":std"] = table_std[metric]
    table_mean[metric + ":min"] = table_min[metric]
    table_mean[metric + ":max"] = table_max[metric]
# Only the means are displayed; add ":std", ":min", ":max" to the suffix list to show the spread.
table_mean[[metric + suffix for metric in ["RMSE", "time"] for suffix in ["",]] + ["n", "p"]]

DATASET : mirichoi0218_insuranceinsurance.csv_task_R_target_-1


Unnamed: 0_level_0,RMSE,time,n,p
architecture,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
resblock,0.948221,0.855805,109.0,330.0
selu,0.978913,1.073155,109.0,330.0
batchresblock,0.984929,1.371958,109.0,330.0
fast,0.999734,1.734509,109.0,330.0
standard,1.008511,1.807712,109.0,330.0
fastselu,1.012888,1.012483,109.0,330.0
batchstandard,1.019623,1.866549,109.0,330.0
glu,1.066076,1.076792,109.0,330.0


### 5.D Count of Top 1 performance by category (ADACAP vs. NN)

In [26]:
metric_id = 0 # Value between 0 and 4 (index into the list of metrics below)
metric = ["RMSE","R2","Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed", "time"][metric_id]

granularity_id = 1 # Value between 0 and 1
granularity = ["method","category"][granularity_id]

# 1: the lowest value wins ; -1: the highest value wins (only R2)
metric_order = [1,-1,1,1,1][metric_id]

# Keep only the neural-network methods (with and without AdaCap).
nn_and_adacap = regression_df_formated[np.isin(regression_df_formated["category"],["ADACAP","NN"])]

# Mean of the metric per (dataset, category, method). Only the metric column is aggregated,
# so no non-numeric column can reach .mean() (pandas >= 2.0 raises on those).
mean_per_method = nn_and_adacap.groupby(["dataset_name","category","method"])[metric].mean().reset_index()

# After sorting (and reversing when higher is better), the first row of each dataset is its winner.
top1_per_dataset = mean_per_method.sort_values(["dataset_name",metric]).iloc[::metric_order].groupby(["dataset_name"]).first()

# Number of datasets won by each method/category, largest count first.
top1_per_dataset[[granularity,metric]].groupby(granularity).count().sort_values(metric).iloc[::-1]

Unnamed: 0_level_0,RMSE
category,Unnamed: 1_level_1
ADACAP,14
NN,12


### 5.E Count of Top 1 performance by architecture

In [27]:
metric_id = 0 # Value between 0 and 4 (index into the list of metrics below)
metric = ["RMSE","R2","Ratio_Best_RMSE_seed", "Ratio_CAT_RMSE_seed", "time"][metric_id]

granularity_id = 0 # Value between 0 and 1
granularity = ["method","category"][granularity_id]

# 1: the lowest value wins ; -1: the highest value wins (only R2)
metric_order = [1,-1,1,1,1][metric_id]

# Keep only the neural-network methods (with and without AdaCap).
nn_and_adacap = regression_df_formated[np.isin(regression_df_formated["category"],["ADACAP","NN"])]

# Mean of the metric per (dataset, method, category). Only the metric column is aggregated,
# so no non-numeric column can reach .mean() (pandas >= 2.0 raises on those).
mean_per_method = nn_and_adacap.groupby(["dataset_name","method","category"])[metric].mean().reset_index()

# After sorting (and reversing when higher is better), the first row of each dataset is its winner.
top1_per_dataset = mean_per_method.sort_values(["dataset_name",metric]).iloc[::metric_order].groupby(["dataset_name"]).first()
top1_counts = top1_per_dataset[[granularity,metric]].groupby(granularity).count()[metric]

# Start every method/category at 0 so that entries which never win still appear in the table,
# then overwrite with the actual counts.
# Series.items() replaces Series.iteritems(), which was removed in pandas 2.0.
count_column = "#Datasets with "+ metric +" Top1 performance"
top1_by_key = {key:0 for key in nn_and_adacap[granularity].unique()}
top1_by_key.update(top1_counts.items())

pd.DataFrame.from_dict(data = top1_by_key, orient= "index", columns = [count_column]).sort_values(count_column).iloc[::-1]

Unnamed: 0,#Datasets with RMSE Top1 performance
adacapnetselu,5
adacapnetglu,4
regularnetfastselu,4
regularnetresblock,3
regularnetglu,3
regularnetbatchresblock,2
adacapnetstandard,1
adacapnetresblock,1
adacapnetfastselu,1
adacapnetfast,1


### 5.F Zoom on a specific dataset (per method, ADACAP and NN only)

In [28]:
# Keep only the neural-network methods (with and without AdaCap); filtered once and reused below.
nn_and_adacap = regression_df_formated[np.isin(regression_df_formated["category"],["ADACAP","NN"])]

# Pick one dataset by its position in the filtered "dataset_name" values.
dataset_index = 0 # between 0 and 25
dataset_name = nn_and_adacap["dataset_name"].unique()[dataset_index]
print("DATASET : "+dataset_name)

# Filter to the selected dataset first, then group once and reuse the grouped object:
#  - avoids aggregating the whole benchmark four times,
#  - keeps the result a DataFrame even if a single method ran on the dataset
#    (.loc[dataset_name] on a one-row selection would collapse to a Series),
#  - restricts the aggregation to numeric columns, which pandas >= 2.0 requires for mean/std.
zoom_columns = ['R2', 'RMSE', 'n', 'p', 'time']
per_method = nn_and_adacap[nn_and_adacap["dataset_name"] == dataset_name].groupby(["method","category"])[zoom_columns]

# One row per method, "category" kept as the first column, highest mean RMSE first.
table_mean = per_method.mean().reset_index("category").sort_values("RMSE").iloc[::-1]
table_std = per_method.std().reset_index("category")
table_min = per_method.min().reset_index("category")
table_max = per_method.max().reset_index("category")

# Dispersion columns are attached by index alignment on "method".
for metric in ["R2", "RMSE"]:
    table_mean[metric + ":std"] = table_std[metric]
    table_mean[metric + ":min"] = table_min[metric]
    table_mean[metric + ":max"] = table_max[metric]
table_mean[['category']+[metric+suffix for metric in ["RMSE","R2"] for suffix in ["", ":std",":min", ":max"]]+ [ 'n', 'p', 'time']]

DATASET : mirichoi0218_insuranceinsurance.csv_task_R_target_-1


Unnamed: 0_level_0,category,RMSE,RMSE:std,RMSE:min,RMSE:max,R2,R2:std,R2:min,R2:max,n,p,time
method,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
adacapnetglu,ADACAP,1.06525,0.05543,1.010076,1.177572,-0.137522,0.121479,-0.386675,-0.020255,109.0,330.0,19.175377
regularnetresblock,NN,1.041671,0.124313,0.877948,1.252758,-0.098986,0.262068,-0.569403,0.229207,109.0,330.0,4.007936
regularnetstandard,NN,1.017816,0.118279,0.883976,1.303262,-0.048541,0.261143,-0.698491,0.218586,109.0,330.0,2.122999
adacapnetstandard,ADACAP,1.017145,0.082787,0.864908,1.124677,-0.040752,0.164643,-0.264898,0.251934,109.0,330.0,3.820403
adacapnetbatchstandard,ADACAP,1.015803,0.085081,0.864908,1.147966,-0.03837,0.170393,-0.317827,0.251934,109.0,330.0,2.126351
regularnetselu,NN,1.009929,0.061894,0.916555,1.112255,-0.023405,0.125633,-0.23711,0.159927,109.0,330.0,15.549521
adacapnetfastselu,ADACAP,1.008141,0.081821,0.884006,1.164638,-0.022373,0.169191,-0.356381,0.218534,109.0,330.0,15.814718
regularnetglu,NN,1.002677,0.073788,0.933532,1.179574,-0.010262,0.155439,-0.391394,0.128518,109.0,330.0,17.805353
regularnetbatchresblock,NN,0.997497,0.103698,0.873633,1.205637,-0.004678,0.217799,-0.453561,0.236766,109.0,330.0,1.782917
regularnetbatchstandard,NN,0.997442,0.064485,0.882504,1.100568,0.001366,0.128835,-0.21125,0.221187,109.0,330.0,1.590106


# 6. Generate TABLE 3 

In [1]:
# Load data
import numpy as np
import pandas as pd
regression_df_formated = pd.read_csv("results/regression_benchmark.csv", index_col = 0)
regression_df_formated["R2"] = regression_df_formated["R2"].values.astype(float)
regression_df_formated["RMSE"] = np.sqrt(1. - regression_df_formated["R2"].values)

# Compute ratio over best performing method in terms of RMSE
# See section 1.C for explanation
# transform("min") broadcasts the best RMSE of each (dataset, seed) pair back onto every row,
# which avoids the in-place set_index / reset_index round trip.
sort_metric = "Ratio_Best_RMSE_seed"
regression_df_formated["Best_RMSE_seed"] = regression_df_formated.groupby(["dataset_name","dataset_seed"])["RMSE"].transform("min")
regression_df_formated[sort_metric] = (regression_df_formated["RMSE"] / regression_df_formated["Best_RMSE_seed"])

# Extract best method of each category: the method with the lowest mean ratio.
# Only the ratio column is averaged, so string columns never reach .mean().
best_of_each = {}
for category in regression_df_formated["category"].unique():
    in_category = regression_df_formated[regression_df_formated["category"] == category]
    best_of_each[category] = in_category.groupby("method")[sort_metric].mean().sort_values().index.values[0]
only_best_df = regression_df_formated[np.isin(regression_df_formated["method"].values,list(best_of_each.values()))]

# For each method get list of datasets on which it outperforms all others
# (lowest mean ratio on the dataset); computed once and reused for every category.
mean_per_dataset = only_best_df.groupby(["dataset_name","method","category"])[sort_metric].mean().reset_index()
winner_per_dataset = mean_per_dataset.sort_values(["dataset_name",sort_metric]).groupby("dataset_name").first().reset_index()
selected_categories = winner_per_dataset["category"].unique()
datasets_of_each = {}
for category in selected_categories:
    datasets_of_each[category] = winner_per_dataset.loc[winner_per_dataset["category"] == category, "dataset_name"].values

# Aggregate datasets based on which method outperform all others
table_by_category = {}
for category in selected_categories:
    won_datasets = set(datasets_of_each[category])
    # Boolean mask instead of .loc[set(...)]: pandas >= 2.0 rejects sets as indexers.
    rows_on_won_datasets = only_best_df[only_best_df["dataset_name"].isin(won_datasets)]
    # numeric_only=True: "dataset_name" is a string column and pandas >= 2.0 no longer drops it silently.
    summary = rows_on_won_datasets.groupby(["category", "method"]).mean(numeric_only=True).reset_index()
    summary["dataset_best_method"] = category
    summary = summary.set_index(["dataset_best_method","method"])
    summary["count"] = len(won_datasets)
    table_by_category[category] = summary
concatenated = pd.concat(list(table_by_category.values())).reset_index()
# Column order: for each row group (sorted by dataset_best_method), the method with the lowest
# ratio on that group, so that the baseline of every row sits on the diagonal.
best_methods = concatenated.sort_values(["dataset_best_method",sort_metric]).groupby("dataset_best_method").first()["method"].values
final_format = concatenated.pivot_table(values=sort_metric, index=["dataset_best_method"],
                    columns=["method"], aggfunc="mean")[best_methods]

# renormalize metric : divide by baseline value (the diagonal), transform into %
values = final_format.values.copy()
values = values / np.diag(values).reshape((-1,1))
values = values * 100
values = values - 100

# Prettify table values
def _format_gap(gap):
    """Render a relative gap in %: 'x.x%' when positive, 'Best' when <= 0, '' for NaN."""
    # Explicit branches replace `string * (gap > 0)`: multiplying a str by a numpy bool is
    # deprecated in NumPy and was the source of the warning echoed under this cell.
    if gap > 0:
        return str(np.around(gap,decimals=1))+"%"
    if gap <= 0:
        return 'Best'
    return ''
table_matrix_values = np.array([[_format_gap(col) for col in cols] for cols in values])
final_format = pd.DataFrame(table_matrix_values, index=final_format.index, columns=final_format.columns)

# Count for each method on how many datasets it achieve top1 performance
final_format["# Datasets"] = [table_by_category[category]["count"].values[0] for category in final_format.index.values]

# Prettify column and index names
# NOTE(review): this mapping and the display order are hard-coded; they are expected to agree with
# best_of_each computed above - update both if the benchmark file changes.
cat_to_method = {'GBDT':"CAT",'ADACAP':"adacapnetselu", 'NN':"regularnetglu",'GLM':"Enet",'RF':"XRF", 'SVM':"NuSVM"}
table_order = ["CAT","adacapnetselu","regularnetglu","Enet","XRF","NuSVM"]
final_format.index = [cat_to_method[value] for value in final_format.index.values]
final_format = final_format[["# Datasets"]+table_order].loc[table_order]
final_format

  table_matrix_values = np.array([[(str(np.around(col,decimals=1))+"%")*(col > 0) + 'Best' * (col <= 0) for col in cols] for cols in values])


method,# Datasets,CAT,adacapnetselu,regularnetglu,Enet,XRF,NuSVM
CAT,9,Best,11.6%,13.4%,56.8%,12.6%,21.8%
adacapnetselu,5,141.3%,Best,13.6%,176.6%,103.3%,197.9%
regularnetglu,4,11.5%,5.8%,Best,92.1%,31.3%,58.0%
Enet,4,0.9%,11.6%,10.3%,Best,21.4%,12.6%
XRF,3,12.2%,11.2%,13.2%,39.0%,Best,40.9%
NuSVM,1,4.1%,3.1%,4.7%,6.8%,36.8%,Best


# 7. Exotic Pricing

The code for this experiment is in ExoticPricing.ipynb.

In [3]:
# Load the exotic pricing results and display them sorted by RMSE (lowest first).
# The frame was previously assigned to `princings` but read back as `pricings`,
# which raises a NameError as soon as the file loads.
# NOTE(review): "outputs/exotic.csv" must exist before this cell runs; it is presumably
# written by ExoticPricing.ipynb - confirm the location (other cells read from "results/").
pricings = pd.read_csv("outputs/exotic.csv")

pricings.sort_values("RMSE")

FileNotFoundError: [Errno 2] No such file or directory: 'outputs/exotic.csv'