# Analysis of Inverse Generational Distance
This notebook demonstrates how optimization algorithm benchmarking data can be loaded from disk, how metrics (such as inverse generational distance) can be calculated for all reported populations, and how the resulting data can be analyzed to compare experiments. We demonstrate the process by comparing three algorithms against each other on a single test problem. The resulting comparisons are displayed, and we show how the table can be exported to LaTeX for publication.

In [1]:
import paretobench as pb

In [2]:
%%time
# Evaluate the IGD metric on all populations stored in three saved experiments
df = pb.eval_metrics_experiments(
    ['data/NSGAII.h5', 'data/CPSMOEA.h5', 'data/ParEGO.h5'], 
    {'igd': pb.InverseGenerationalDistance()}, 
    n_procs=8
)
df.head()

CPU times: total: 93.8 ms
Wall time: 5.9 s


Unnamed: 0,problem,fevals,run_idx,pop_idx,igd,exp_name,exp_idx,fname
0,"WFG1 (m=2,n=16,k=2)",20,0,0,2.207231,NSGA-II,0,data/NSGAII.h5
1,"WFG1 (m=2,n=16,k=2)",40,0,1,1.545975,NSGA-II,0,data/NSGAII.h5
2,"WFG1 (m=2,n=16,k=2)",60,0,2,1.530251,NSGA-II,0,data/NSGAII.h5
3,"WFG1 (m=2,n=16,k=2)",80,0,3,1.529974,NSGA-II,0,data/NSGAII.h5
4,"WFG1 (m=2,n=16,k=2)",100,0,4,1.528507,NSGA-II,0,data/NSGAII.h5


In [3]:
# Aggregate the metric for each history object, considering individuals up
# until a budget of `max_feval` function evaluations has been exceeded, and
# compare the algorithms against ParEGO with the Wilcoxon rank sum test.
feval_budget = 300
# Index of the reference experiment for the rank sum test.
# NOTE(review): presumably 2 is ParEGO's position in the list of experiment
# files passed to `eval_metrics_experiments` -- confirm.
reference_idx = 2
dfa = pb.aggregate_metrics_feval_budget(
    df,
    max_feval=feval_budget,
    wilcoxon_idx=reference_idx,
)

# Build the per-problem comparison table from the aggregate statistics. The
# mean and the standard deviation use the same number formatting; each call
# argument receives its own dict.
number_format = {'precision': 3, 'exp_digits': 1}
dfc = pb.construct_metric_comparison_table(
    dfa,
    problem_params='n',
    mean_fmt_kwargs=dict(number_format),
    std_fmt_kwargs=dict(number_format),
)
dfc

  if ranksums(x.to_numpy(), y.to_numpy(), 'less')[1] < wilcoxon_p:
  if ranksums(x.to_numpy(), y.to_numpy(), 'greater')[1] < wilcoxon_p:


Unnamed: 0_level_0,Unnamed: 1_level_0,CPS-MOEA,NSGA-II,ParEGO
Problem,n,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
WFG1,16,1.366e+0 (1.491e-2) -,1.582e+0 (3.386e-1) -,\cellbold 1.382e+0 (1.597e-1)
WFG1,32,1.371e+0 (1.284e-2) -,1.572e+0 (3.089e-1) -,\cellbold 1.348e+0 (1.023e-1)
WFG1,64,\cellbold 1.379e+0 (1.039e-2) =,1.494e+0 (2.471e-1) =,-


In [4]:
# Convert the comparison table to LaTeX source and print it so that it can be
# copied into a publication.
latex_table = pb.comparison_table_to_latex(dfc)
print(latex_table)

\begin{tabular}{lllll}
\toprule
\multicolumn{1}{c}{\textbf{Problem }} &\multicolumn{1}{c}{\textbf{ n }}  &  \multicolumn{1}{c}{\textbf{ CPS-MOEA }}&  \multicolumn{1}{c}{\textbf{ NSGA-II }}&  \multicolumn{1}{c}{\textbf{ ParEGO }}\\
\midrule
\multirow{3}{*}{WFG1} & 16 & 1.366e+0 (1.491e-2) - & 1.582e+0 (3.386e-1) - & \cellbold 1.382e+0 (1.597e-1) \\
 & 32 & 1.371e+0 (1.284e-2) - & 1.572e+0 (3.089e-1) - & \cellbold 1.348e+0 (1.023e-1) \\
 & 64 & \cellbold 1.379e+0 (1.039e-2) $\approx$ & 1.494e+0 (2.471e-1) $\approx$ & - \\
\cline{1-5}
 \multicolumn{2}{c}{+/-/$\approx$} & \multicolumn{1}{c}{0/2/1} & \multicolumn{1}{c}{0/2/1} & \\
\bottomrule
\end{tabular}



  comparisons = df.applymap(lambda x: (x[-1] if len(x) > 4 else '')).apply(pd.Series.value_counts).fillna(0)
