# Validate Ranking
It is important that the ranking of the optimizers stays consistent between the subsets, dev and test (and X).
Therefore, we calculate the ranking (via statistical tests/autorank) for those sets and compare them.

## Prerequisites
The full data is needed (i.e. the full set on which the subselection is performed).
Run this notebook once for each scenario defined in the next code cell.
It generates LaTeX tables.

## Define Scenario

In [1]:
# subsets = ["BBfull", "BBsubset"]
# scenario = "blackbox"
# optimizer_ids = ["RandomSearch", "SMAC3-BlackBoxFacade", "Nevergrad-CMA-ES"]

# subsets = ["MOfull", "MOsubset"]
# scenario = "multiobjective"
# optimizer_ids = ["RandomSearch", "Optuna-MO", "Nevergrad-DE"]

# subsets = ["MFfull", "MFsubset"]
# scenario = "multifidelity"
# optimizer_ids = ["SMAC3-Hyperband", "SMAC3-MultiFidelityFacade", "DEHB"]
from __future__ import annotations

# Active scenario. To switch, replace these three assignments with one of the
# commented-out alternatives above.
# `scenario` is used in the autorank identifier and in the name of the ranks CSV.
scenario = "MOMF"
# Keys into the `paths` mapping of the loading cell.
subsets = [
    "MOMFfull",
    "MOMFsubset",
]
# Only runs whose `optimizer_id` is listed here are kept after loading.
optimizer_ids = [
    "RandomSearch",
    "SMAC3-MOMF-GP",
    "Nevergrad-DE",
]

## Load Data
Load the full set and the subset data

In [2]:
import pandas as pd
from carps.analysis.gather_data import load_set, normalize_logs

# Run directories per subset key. Each entry maps a set id ("full", "dev",
# "test") to the list of directories that is handed to `load_set` for that set.
paths = {
    # Full sets: one run directory per optimizer.
    "BBfull": {
        "full": [
            "../runs/SMAC3-BlackBoxFacade",
            "../runs/RandomSearch",
            "../runs/Nevergrad-CMA-ES",
        ],
    },
    "MFfull": {
        "full": [
            "../runs/SMAC3-Hyperband",
            "../runs/DEHB",
            "../runs/SMAC3-MultiFidelityFacade",
        ],
    },
    "MOfull": {
        "full": [
            "../runs_MO/Optuna-MO",
            "../runs_MO/RandomSearch",
            "../runs_MO/Nevergrad-DE",
        ],
    },
    "MOMFfull": {
        "full": [
            "../runs_MOMF/SMAC3-MOMF-GP",
            "../runs_MOMF/RandomSearch",
            "../runs_MOMF/Nevergrad-DE",
        ],
    },
    # Subsets: every scenario has one "dev" and one "test" directory that
    # follow the same naming scheme, ../runs_subset_<TAG>/<split>.
    **{
        f"{tag}subset": {split: [f"../runs_subset_{tag}/{split}"] for split in ("dev", "test")}
        for tag in ("BB", "MF", "MO", "MOMF")
    },
}


# Load every set of every selected subset, keep only the optimizers under
# comparison, and combine everything into one normalized log frame `df`.
frames_per_subset = []
for subset in subsets:
    print("loading", subset)
    set_frames = []
    for set_id, set_paths in paths[subset].items():
        # `load_set` returns a pair; only its first element (the logs) is used here.
        set_logs, _unused = load_set(paths=set_paths, set_id=set_id)
        set_frames.append(set_logs)
    subset_logs = pd.concat(set_frames).reset_index(drop=True)
    keep_row = subset_logs["optimizer_id"].isin(optimizer_ids)
    frames_per_subset.append(subset_logs[keep_row])
df = normalize_logs(pd.concat(frames_per_subset).reset_index(drop=True))
# Drop the list of intermediate frames; only the combined `df` is needed from here on.
del frames_per_subset

loading MOMFfull
loading MOMFsubset


## Calc Ranks per Set
With autorank

In [5]:
%%capture --no-display

from autorank._util import get_sorted_rank_groups
from carps.analysis.run_autorank import calc_critical_difference

# Column of `df` that is passed to the critical-difference analysis as performance value.
perf_col: str = "trial_value__cost_inc_norm"

# One entry per set: the ranks returned by autorank, tagged with the set id.
ranks_per_set = []
for set_name, set_logs in df.groupby("set"):
    print(set_name)
    cd_result = calc_critical_difference(
        set_logs,
        identifier=f"{scenario}_{set_name}",
        figsize=(8, 3),
        perf_col=perf_col,
        plot_diagram=False,
    )
    set_ranks, rank_names, rank_groups = get_sorted_rank_groups(cd_result, reverse=False)
    # Store the set id next to the rank values so the origin of each entry stays known.
    set_ranks["set_id"] = set_name
    ranks_per_set.append(set_ranks)
    print(set_ranks, rank_names, rank_groups)

## Convert into DataFrame/table
And save to file

In [6]:
# For each set, the first three index labels of its rank entry.
# NOTE(review): `df_t` is not referenced anywhere else in the visible notebook.
df_t = pd.DataFrame([{entry["set_id"]: list(entry.index[:3])} for entry in ranks_per_set])

# One row per set; move `set_id` to the front and keep the remaining columns in order.
df_ranks_per_set = pd.DataFrame(ranks_per_set)
new_cols = ["set_id", *(col for col in df_ranks_per_set.columns if col != "set_id")]
df_ranks_per_set = df_ranks_per_set[new_cols]

# Persist the table; the file name carries the scenario.
df_ranks_per_set.to_csv(f"ranks_per_set_{scenario}.csv", index=False)
df_ranks_per_set

Unnamed: 0,set_id,SMAC3-MOMF-GP,RandomSearch,Nevergrad-DE
meanrank,dev,1.555556,1.777778,2.666667
meanrank,full,1.62963,1.777778,2.592593
meanrank,test,1.444444,2.0,2.555556


## Generate LaTeX tables

In [7]:
from pathlib import Path

import numpy as np
import pandas as pd

# Rank CSVs written by the "Convert into DataFrame/table" cell, one per scenario.
# Files that do not exist (scenario not run yet) are skipped.
fns = [
    "ranks_per_set_blackbox.csv",
    "ranks_per_set_MOMF.csv",
    "ranks_per_set_multifidelity.csv",
    "ranks_per_set_multiobjective.csv",
]
data = []
# Number of decimals shown for every mean rank in the tables.
decimal_places = 2
# Row order of the sets in the generated tables.
sorter = ["full", "dev", "test"]

# LaTeX wrapper around the tabular; doubled braces are literal braces for str.format.
final_str = r"""
\begin{{table}}[h]
    \caption{{{caption}}}
    \label{{{label}}}
    \centering
    %\resizebox{{0.4\textwidth}}{{!}}{{
    {table_string}
    %}}
\end{{table}}
"""


def float_format(x: float) -> str:
    """Format ``x`` with ``decimal_places`` decimals; NaN is rendered as "-"."""
    return "-" if np.isnan(x) else f"{x:.{decimal_places}f}"


def annotate_with_ranks(mean_ranks: pd.DataFrame) -> pd.DataFrame:
    """Return a string table with cells of the form ``"<value> (<rank>)"``.

    Ranks are computed per row on the numeric values (smallest value gets
    rank 1), before any string formatting. Ranking the formatted strings
    would order them lexicographically, e.g. "10.00" before "2.00".
    Tied values get pandas' average rank, truncated to an integer.
    NaN cells are rendered as "-" without a rank.

    Args:
        mean_ranks: Frame with one row per set and one numeric column per
            optimizer. It is not modified.

    Returns:
        A new frame with the same index and columns holding the formatted strings.
    """
    ranks = mean_ranks.rank(axis=1)
    cells = {
        column: [
            float_format(value) if np.isnan(value) else f"{float_format(value)} ({int(rank)})"
            for value, rank in zip(mean_ranks[column], ranks[column])
        ]
        for column in mean_ranks.columns
    }
    # Build a fresh frame instead of writing into the rows yielded by
    # DataFrame.iterrows(): those rows may be copies, in which case the
    # writes would be silently lost.
    return pd.DataFrame(cells, index=mean_ranks.index)


# NOTE: this loop rebinds the notebook-level names `df` and `scenario` that the
# earlier cells define; re-run those cells before reusing them.
for fn in fns:
    if not Path(fn).is_file():
        continue
    df = pd.read_csv(fn)

    # Order the rows as given by `sorter` (raises ValueError for an unknown set id).
    df = df.sort_values(by="set_id", key=lambda column: column.map(sorter.index))
    df = annotate_with_ranks(df.set_index("set_id")).rename_axis("set")
    # "ranks_per_set_<scenario>.csv" -> "<scenario>"
    scenario = fn.split("_")[-1].split(".")[0]

    table_str = df.to_latex(float_format=float_format, na_rep="-").strip()
    caption = f"Mean Ranking for Scenario {scenario}"
    label = f"tab:ranking_validation_{scenario}"
    table_str = final_str.format(table_string=table_str, label=label, caption=caption)

    with open(fn + ".tex", "w") as file:
        file.write(table_str)
    print(table_str)


\begin{table}[h]
    \caption{Mean Ranking for Scenario blackbox}
    \label{tab:ranking_validation_blackbox}
    \centering
    %\resizebox{0.4\textwidth}{!}{
    \begin{tabular}{llll}
\toprule
 & SMAC3-BlackBoxFacade & RandomSearch & Nevergrad-CMA-ES \\
set &  &  &  \\
\midrule
full & 1.37 (1) & 2.19 (2) & 2.43 (3) \\
dev & 1.23 (1) & 2.08 (2) & 2.68 (3) \\
test & 1.18 (1) & 2.12 (2) & 2.70 (3) \\
\bottomrule
\end{tabular}
    %}
\end{table}


\begin{table}[h]
    \caption{Mean Ranking for Scenario MOMF}
    \label{tab:ranking_validation_MOMF}
    \centering
    %\resizebox{0.4\textwidth}{!}{
    \begin{tabular}{llll}
\toprule
 & SMAC3-MOMF-GP & RandomSearch & Nevergrad-DE \\
set &  &  &  \\
\midrule
full & 1.63 (1) & 1.78 (2) & 2.59 (3) \\
dev & 1.56 (1) & 1.78 (2) & 2.67 (3) \\
test & 1.44 (1) & 2.00 (2) & 2.56 (3) \\
\bottomrule
\end{tabular}
    %}
\end{table}


\begin{table}[h]
    \caption{Mean Ranking for Scenario multifidelity}
    \label{tab:ranking_validation_multifidelity