In [1]:
%load_ext autoreload
%autoreload 2

import plotting_utils
from reports_utils import (
    get_all_results,
    plt_heatmap,
    plot_pulls,
    number_of_pulls,
)
import numpy as np

In [2]:
from collections import defaultdict


def get_group(policy_algorithms):
    """Pair each algorithm name with its "_CA" variants under a shared base name.

    The base name of an entry is everything before the first "_CA" in it; a
    name without "_CA" is its own base name.

    Args:
        policy_algorithms: Mapping whose keys are algorithm names (strings).
            The values are not used.

    Returns:
        A ``defaultdict`` mapping base name -> ``{"base": [...], "ca": [...]}``
        where "base" lists the names without "_CA" and "ca" lists the names
        containing it, both in the iteration order of ``policy_algorithms``.
    """
    buckets = defaultdict(lambda: {"base": [], "ca": []})
    for name in policy_algorithms:
        # partition() yields an empty separator when "_CA" is absent, in which
        # case the stem is the whole name.
        stem, marker, _ = name.partition("_CA")
        slot = "ca" if marker else "base"
        buckets[stem][slot].append(name)
    return buckets
    


def get_total_pulls(pulls, algorithm):
    """Return the mean length of all entries recorded for one algorithm.

    ``pulls[algorithm]`` is iterated as a collection of runs, and every run as
    a collection of sized entries; the result is the average ``len()`` over
    all of those entries.
    # NOTE(review): presumably each entry is the pull sequence of one
    # repetition/task — confirm against ``number_of_pulls``.

    Args:
        pulls: Mapping from algorithm name to a nested collection of entries.
        algorithm: Key to look up in ``pulls``.

    Returns:
        The mean entry length as a NumPy float (``nan`` if there are no
        entries at all).
    """
    # Flatten the counts before averaging: the runs may contain different
    # numbers of entries, and a ragged nested list cannot be turned into a
    # regular 2-D array. For rectangular input the mean is unchanged.
    lengths = [len(entry) for run in pulls[algorithm] for entry in run]
    return np.mean(lengths)

In [3]:
# Algorithms to evaluate, listed as "<name>_CA_R" / "<name>" pairs. Insertion
# order matters: it fixes the order of the groups built by get_group().
# NOTE(review): every value is 1 and its meaning is not visible in this
# notebook — confirm what get_all_results() expects here.
policy_algorithms = {
    "PS_PFNs_CA_R": 1,
    "PS_PFNs": 1,
    "PS_PFN_CA_R": 1,
    "PS_PFN": 1,
    "PS_Max_CA_R": 1,
    "PS_Max": 1,
}
grouped = get_group(policy_algorithms)

In [4]:
dataset_names = [
    "YaHPOGym_100",
    "TabRepoRaw_30",
    "Complex",
]

# Datasets whose results live in a directory other than the default
# "../results_32/"; the number becomes part of the directory name.
trails_by_dataset = {"Complex": 12, "Grinsztajn": 12, "more": 1}

# results[dataset][group] -> {"base": mean pulls, "ca": mean pulls}
results = {}
for dataset_name in dataset_names:
    number_of_trails = trails_by_dataset.get(dataset_name, 32)
    result_directory = f"../results_{number_of_trails}/"

    data = get_all_results(
        dataset_name, policy_algorithms, result_directory=result_directory
    )
    pulls = number_of_pulls(
        data, dataset_name, policy_algorithms, result_directory=result_directory
    )

    per_group = results[dataset_name] = {}
    for item in grouped:
        members = grouped[item]
        # Only the first name of each kind is compared within a group.
        base_name = members["base"][0]
        ca_name = members["ca"][0]
        per_group[item] = {
            "base": get_total_pulls(pulls, base_name),
            "ca": get_total_pulls(pulls, ca_name),
        }

In [5]:
import pandas as pd

# Relative change of the "ca" value over the "base" value, in percent, for
# every dataset (outer key) and algorithm group (inner key).
table_result = {}
for dataset, metrics in results.items():
    table_result[dataset] = {}
    for metric in grouped:
        base = metrics[metric]["base"]
        ca = metrics[metric]["ca"]
        improvement = ((ca - base) / base) * 100
        table_result[dataset][metric] = improvement

# Create DataFrame
df = pd.DataFrame(table_result).T  # Transpose to have datasets as rows
df = df[["PS_PFNs", "PS_PFN", "PS_Max"]]  # Optional: ensure consistent column order
df = df.round(2)  # Round to 2 decimal places

# Render the percentages as strings up-front (df itself stays numeric) so the
# "%" sign is treated like any other cell text and goes through the same
# LaTeX escaping as the "_" in the row/column labels. Missing values are left
# untouched so to_latex() can apply its own NaN representation.
df_formatted = df.apply(
    lambda column: column.map(lambda x: f"{x/100.0:.2%}", na_action="ignore")
)

latex_table = df_formatted.to_latex(
    index=True,
    header=True,
    # Request escaping explicitly: the implicit default of `escape` changed to
    # False in pandas 2.0, which would leave "%" and "_" raw in the output.
    escape=True,
    # String cells would otherwise be left-aligned; keep the numeric layout
    # (label column left, value columns right).
    column_format="l" + "r" * df_formatted.shape[1],
    caption="Percentage improvement of 'ca' over 'base' for each metric and dataset.",
    label="tab:percentage_improvement",
)

print(latex_table)

\begin{table}
\centering
\caption{Percentage improvement of 'ca' over 'base' for each metric and dataset.}
\label{tab:percentage_improvement}
\begin{tabular}{lrrr}
\toprule
{} &  PS\_PFNs &  PS\_PFN &  PS\_Max \\
\midrule
YaHPOGym\_100  &    7.50\% &   4.66\% &   2.94\% \\
TabRepoRaw\_30 &   15.36\% &   6.74\% &   7.55\% \\
Complex       &   15.32\% &  17.35\% &   4.69\% \\
\bottomrule
\end{tabular}
\end{table}



  latex_table = df.to_latex(
