# Budget Correlations
To generate the plots below, we use the DeepCAVE library. Unfortunately, it is incompatible with our processing environment, so a separate environment has to be created and used to execute this code, as explained in [the DeepCAVE documentation](https://automl.github.io/DeepCAVE/main/installation.html).

In [6]:
from deepcave.runs.converters.smac3v2 import SMAC3v2Run
from pathlib import Path
from collections import defaultdict
from deepcave.runs import Status
import pandas as pd
from scipy import stats

In [7]:
def get_budget_correlations(run: SMAC3v2Run, objective_id: int):
    """Compute pairwise Spearman rank correlations between the budgets of a run.

    For every ordered pair of budgets, the configurations that have status
    ``Status.SUCCESS`` on *both* budgets are selected, their average cost for
    the chosen objective is looked up on each budget, and the Spearman
    correlation of the two cost lists is rounded to two decimals.

    Args:
        run: Run whose budgets are compared.
        objective_id: Position of the objective within the costs returned by
            ``run.get_avg_costs``.

    Returns:
        A tuple ``(correlations, correlations_symmetric)`` of nested dicts
        keyed by the human-readable budget values:

        * ``correlations[b1][b2]`` holds the correlation only for pairs where
          ``b2 <= b1`` (each unordered pair is stored once).
        * ``correlations_symmetric[b1][b2]`` holds the correlation for every
          ordered pair and additionally contains a ``"Budget"`` entry that
          maps each human-readable budget to itself (a label column for
          table output).
    """
    budget_ids = run.get_budget_ids(include_combined=False)

    # Everything gathered here depends on a single budget only, so it is
    # looked up once per budget instead of once per budget pair.
    budgets = []
    for budget_id in budget_ids:
        budget = run.get_budget(budget_id)
        budget_readable = run.get_budget(budget_id, human=True)
        costs = {
            config_id: run.get_avg_costs(
                config_id, budget, statuses=[Status.SUCCESS]
            )[0][objective_id]
            for config_id in run.get_configs(budget, statuses=[Status.SUCCESS]).keys()
        }
        budgets.append((budget, budget_readable, costs))

    # Symmetric correlations; table ready
    correlations_symmetric = defaultdict(dict)
    correlations = defaultdict(dict)

    for budget1, budget1_readable, costs1 in budgets:
        for budget2, budget2_readable, costs2 in budgets:
            # Restrict to configs present on both budgets so that both cost
            # lists have the same length and are paired config by config.
            config_ids = set(costs1) & set(costs2)
            c1 = [costs1[config_id] for config_id in config_ids]
            c2 = [costs2[config_id] for config_id in config_ids]

            correlation = round(stats.spearmanr(c1, c2).correlation, 2)
            correlations_symmetric["Budget"][budget2_readable] = budget2_readable
            correlations_symmetric[budget1_readable][budget2_readable] = correlation

            # Exclude if budget2 is higher than budget1
            if budget2 > budget1:
                continue

            correlations[budget1_readable][budget2_readable] = correlation
    return correlations, correlations_symmetric

In [8]:
# Filled in by the loading loop: experiment label -> correlation DataFrame.
dataframes: dict = {}

# Run-directory id -> experiment label.
experiments = {
    1: "2 layers, mid",
    2: "4 layers, mid",
    4: "4 layers, end",
}

In [9]:
# Load each experiment's run and keep its symmetric budget-correlation table.
for run_id, experiment in experiments.items():
    run_dir = Path(f"fehring_growing_nn_new_seeded/increase_difficulty_n2d/{run_id}/0")
    run = SMAC3v2Run.from_path(run_dir)
    correlations, correlations_symmetric = get_budget_correlations(run, objective_id=0)
    # The "Budget" entry only carries labels; remove it so the frame holds
    # nothing but correlation values.
    correlations_symmetric.pop("Budget")
    dataframes[experiment] = pd.DataFrame(correlations_symmetric)

Found in item {'name': 'batch_size', 'choices': [32, 64, 128, 256], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'ent_coef', 'log': False, 'lower': 0.0, 'upper': 0.3}
  return decoder(item, cs, _dec)
Found in item {'name': 'learning_rate', 'log': True, 'lower': 0.0001, 'upper': 0.01}
  return decoder(item, cs, _dec)
Found in item {'name': 'n_epochs', 'log': False, 'lower': 5, 'upper': 20}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size', 'choices': [32, 64, 128, 256], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'ent_coef', 'log': False, 'lower': 0.0, 'upper': 0.3}
  return decoder(item, cs, _dec)
Found in item {'name': 'learning_rate', 'log': True, 'lower': 0.0001, 'upper': 0.01}
  return decoder(item, cs, _dec)
Found in item {'name': 'n_epochs', 'log': False, 'lower': 5, 'upper': 20}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size', 'choices': [32, 64, 128, 256], 'weights': None}
  return 

In [10]:
# Relabel every correlation matrix with generic "Budget <n>" names and print
# it as a LaTeX table.
for experiment, df in dataframes.items():
    # Number rows and columns by position so that matrices of any size are
    # labelled the same way, not only the 2x2 and 4x4 cases. Assigning plain
    # lists leaves both axes unnamed.
    df.index = [f"Budget {i}" for i in range(1, len(df.index) + 1)]
    df.columns = [f"Budget {i}" for i in range(1, len(df.columns) + 1)]
    latex_table = df.to_latex(
        index=True,
        header=True,
        float_format="%.2f",
        caption=f"Correlation Matrix for {experiment}",
    )
    print(experiment)
    print(latex_table)

2 layers, mid
\begin{table}
\caption{Correlation Matrix for 2 layers, mid}
\begin{tabular}{lrr}
\toprule
 & Budget 1 & Budget 2 \\
\midrule
Budget 1 & 1.00 & 0.54 \\
Budget 2 & 0.54 & 1.00 \\
\bottomrule
\end{tabular}
\end{table}

4 layers, mid
\begin{table}
\caption{Correlation Matrix for 4 layers, mid}
\begin{tabular}{lrrrr}
\toprule
 & Budget 1 & Budget 2 & Budget 3 & Budget 4 \\
\midrule
Budget 1 & 1.00 & 0.67 & 0.67 & 0.45 \\
Budget 2 & 0.67 & 1.00 & 0.91 & 0.70 \\
Budget 3 & 0.67 & 0.91 & 1.00 & 0.74 \\
Budget 4 & 0.45 & 0.70 & 0.74 & 1.00 \\
\bottomrule
\end{tabular}
\end{table}

4 layers, end
\begin{table}
\caption{Correlation Matrix for 4 layers, end}
\begin{tabular}{lrrrr}
\toprule
 & Budget 1 & Budget 2 & Budget 3 & Budget 4 \\
\midrule
Budget 1 & 1.00 & 0.67 & 0.39 & 0.51 \\
Budget 2 & 0.67 & 1.00 & 0.49 & 0.62 \\
Budget 3 & 0.39 & 0.49 & 1.00 & 0.57 \\
Budget 4 & 0.51 & 0.62 & 0.57 & 1.00 \\
\bottomrule
\end{tabular}
\end{table}

