# BBOB

In [None]:
# BBOB
from __future__ import annotations

import shutil
from collections.abc import Callable
from dataclasses import asdict
from pathlib import Path

import numpy as np
from carps.utils.task import InputSpace, OptimizationResources, OutputSpace, Task, TaskMetadata, get_search_space_info
from hydra.utils import instantiate
from carps.utils.generate_tasks import get_dict_input_space, get_dict_metadata, get_dict_opt_resources, get_dict_output_space
from omegaconf import DictConfig, OmegaConf

# Seeds 1..10. Not referenced in the visible part of this notebook; the
# generated configs take their seed from the global `${seed}` interpolation.
seeds = range(1, 11)

# Config should match ObjectiveFunction init

# Output root; `Path()` is the current working directory.
base_path = Path()

# ObjectiveFunction ids
# NOTE: generate_bbob_configs reads this module-level name at call time for
# the `benchmark_id` field of every config it writes.
benchmark_id_bbob = "BBOB"



# Directory that receives the generated YAML files.
target_path = base_path / benchmark_id_bbob

# BBOB function ids 1..24, instance ids and problem dimensions to generate.
fids = list(range(1, 25))
instances = [0, 1, 2]
dimensions = [2, 4, 8, 16, 32]

def get_n_trials(dimension: int) -> int:
    """Return the trial budget for a search space with ``dimension`` dimensions.

    The budget is ``20 + 40 * sqrt(dimension)``, rounded up to an integer.
    """
    raw_budget = 20 + 40 * np.sqrt(dimension)
    return int(np.ceil(raw_budget))

def generate_bbob_configs(
        fids: list[int],
        instances: list[int],
        dimensions: list[int],
        get_n_trials_fn: Callable[[int], int],
        target_path: Path,
        task_base_id: str,
        problem_class: str = "carps.objective_functions.bbob.BBOBObjectiveFunction"
) -> None:
    """Write one Hydra task config per (function id, instance, dimension).

    ``target_path`` is deleted if it already exists and then recreated, so
    every call starts from an empty directory. Each config is written to
    ``target_path / "cfg_{dim}_{fid}_{inst}.yaml"`` and is also printed.

    The ``benchmark_id`` field of the config is read from the module-level
    ``benchmark_id_bbob`` at call time.

    Args:
        fids: Function ids to generate configs for.
        instances: Instance ids to generate configs for.
        dimensions: Problem dimensions to generate configs for.
        get_n_trials_fn: Maps a dimension to the number of trials of the task.
        target_path: Output directory (wiped before writing).
        task_base_id: Prefix of the task name
            ``{task_base_id}/{dim}/{fid}/{inst}``.
        problem_class: Import path used as ``_target_`` of the objective
            function.
    """
    # FidelitySpace is missing from the import line of the cell that defines
    # this function; import it here so the function also works in a fresh
    # kernel instead of raising a NameError.
    from carps.utils.task import FidelitySpace

    if target_path.is_dir():
        shutil.rmtree(str(target_path))
    target_path.mkdir(parents=True, exist_ok=True)

    for fid in fids:
        for inst in instances:
            for dim in dimensions:
                task_id = f"{task_base_id}/{dim}/{fid}/{inst}"
                optimization_resources = OptimizationResources(
                    n_trials=get_n_trials_fn(dim),
                    time_budget=None,
                    n_workers=1,
                )
                objective_function_cfg = DictConfig({
                        "_target_": problem_class,
                        "dimension": dim,
                        "fid": fid,
                        "instance": inst,
                        "seed": "${seed}",  # get the seed from global cfg
                    })
                # The written config keeps the `${seed}` interpolation. A copy
                # with a concrete seed is instantiated only to read the
                # configuration space of the objective function.
                objective_function_cfg_tmp = objective_function_cfg.copy()
                objective_function_cfg_tmp.seed = 234
                objective_function = instantiate(objective_function_cfg_tmp)
                input_space = InputSpace(
                    configuration_space=objective_function.configspace,
                    fidelity_space=FidelitySpace(
                        is_multifidelity=False,
                        fidelity_type=None,
                        min_budget=None,
                        max_budget=None,
                    )
                )
                output_space = OutputSpace(
                    n_objectives=1,
                    objectives=["quality"],
                )
                # The search space is described as `dim` float hyperparameters.
                task_metadata = TaskMetadata(
                    dimensions=dim,
                    has_constraints=False,
                    domain="synthetic",
                    objective_function_approximation="real",
                    has_virtual_time=False,
                    deterministic=True,
                    search_space_n_categoricals=0,
                    search_space_n_ordinals=0,
                    search_space_n_integers=0,
                    search_space_n_floats=dim,
                    search_space_has_conditionals=False,
                    search_space_has_forbiddens=False,
                    search_space_has_priors=False
                )
                cfg = DictConfig({
                    "benchmark_id": benchmark_id_bbob,
                    "task_id": "${task.name}",
                    "task": {
                        "_target_": "carps.utils.task.Task",
                        "name": task_id,
                        "seed": "${seed}",
                        "objective_function": objective_function_cfg,
                        "input_space": get_dict_input_space(input_space),
                        "output_space": get_dict_output_space(output_space),
                        "optimization_resources": get_dict_opt_resources(optimization_resources),
                        "metadata": get_dict_metadata(task_metadata),
                    },
                })
                fn = target_path / f"cfg_{dim}_{fid}_{inst}.yaml"
                yaml_str = OmegaConf.to_yaml(cfg=cfg)
                # Hydra package directive: place the config at the global level.
                yaml_str = "# @package _global_\n" + yaml_str
                fn.write_text(yaml_str)
                print(cfg)

# Write the BBOB configs: 24 functions x 3 instances x 5 dimensions.
generate_bbob_configs(fids, instances, dimensions, get_n_trials, target_path, "bbob")

# BBOB Vizier

n trials: 100
dim: 20

(or 1,5,10,15,20,25,30,35,40)

Vizier uses [their own implementation](https://github.com/google/vizier/blob/main/vizier/_src/benchmarks/experimenters/synthetic/bbob.py) of the bbob-largescale benchmark, so the functions generated here might not be exactly the same as the ones used in Vizier.

In [None]:
# BBOB Vizier
# Rebinding this global changes the `benchmark_id` written by
# generate_bbob_configs, which reads the name at call time.
benchmark_id_bbob = "BBOBVizier"
target_path = base_path / benchmark_id_bbob
# All 24 functions, a single instance and a single dimension (20).
fids = list(range(1, 25))
instances = [0]
dimensions = [20]

def get_n_trials(dimension: int) -> int:
    """Return the fixed budget of 100 trials; ``dimension`` is ignored."""
    fixed_budget = 100
    return fixed_budget

# Write the BBOBVizier configs with the fixed 100-trial budget defined above.
generate_bbob_configs(fids, instances, dimensions, get_n_trials, target_path, "bbob_vizier")

# HPOB

In [None]:
# HPOB
import re
import shutil
from dataclasses import asdict
from operator import attrgetter
from pathlib import Path

import numpy as np
import pandas as pd
from carps.utils.generate_tasks import (
    get_dict_input_space,
    get_dict_metadata,
    get_dict_opt_resources,
    get_dict_output_space,
)
from carps.utils.task import (
    FidelitySpace,
    InputSpace,
    OptimizationResources,
    OutputSpace,
    Task,
    TaskMetadata,
    get_search_space_info,
)
from omegaconf import DictConfig, OmegaConf
from rich.progress import track
from hydra.utils import instantiate

# Output root; `Path()` is the current working directory.
base_path = Path()

# ObjectiveFunction ids
benchmark_id = "HPOB"

# Directory with the saved HPO-B surrogates, relative to the working directory.
surrogates_dir = Path("../../../carps/benchmark_data/HPO-B/saved-surrogates")

# Lazy iterator over the surrogate file names; it is consumed exactly once below.
# NOTE(review): glob() results are not sorted here, so the row order of
# `combos_mid_did` (and with it the seeded sampling further down) may differ
# between file systems - confirm whether a fixed order is required.
all_res = map(attrgetter("name"), surrogates_dir.glob("surrogate-*"))

# Start from an empty output directory: previously generated configs are removed.
target_path = base_path / benchmark_id
if target_path.is_dir():
    shutil.rmtree(str(target_path))
target_path.mkdir(parents=True, exist_ok=True)

# Read as module-level globals by `to_yaml`.
problem_class = "carps.objective_functions.hpo_b.HPOBObjectiveFunction"
n_trials = 100

# Extract all digit runs from each file name. Every name must contain exactly
# two of them (model id, dataset id), otherwise the unpacking below fails.
# The ids are kept as strings.
combos_mid_did = [re.findall(r"\d+", res) for res in all_res]
combos_mid_did = pd.DataFrame([{"model_id": mid, "dataset_id": did} for mid, did in combos_mid_did])

# Seeded generator shared by both sampling steps, so their order matters.
rng = np.random.default_rng(seed=498415)
n_models = 10
n_datasets = 5
# check how many datasets per model id
n_d_per_model = combos_mid_did.groupby(["model_id"]).apply(len)
# filter only those model ids with enough datasets
n_d_per_model = n_d_per_model[n_d_per_model >= n_datasets]
# select 10 of those models
# NOTE(review): Generator.choice draws with replacement by default, so the
# same model id can be drawn twice; in that case fewer than `n_models`
# distinct models remain and the assert below fails. Passing replace=False
# would avoid that but changes which models are drawn for this seed.
subselected_models = rng.choice(n_d_per_model.index, size=n_models)
combos_mid_did_reduced = combos_mid_did[combos_mid_did["model_id"].isin(subselected_models)]
# select 5 datasets for each of the 10 models
subselected_combos = combos_mid_did_reduced.groupby("model_id").sample(n=n_datasets, replace=False, random_state=rng)
# Sanity check: exactly n_models * n_datasets (model, dataset) pairs were selected.
assert len(subselected_combos) == (n_models * n_datasets)




def to_yaml(combos_mid_did: pd.DataFrame, identifier: str = "all"):
    """Write one HPO-B task config per row of ``combos_mid_did``.

    Each row is unpacked positionally into ``(model_id, dataset_id)``. The
    config is written to
    ``target_path / identifier / "cfg_{model_id}_{dataset_id}.yaml"``; the
    parent directory is created when missing.

    Reads the module-level ``n_trials``, ``problem_class``, ``surrogates_dir``,
    ``benchmark_id`` and ``target_path``.

    Args:
        combos_mid_did: Frame with one (model id, dataset id) pair per row.
        identifier: Name of the task group; used in the task name and as the
            output sub-directory.
    """
    rows = track(combos_mid_did.iterrows(), total=len(combos_mid_did))
    for _, row in rows:
        model_id, dataset_id = row

        # Config as it is written to disk; the seed stays an interpolation.
        of_cfg = DictConfig({
                "_target_": problem_class,
                "model_id": model_id,
                "dataset_id": dataset_id,
                "surrogates_dir": str(surrogates_dir),
                "seed": "${seed}",  # get the seed from global cfg
            })

        resources = OptimizationResources(
            n_trials=n_trials,
            time_budget=None,
            n_workers=1,
        )

        # Instantiate a copy with a concrete seed to get at the configspace.
        seeded_cfg = of_cfg.copy()
        seeded_cfg.seed = 234
        objective = instantiate(seeded_cfg)

        no_fidelity = FidelitySpace(is_multifidelity=False, fidelity_type=None, min_budget=None, max_budget=None)
        in_space = InputSpace(
            configuration_space=objective.configspace,
            fidelity_space=no_fidelity,
        )
        out_space = OutputSpace(
            n_objectives=1,
            objectives=["quality"],
        )
        space_info = get_search_space_info(configspace=objective.configspace)
        metadata = TaskMetadata(
            has_constraints=False,
            domain="ML",
            objective_function_approximation="surrogate",
            has_virtual_time=False,
            deterministic=True,
            **space_info
        )

        task_section = {
            "_target_": "carps.utils.task.Task",
            "name": f"hpob/{identifier}/{model_id}/{dataset_id}",
            "seed": "${seed}",
            "objective_function": of_cfg,
            "input_space": get_dict_input_space(in_space),
            "output_space": get_dict_output_space(out_space),
            "optimization_resources": get_dict_opt_resources(resources),
            "metadata": get_dict_metadata(metadata),
        }
        cfg = DictConfig({
            "benchmark_id": benchmark_id,
            "task_id": "${task.name}",
            "task": task_section,
        })

        cfg_file = target_path / f"{identifier}/cfg_{model_id}_{dataset_id}.yaml"
        cfg_file.parent.mkdir(parents=True, exist_ok=True)
        # Prefix the Hydra package directive before writing.
        cfg_file.write_text("# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg))

# Configs for every (model, dataset) pair found in the surrogates directory ...
to_yaml(combos_mid_did=combos_mid_did, identifier="all")
# ... and for the sampled subset of n_models * n_datasets pairs.
to_yaml(combos_mid_did=subselected_combos, identifier="subset")

# HPOBench

In [5]:
import shutil
from pathlib import Path

import numpy as np
import pandas as pd
from omegaconf import DictConfig, OmegaConf
from carps.utils.generate_tasks import get_dict_input_space, get_dict_metadata, get_dict_opt_resources, get_dict_output_space
try:
    from carps.objective_functions.hpo_bench import HPOBenchObjectiveFunction
except ModuleNotFoundError:
    print("HPOBench is not installed. This means the task information can only be partly retrieved.")
    HPOBenchObjectiveFunction = None
from dataclasses import asdict

from carps.utils.task import Task
from carps.utils.task import InputSpace, OptimizationResources, OutputSpace, Task, TaskMetadata, get_search_space_info, FidelitySpace
from hydra.utils import instantiate

# Output root; `Path()` is the current working directory.
base_path = Path()

# Set up directories
benchmark_id_hpobench = "HPOBench"
target_path = base_path / benchmark_id_hpobench
# Start from scratch: everything below HPOBench/ is deleted, including
# configs written by earlier runs of the cells that follow.
if target_path.is_dir():
    shutil.rmtree(str(target_path))
target_path_blackbox = target_path / "blackbox"
target_path_multifidelity = target_path / "multifidelity"
target_path.mkdir(parents=True, exist_ok=True)
target_path_blackbox.mkdir(parents=True, exist_ok=True)
target_path_multifidelity.mkdir(parents=True, exist_ok=True)

# `_target_` of every HPOBench objective function config in this notebook.
problem_class = "carps.objective_functions.hpo_bench.HPOBenchObjectiveFunction"

# NO TIME BUDGET SPECIFIED ATM
# durations_path = "../../../durations.csv"
# if not Path(durations_path).is_file():
#     raise ValueError("Budgets for multi-fidelity have not yet been calculated. "\
#                      "Please run `python notebooks/define_runtimes.py` to measure "\
#                     "how long the highest budget takes on your system.")
# durations = pd.read_csv(durations_path)

# Formula from YAHPO paper
def get_n_trials(dimension: int) -> int:
    """Return ``ceil(20 + 40 * sqrt(dimension))`` as the number of trials."""
    scaled_root = 40 * np.sqrt(dimension)
    return int(np.ceil(20 + scaled_root))


# Create Configs for blackbox/tab/ml and multifidelity
# Model families for which ML configs are generated.
MODELS = ["lr", "nn", "rf", "svm", "xgboost"]

# Task ids used for every model; TASK_IDS_LARGE is added for every model
# except "nn" (see the generation loop below).
TASK_IDS = [10101, 53, 146818, 146821, 9952, 146822, 31, 3917]
TASK_IDS_LARGE = [168912, 3, 167119, 12, 146212, 168911, 9981, 167120, 14965, 146606, 7592, 9977]

# Choices according to https://arxiv.org/pdf/2109.06716.pdf, page 22
# Upper fidelity bounds: "subsample" is shared by all models, "iter" and
# "n_estimators" are looked up per model.
UPPER = {"subsample": 1.0,
         "iter":
             {
                 "lr": 1000,
                 "nn": 243,
         },
         "n_estimators": {
             "xgboost": 2000,
             "rf": 512,
         }}
# Lower fidelity bounds, same layout as UPPER.
LOWER = {"subsample": 0.1,
         "iter":
             {
                 "lr": 10,
                 "nn": 3,
         },
         "n_estimators": {
             "xgboost": 50,
             "rf": 16,
         }}

# Fidelity types for which a multi-fidelity config is generated, per model.
FIDELITY_TYPES = {"lr": ["iter", "subsample"],
                  "nn": ["iter", "subsample"],
                  "rf": ["n_estimators", "subsample"],
                  "svm": ["subsample"],
                  "xgboost": ["n_estimators", "subsample"]}



def write_hpobench_taskconfig(
        objective_function_cfg: DictConfig,
        fid_type: str | None,
        lower: float | None,
        upper: float | None,
        objective_function_approximation: str,
        task_id: str,
        filename: Path,
        time_budget: float | None = None # minutes
) -> None:
    """Build the task config for one HPOBench objective and write it as YAML.

    The objective function is instantiated once (from a copy of the config
    with a concrete seed) to read its configuration space. The number of
    trials is derived from the dimensionality of that space via the
    module-level ``get_n_trials``. ``benchmark_id`` is read from the
    module-level ``benchmark_id_hpobench``.

    Args:
        objective_function_cfg: Config of the objective function; written to
            the task config unchanged.
        fid_type: Fidelity type, or ``None`` for a task without fidelity.
        lower: Minimum budget of the fidelity, ``None`` without fidelity.
        upper: Maximum budget of the fidelity, ``None`` without fidelity.
        objective_function_approximation: Stored in the task metadata.
        task_id: Name of the task.
        filename: Destination of the YAML file.
        time_budget: Time budget in minutes, or ``None`` for no time limit.
    """
    # Keep the `${seed}` interpolation in the written config; only the
    # throwaway copy gets a concrete seed so that it can be instantiated.
    objective_function_cfg_tmp = objective_function_cfg.copy()
    objective_function_cfg_tmp.seed = 234
    objective_function = instantiate(objective_function_cfg_tmp)
    input_space = InputSpace(
        configuration_space=objective_function.configspace,
        fidelity_space=FidelitySpace(
            is_multifidelity=fid_type is not None, fidelity_type=fid_type, min_budget=lower, max_budget=upper)
    )
    output_space = OutputSpace(
        n_objectives=1,
        objectives=["quality"],
    )
    search_space_kwargs = get_search_space_info(configspace=objective_function.configspace)
    optimization_resources = OptimizationResources(
        n_trials=get_n_trials(search_space_kwargs["dimensions"]),
        time_budget=time_budget,
        n_workers=1,
    )
    task_metadata = TaskMetadata(
        has_constraints=False,
        domain="ML",
        objective_function_approximation=objective_function_approximation,
        has_virtual_time=False,
        deterministic=False,
        **search_space_kwargs
    )
    cfg = DictConfig({
        "benchmark_id": benchmark_id_hpobench,
        "task_id": "${task.name}",
        "task": {
            "_target_": "carps.utils.task.Task",
            "name": task_id,
            "seed": "${seed}",
            "objective_function": objective_function_cfg,
            "input_space": get_dict_input_space(input_space),
            "output_space": get_dict_output_space(output_space),
            "optimization_resources": get_dict_opt_resources(optimization_resources),
            "metadata": get_dict_metadata(task_metadata),
        },
    })

    yaml_str = OmegaConf.to_yaml(cfg=cfg)
    # Hydra package directive: place the config at the global level.
    yaml_str = "# @package _global_\n" + yaml_str
    # Do not rely on the caller having created the output directory.
    filename.parent.mkdir(parents=True, exist_ok=True)
    filename.write_text(yaml_str)

# Write the ML task configs: per model and task id, one multi-fidelity config
# for each fidelity type of the model plus one blackbox config (fid_type None).
for model in MODELS:
    # task_ids_large should be used for all models except the neural network
    tids = TASK_IDS if model == "nn" else TASK_IDS + TASK_IDS_LARGE
    for tid in tids:
        for fid_type in FIDELITY_TYPES[model] + [None]:
            if fid_type in ("n_estimators", "iter"):
                # These bounds are model specific.
                lower = LOWER[fid_type][model]
                upper = UPPER[fid_type][model]
            elif fid_type == "subsample":
                lower = LOWER[fid_type]
                upper = UPPER[fid_type]
            else:
                lower = None
                upper = None

            benchtype = "tabular" if fid_type is None else "real"
            problemtype = "bb" if fid_type is None else "mf"
            task_id = f"hpobench/{problemtype}/{benchtype}/ml/{model}/{tid}"
            if fid_type is not None:
                task_id += f"/{fid_type}"

            # Create filename
            if fid_type is None:
                filename = target_path_blackbox / "tabular" / "ml" / f"cfg_ml_{model}_{tid}.yaml"
            else:
                filename = target_path_multifidelity / f"cfg_ml_{model}_{tid}_{fid_type}.yaml"
            filename.parent.mkdir(exist_ok=True, parents=True)

            # NO TIME BUDGET SPECIFIED ATM.
            # As specified in the HPOBench paper, the time budget would be
            # 100 x the average runtime on the highest fidelity (in minutes);
            # the measured durations are not loaded in this notebook.

            objective_function_cfg = DictConfig({
                    "_target_": problem_class,
                    "model": model,
                    "task_id": tid,
                    "budget_type": fid_type,
                    "problem": None,
                    "seed": "${seed}",  # get the seed from global cfg
                })
            # An unconditional `break` used to sit here: it made the call
            # below unreachable and ended the fidelity loop after one pass,
            # so no ML config was written although the old ones were deleted
            # when the output directory was reset.
            write_hpobench_taskconfig(
                objective_function_cfg=objective_function_cfg,
                fid_type=fid_type,
                lower=lower,
                upper=upper,
                objective_function_approximation=benchtype,
                task_id=task_id,
                filename=filename,
                time_budget=None
            )


In [6]:
# Create configs for blackbox/surr
target_path_surrogate = target_path_blackbox / "surr"
target_path_surrogate.mkdir(exist_ok=True, parents=True)
# NOTE: this rebinds MODELS, which the ML config cell also uses; re-run the
# cell defining the ML model list before re-running the ML config loop.
MODELS = ["ParamNet", "SVM"]
DATASETS = ["Adult", "Higgs", "Letter", "Mnist", "Optdigits", "Poker"]
# The SVM surrogate does not depend on the dataset, so only one config is
# written for it (under the dataset name "default").
generated_svm_once = False
for model in MODELS:
    for dataset in DATASETS:
        benchtype = "surr"

        if model == "ParamNet":  # 8 HPs, 1 int, 7 float
            dataset_name = dataset
            target = f"hpobench.container.benchmarks.surrogates.{model.lower()}_benchmark.{model}{dataset}OnStepsBenchmark"
        elif model == "SVM":  # 2 float HPs
            if generated_svm_once:
                continue
            dataset_name = "default"
            target = "hpobench.container.benchmarks.surrogates.svm_benchmark.SurrogateSVMBenchmark"
            generated_svm_once = True
        task_id = f"hpobench/bb/{benchtype}/{model}/{dataset_name}"

        objective_function_cfg = DictConfig({
                "_target_": problem_class,
                "model": model,
                "task_id": dataset_name,
                "budget_type": None,
                "problem": {
                    "_target_": target,
                    "rng": "${seed}",
                },
                "seed": "${seed}",  # get the seed from global cfg
        })

        # The search space metadata and the number of trials are derived from
        # the instantiated objective function inside write_hpobench_taskconfig,
        # so they are not specified by hand here.
        write_hpobench_taskconfig(
            objective_function_cfg=objective_function_cfg,
            fid_type=None,
            lower=None,
            upper=None,
            objective_function_approximation="surrogate",
            task_id=task_id,
            filename=target_path_surrogate / f"cfg_{model}_{dataset_name}.yaml",
            time_budget=None
        )

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:25:50,734 --- Logging level: 0
[INFO] Paramnet at 2025-03-06 11:25:53,793 --- Start Benchmark on dataset adult
[INFO] DataManager at 2025-03-06 11:25:53,793 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset adult
[INFO] DataManager at 2025-03-06 11:25:54,400 --- Finished loading the data for paramenet - dataset: adult
[INFO] BenchmarkServer at 2025-03-06 11:25:54,400 --- Server: Connected Successfully
Found in item {'name': 'average_units_per_layer_log2', 'log': False, 'lower': 4.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size_log2', 'log': False, 'lower': 3.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_0', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'final_lr_fraction_log2', 'log': Fa

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:25:58,774 --- Logging level: 0
[INFO] Paramnet at 2025-03-06 11:26:01,844 --- Start Benchmark on dataset higgs
[INFO] DataManager at 2025-03-06 11:26:01,844 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset higgs
[INFO] DataManager at 2025-03-06 11:26:02,569 --- Finished loading the data for paramenet - dataset: higgs
[INFO] BenchmarkServer at 2025-03-06 11:26:02,570 --- Server: Connected Successfully
Found in item {'name': 'average_units_per_layer_log2', 'log': False, 'lower': 4.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size_log2', 'log': False, 'lower': 3.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_0', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'final_lr_fraction_log2', 'log': Fa

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:26:06,918 --- Logging level: 0
[INFO] Paramnet at 2025-03-06 11:26:10,053 --- Start Benchmark on dataset letter
[INFO] DataManager at 2025-03-06 11:26:10,053 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset letter
[INFO] DataManager at 2025-03-06 11:26:10,677 --- Finished loading the data for paramenet - dataset: letter
[INFO] BenchmarkServer at 2025-03-06 11:26:10,677 --- Server: Connected Successfully
Found in item {'name': 'average_units_per_layer_log2', 'log': False, 'lower': 4.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size_log2', 'log': False, 'lower': 3.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_0', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'final_lr_fraction_log2', 'log':

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:26:15,140 --- Logging level: 0
[INFO] Paramnet at 2025-03-06 11:26:18,130 --- Start Benchmark on dataset mnist
[INFO] DataManager at 2025-03-06 11:26:18,130 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset mnist
[INFO] DataManager at 2025-03-06 11:26:18,829 --- Finished loading the data for paramenet - dataset: mnist
[INFO] BenchmarkServer at 2025-03-06 11:26:18,830 --- Server: Connected Successfully
Found in item {'name': 'average_units_per_layer_log2', 'log': False, 'lower': 4.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size_log2', 'log': False, 'lower': 3.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_0', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'final_lr_fraction_log2', 'log': Fa

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:26:23,128 --- Logging level: 0
[INFO] Paramnet at 2025-03-06 11:26:26,258 --- Start Benchmark on dataset optdigits
[INFO] DataManager at 2025-03-06 11:26:26,259 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset optdigits
[INFO] DataManager at 2025-03-06 11:26:26,953 --- Finished loading the data for paramenet - dataset: optdigits
[INFO] BenchmarkServer at 2025-03-06 11:26:26,953 --- Server: Connected Successfully
Found in item {'name': 'average_units_per_layer_log2', 'log': False, 'lower': 4.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size_log2', 'log': False, 'lower': 3.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_0', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'final_lr_fraction_log2

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:26:31,365 --- Logging level: 0
[INFO] Paramnet at 2025-03-06 11:26:34,413 --- Start Benchmark on dataset poker
[INFO] DataManager at 2025-03-06 11:26:34,414 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset poker
[INFO] DataManager at 2025-03-06 11:26:35,053 --- Finished loading the data for paramenet - dataset: poker
[INFO] BenchmarkServer at 2025-03-06 11:26:35,053 --- Server: Connected Successfully
Found in item {'name': 'average_units_per_layer_log2', 'log': False, 'lower': 4.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size_log2', 'log': False, 'lower': 3.0, 'upper': 8.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_0', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'log': False, 'lower': 0.0, 'upper': 0.5}
  return decoder(item, cs, _dec)
Found in item {'name': 'final_lr_fraction_log2', 'log': Fa

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred


pandas is not installed, can't download datasets for the ml.tabular_benchmarks (not needed for containers)


[INFO] BenchmarkServer at 2025-03-06 11:26:39,955 --- Logging level: 0
[INFO] DataManager at 2025-03-06 11:26:42,491 --- Start to load the data from /var/lib/hpobench/data/Surrogates for dataset svm
[INFO] DataManager at 2025-03-06 11:26:43,153 --- Finished loading the data for paramenet - dataset: svm
[INFO] BenchmarkServer at 2025-03-06 11:26:43,159 --- Server: Connected Successfully
Found in item {'name': 'C', 'log': False, 'lower': -10.0, 'upper': 10.0}
  return decoder(item, cs, _dec)
Found in item {'name': 'gamma', 'log': False, 'lower': -10.0, 'upper': 10.0}
  return decoder(item, cs, _dec)


In [7]:
# Create Configs for blackbox/tab/nas
target_path_nas = target_path_blackbox / "tab" / "nas"
target_path_nas.mkdir(exist_ok=True, parents=True)

# Benchmarks to generate configs for.
benchmarks = ["SliceLocalizationBenchmark", "ProteinStructureBenchmark", "NavalPropulsionBenchmark", "ParkinsonsTelemonitoringBenchmark",
                "NASCifar10ABenchmark", "NASCifar10BBenchmark", "NASCifar10CBenchmark",
                "Cifar10ValidNasBench201Benchmark", "Cifar100NasBench201Benchmark", "ImageNetNasBench201Benchmark",
                # "NASBench1shot1SearchSpace1Benchmark", "NASBench1shot1SearchSpace2Benchmark", "NASBench1shot1SearchSpace3Benchmark"
                ]

# Module below `hpobench.container.benchmarks.nas` that provides each benchmark.
# The 1shot1 benchmarks are listed so that they resolve once re-enabled above.
benchmarks_per_file = {
    "tabular_benchmarks": [
        "SliceLocalizationBenchmark",
        "ProteinStructureBenchmark",
        "NavalPropulsionBenchmark",
        "ParkinsonsTelemonitoringBenchmark",
    ],
    "nasbench_101": ["NASCifar10ABenchmark", "NASCifar10BBenchmark", "NASCifar10CBenchmark"],
    "nasbench_201": [
        "Cifar10ValidNasBench201Benchmark",
        "Cifar100NasBench201Benchmark",
        "ImageNetNasBench201Benchmark",
    ],
    "nasbench_1shot1": [
        "NASBench1shot1SearchSpace1Benchmark",
        "NASBench1shot1SearchSpace2Benchmark",
        "NASBench1shot1SearchSpace3Benchmark",
    ],
}
benchmark_to_file = {
    name: file_name
    for file_name, names in benchmarks_per_file.items()
    for name in names
}

for benchmark in benchmarks:
    # Fail loudly for an unknown benchmark instead of silently reusing the
    # module of the previous iteration.
    try:
        benchmark_file = benchmark_to_file[benchmark]
    except KeyError as err:
        raise ValueError(f"No benchmark module known for benchmark {benchmark!r}.") from err
    target = f"hpobench.container.benchmarks.nas.{benchmark_file}.{benchmark}"
    task_id = f"hpobench/bb/tab/nas/{benchmark}"

    write_hpobench_taskconfig(
        objective_function_cfg=DictConfig({
            "_target_": problem_class,
            "problem": {
                "_target_": target,
                "rng": "${seed}",
            },
            "seed": "${seed}",  # get the seed from global cfg
        }),
        fid_type=None,
        lower=None,
        upper=None,
        objective_function_approximation="tabular",
        task_id=task_id,
        filename=target_path_nas / f"cfg_{benchmark}.yaml",
        time_budget=None
    )

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred



[INFO] BenchmarkServer at 2025-03-06 11:27:10,484 --- Logging level: 0
[INFO] BenchmarkServer at 2025-03-06 11:27:13,593 --- Server: Connected Successfully
Found in item {'name': 'activation_fn_1', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'activation_fn_2', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size', 'sequence': [8, 16, 32, 64]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_2', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'init_lr', '

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred



[INFO] BenchmarkServer at 2025-03-06 11:27:22,276 --- Logging level: 0
[INFO] BenchmarkServer at 2025-03-06 11:27:26,059 --- Server: Connected Successfully
Found in item {'name': 'activation_fn_1', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'activation_fn_2', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size', 'sequence': [8, 16, 32, 64]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_2', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'init_lr', '

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred



[INFO] BenchmarkServer at 2025-03-06 11:27:34,760 --- Logging level: 0
[INFO] BenchmarkServer at 2025-03-06 11:27:38,532 --- Server: Connected Successfully
Found in item {'name': 'activation_fn_1', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'activation_fn_2', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size', 'sequence': [8, 16, 32, 64]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_2', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'init_lr', '

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred



[INFO] BenchmarkServer at 2025-03-06 11:27:47,044 --- Logging level: 0
[INFO] BenchmarkServer at 2025-03-06 11:27:51,024 --- Server: Connected Successfully
Found in item {'name': 'activation_fn_1', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'activation_fn_2', 'choices': ['tanh', 'relu'], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'batch_size', 'sequence': [8, 16, 32, 64]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_1', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'dropout_2', 'sequence': [0.0, 0.3, 0.6]}
  return decoder(item, cs, _dec)
Found in item {'name': 'init_lr', '

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred



[INFO] BenchmarkServer at 2025-03-06 11:28:00,950 --- Logging level: 0
[INFO] DataManager at 2025-03-06 11:28:03,465 --- NasBench101DataManager: Data already available. Skip downloading.
Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Loading dataset from file... This may take a few minutes...
Loaded dataset in 208 seconds


[INFO] BenchmarkServer at 2025-03-06 11:31:32,466 --- Server: Connected Successfully
Found in item {'name': 'edge_0', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_1', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_10', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_11', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_12', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_13', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_14', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_15', 'choices': [0, 1], 'weights': None}
  return decoder(item, cs, _dec)
Found in item {'name': 'edge_16', 'choices': [0, 1], 'weights': None}
  return decoder(item, 

INFO:    /etc/singularity/ exists; cleanup by system administrator is not complete (see https://apptainer.org/docs/admin/latest/singularity_migration.html)
INFO:    Environment variable SINGULARITYENV_HPOBENCH_DEBUG is set, but APPTAINERENV_HPOBENCH_DEBUG is preferred



[INFO] BenchmarkServer at 2025-03-06 11:31:41,907 --- Logging level: 0
[INFO] DataManager at 2025-03-06 11:31:44,942 --- NasBench101DataManager: Data already available. Skip downloading.
Instructions for updating:
Use eager execution and: 
`tf.data.TFRecordDataset(path)`


Loading dataset from file... This may take a few minutes...


KeyboardInterrupt: 

In [None]:
# -> Cartpole removed for now, as the benchmark does not run

# Create Configs for blackbox/real/cartpole
# target_path_nas = target_path_blackbox / "real" / "cartpole"
#
# benchmark_dimensions = {
#     "CartpoleReduced": {
#         "dimensions": 7,
#         "search_space_n_categoricals": 0,
#         "search_space_n_ordinals": 0,
#         "search_space_n_integers": 3,
#         "search_space_n_floats": 4
#     },
#     "CartpoleFull": {
#         "dimensions": 7,
#         "search_space_n_categoricals": 0,
#         "search_space_n_ordinals": 0,
#         "search_space_n_integers": 3,
#         "search_space_n_floats": 4
#     },
# }
#
# for benchmark, dimensions_dict in benchmark_dimensions.items():
#     target = f"hpobench.container.benchmarks.rl.cartpole.{benchmark}"
#     task_id = f"hpobench/bb/real/rl/{benchmark}"
#
#     n_trials = get_n_trials(dimensions_dict["dimensions"])
#
#     search_space_kwargs = {
#         "dimensions": dimensions_dict["dimensions"],
#         "search_space_n_categoricals": dimensions_dict["search_space_n_categoricals"],
#         "search_space_n_ordinals": dimensions_dict["search_space_n_ordinals"],
#         "search_space_n_integers": dimensions_dict["search_space_n_integers"],
#         "search_space_n_floats": dimensions_dict["search_space_n_floats"],
#         "search_space_has_conditionals": False,
#         "search_space_has_forbiddens": False,
#         "search_space_has_priors": False,
#     }
#
#     task = Task(
#         n_trials=n_trials,
#         time_budget=None,
#         n_workers=1,
#         n_objectives=1,
#         objectives=["quality"],
#         is_multifidelity=False,
#         fidelity_type=None,
#         min_budget=None,
#         max_budget=None,
#         has_constraints=False,
#         domain="RL",
#         objective_function_approximation="real",
#         has_virtual_time=True,
#         deterministic=True,
#         **search_space_kwargs
#     )
#
#     cfg = DictConfig({
#         "benchmark_id": benchmark_id_hpobench,
#         "task_id": task_id,
#         "problem": {
#                 "_target_": problem_class,
#                 "problem": {
#                     "_target_": target,
#                     "rng": 1,#"${seed}",
#                 },
#                 "seed": "${seed}",  # get the seed from global cfg
#         },
#         "task": asdict(task)
#
#     })
#
#     fn = target_path_blackbox / f"real/rl/cfg_real_rl_{benchmark}.yaml"
#     fn.parent.mkdir(exist_ok=True, parents=True)
#     yaml_str = OmegaConf.to_yaml(cfg=cfg)
#     yaml_str = "# @package _global_\n" + yaml_str
#     fn.write_text(yaml_str)

# YAHPO

In [1]:
import contextlib
import itertools
import shutil
from dataclasses import asdict
from pathlib import Path
from carps.utils.generate_tasks import get_dict_input_space, get_dict_metadata, get_dict_opt_resources, get_dict_output_space

import numpy as np
from carps.utils.task import InputSpace, OptimizationResources, OutputSpace, TaskMetadata, get_search_space_info, FidelitySpace
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf
from rich.progress import Progress
# from yahpo_gym.configuration import config_dict

# Fully qualified class name used as the hydra ``_target_`` of every YAHPO
# objective function config.
problem_class = "carps.objective_functions.yahpo.YahpoObjectiveFunction"

# Output layout, relative to the current working directory:
#   YAHPO/blackbox and YAHPO/multifidelity
base_path = Path()
target_path = base_path.joinpath("YAHPO")

# Wipe the output of any previous run before regenerating the configs.
if target_path.is_dir():
    shutil.rmtree(str(target_path))

target_path_blackbox = target_path.joinpath("blackbox")
target_path_multifidelity = target_path.joinpath("multifidelity")

target_path.mkdir(parents=True, exist_ok=True)
target_path_blackbox.mkdir(parents=True, exist_ok=True)
target_path_multifidelity.mkdir(parents=True, exist_ok=True)

# Maps each YAHPO scenario name ("bench") to the list of instance ids for
# which task configs are generated. Every (bench, instance) pair is combined
# with the fidelity types of that bench plus a blackbox (no fidelity) variant.
# The ids are kept as strings (presumably OpenML task/dataset ids, except for
# nb301's "CIFAR10" -- TODO confirm against the YAHPO Gym documentation).
# The "fcnet" scenario is currently disabled (commented out below).
COMBIS = {"lcbench": ["3945", "7593", "34539", "126025", "126026", "126029", "146212", "167104",
                      "167149", "167152", "167161", "167168", "167181", "167184", "167185",
                      "167190", "167200", "167201", "168329", "168330", "168331", "168335",
                      "168868", "168908", "168910", "189354", "189862", "189865", "189866",
                      "189873", "189905", "189906", "189908", "189909"],
        #   "fcnet": ["fcnet_naval_propulsion", "fcnet_protein_structure",
                    # "fcnet_slice_localization", "fcnet_parkinsons_telemonitoring"],
          "nb301": ["CIFAR10"],
          "rbv2_svm": ["40981", "4134", "1220", "40978", "40966", "40536", "41156", "458",
                       "41157", "40975", "40994", "1468", "6332", "40670", "151", "1475",
                       "1476", "1478", "1479", "41212", "1480", "1053", "1067", "1056", "12",
                       "1487", "1068", "32", "470", "312", "38", "40982", "50", "41216", "307",
                       "40498", "181", "1464", "41164", "16", "1461", "41162", "6", "14",
                       "1494", "54", "375", "1590", "23", "41163", "1111", "41027", "40668",
                       "41138", "4135", "4538", "40496", "4534", "40900", "1457", "11", "1462",
                       "41142", "40701", "29", "37", "23381", "188", "41143", "1063", "3", "18",
                       "40979", "22", "1515", "334", "24", "1493", "28", "1050", "1049",
                       "40984", "40685", "42", "44", "46", "1040", "41146", "377", "40499",
                       "1497", "60", "40983", "4154", "469", "31", "41278", "1489", "1501",
                       "15", "300", "1485", "1486", "1510", "182", "41169"],
          "rbv2_ranger": ["4135", "40981", "4134", "1220", "4154", "4538", "40978", "375",
                          "40496", "40966", "4534", "40900", "40536", "41156", "1590", "1457",
                          "458", "469", "41157", "11", "1461", "1462", "1464", "15", "40975",
                          "41142", "40701", "40994", "23", "1468", "40668", "29", "31", "6332",
                          "37", "40670", "23381", "151", "188", "41164", "1475", "1476", "1478",
                          "1479", "41212", "1480", "41143", "1053", "41027", "1067", "1063",
                          "3", "6", "1485", "1056", "12", "14", "16", "18", "40979", "22",
                          "1515", "334", "24", "1486", "41278", "28", "1487", "1068", "1050",
                          "1049", "32", "1489", "470", "1494", "182", "312", "40984", "1501",
                          "40685", "38", "42", "44", "46", "40982", "1040", "41146", "377",
                          "40499", "50", "54", "41216", "307", "1497", "60", "1510", "40983",
                          "40498", "181", "41138", "41163", "1111", "41159", "300", "41162",
                          "23517", "41165", "4541", "41161", "41166", "40927", "41150", "23512",
                          "41168", "1493", "40996", "554", "40923", "41169"],
          "rbv2_rpart": ["41138", "4135", "40981", "4134", "40927", "1220", "4154", "40923",
                         "41163", "40996", "4538", "40978", "375", "1111", "40496", "40966",
                         "41150", "4534", "40900", "40536", "41156", "1590", "1457", "458",
                         "469", "41157", "11", "1461", "1462", "1464", "15", "40975", "41142",
                         "40701", "40994", "23", "1468", "40668", "29", "31", "6332", "37",
                         "4541", "40670", "23381", "151", "188", "41164", "1475", "1476",
                         "41159", "1478", "41169", "23512", "1479", "41212", "1480", "300",
                         "41168", "41143", "1053", "41027", "1067", "1063", "41162", "3", "6",
                         "1485", "1056", "12", "14", "16", "18", "40979", "22", "1515", "554",
                         "334", "24", "1486", "23517", "1493", "28", "1487", "1068", "1050",
                         "1049", "32", "1489", "470", "1494", "41161", "41165", "182", "312",
                         "40984", "1501", "40685", "38", "42", "44", "46", "40982", "1040",
                         "41146", "377", "40499", "50", "54", "41166", "307", "1497", "60",
                         "1510", "40983", "40498", "181"],
          "rbv2_glmnet": ["41138", "4135", "40981", "4134", "1220", "4154", "41163", "4538",
                          "40978", "375", "1111", "40496", "40966", "41150", "4534", "40900",
                          "40536", "41156", "1590", "1457", "458", "469", "41157", "11", "1461",
                          "1462", "1464", "15", "40975", "41142", "40701", "40994", "23",
                          "1468", "40668", "29", "31", "6332", "37", "4541", "40670", "23381",
                          "151", "188", "41164", "1475", "1476", "41159", "1478", "41169",
                          "23512", "1479", "41212", "1480", "300", "41168", "41143", "1053",
                          "41027", "1067", "1063", "41162", "3", "6", "1485", "1056", "12",
                          "14", "16", "18", "40979", "22", "1515", "334", "24", "1486", "23517",
                          "41278", "1493", "28", "1487", "1068", "1050", "1049", "32", "1489",
                          "470", "1494", "41161", "182", "312", "40984", "1501", "40685", "38",
                          "42", "44", "46", "40982", "1040", "41146", "377", "40499", "50",
                          "54", "41216", "41166", "307", "1497", "60", "1510", "40983", "40498",
                          "181", "554"],
          "rbv2_xgboost": ["16", "40923", "41143", "470", "1487", "40499", "40966", "41164",
                           "1497", "40975", "1461", "41278", "11", "54", "300", "40984", "31",
                           "1067", "1590", "40983", "41163", "41165", "182", "1220", "41159",
                           "41169", "42", "188", "1457", "1480", "6332", "181", "1479", "40670",
                           "40536", "41138", "41166", "6", "14", "29", "458", "1056", "1462",
                           "1494", "40701", "12", "1493", "44", "307", "334", "40982", "41142",
                           "38", "1050", "469", "23381", "41157", "15", "4541", "23", "4134",
                           "40927", "40981", "41156", "3", "1049", "40900", "1063", "23512",
                           "40979", "1040", "1068", "41161", "22", "1489", "41027", "24",
                           "4135", "23517", "1053", "1468", "312", "377", "1515", "18", "1476",
                           "1510", "41162", "28", "375", "1464", "40685", "40996", "41146",
                           "41216", "40668", "41212", "32", "60", "4538", "40496", "41150",
                           "37", "46", "554", "1475", "1485", "1501", "1111", "4534", "41168",
                           "151", "4154", "40978", "40994", "50", "1478", "1486", "40498"],
          "rbv2_aknn": ["41138", "40981", "4134", "40927", "1220", "4154", "41163", "40996",
                        "4538", "40978", "375", "1111", "40496", "40966", "41150", "4534",
                        "40900", "40536", "41156", "1590", "1457", "458", "469", "41157", "11",
                        "1461", "1462", "1464", "15", "40975", "41142", "40701", "40994", "23",
                        "1468", "40668", "29", "31", "6332", "37", "4541", "40670", "23381",
                        "151", "188", "41164", "1475", "1476", "41159", "1478", "41169",
                        "23512", "1479", "41212", "1480", "300", "41168", "41143", "1053",
                        "41027", "1067", "1063", "41162", "3", "6", "1485", "1056", "12", "14",
                        "16", "18", "40979", "22", "1515", "554", "334", "24", "1486", "23517",
                        "41278", "1493", "28", "1487", "1068", "1050", "1049", "32", "1489",
                        "470", "1494", "41161", "41165", "182", "312", "40984", "1501", "40685",
                        "38", "42", "44", "46", "40982", "1040", "41146", "377", "40499", "50",
                        "54", "41216", "41166", "307", "1497", "60", "1510", "40983", "40498",
                        "181", "40923"],
          "rbv2_super": ["41138", "40981", "4134", "1220", "4154", "41163", "4538", "40978",
                         "375", "1111", "40496", "40966", "4534", "40900", "40536", "41156",
                         "1590", "1457", "458", "469", "41157", "11", "1461", "1462", "1464",
                         "15", "40975", "41142", "40701", "40994", "23", "1468", "40668", "29",
                         "31", "6332", "37", "40670", "23381", "151", "188", "41164", "1475",
                         "1476", "1478", "41169", "1479", "41212", "1480", "300", "41143",
                         "1053", "41027", "1067", "1063", "41162", "3", "6", "1485", "1056",
                         "12", "14", "16", "18", "40979", "22", "1515", "334", "24", "1486",
                         "1493", "28", "1487", "1068", "1050", "1049", "32", "1489", "470",
                         "1494", "182", "312", "40984", "1501", "40685", "38", "42", "44", "46",
                         "40982", "1040", "41146", "377", "40499", "50", "54", "307", "1497",
                         "60", "1510", "40983", "40498", "181"],
          "iaml_ranger": ["40981", "41146", "1489", "1067"],
          "iaml_rpart": ["40981", "41146", "1489", "1067"],
          "iaml_glmnet": ["40981", "41146", "1489", "1067"],
          "iaml_xgboost": ["40981", "41146", "1489", "1067"],
          "iaml_super": ["40981", "41146", "1489", "1067"]}

# Largest fidelity value per fidelity type. "trainsize" and "repl" are shared
# across benches, whereas the epoch limit is looked up per bench.
UPPER = dict(
    trainsize=1.0,
    repl=10,
    epochs=dict(lcbench=52, fcnet=100, nb301=98),
)

# Smallest fidelity value per fidelity type, same layout as UPPER.
LOWER = dict(
    trainsize=0.03,
    repl=1,
    epochs=dict(lcbench=1, fcnet=1, nb301=1),
)

# Fidelity types available for each bench. Every bench gets its own list
# object, exactly as if each entry had been written out by hand.
FIDELITY_TYPES = {
    **{bench: ["epoch"] for bench in ("lcbench", "fcnet", "nb301")},
    **{
        bench: ["trainsize", "repl"]
        for bench in (
            "rbv2_svm",
            "rbv2_ranger",
            "rbv2_rpart",
            "rbv2_glmnet",
            "rbv2_xgboost",
            "rbv2_aknn",
            "rbv2_super",
        )
    },
    **{
        bench: ["trainsize"]
        for bench in (
            "iaml_ranger",
            "iaml_rpart",
            "iaml_glmnet",
            "iaml_xgboost",
            "iaml_super",
        )
    },
}

# Metrics listed for each bench. All rbv2_* benches share one metric set and
# all iaml_* benches share another; each bench still gets its own list object.
METRICS = {
    "lcbench": [
        "time",
        "val_accuracy",
        "val_cross_entropy",
        "val_balanced_accuracy",
        "test_cross_entropy",
        "test_balanced_accuracy",
    ],
    "fcnet": ["valid_loss", "valid_mse", "runtime", "n_params"],
    "nb301": ["val_accuracy", "runtime"],
    **{
        bench: [
            "acc",
            "bac",
            "auc",
            "brier",
            "f1",
            "logloss",
            "timetrain",
            "timepredict",
            "memory",
        ]
        for bench in (
            "rbv2_svm",
            "rbv2_ranger",
            "rbv2_rpart",
            "rbv2_glmnet",
            "rbv2_xgboost",
            "rbv2_aknn",
            "rbv2_super",
        )
    },
    **{
        bench: [
            "mmce",
            "f1",
            "auc",
            "logloss",
            "ramtrain",
            "rammodel",
            "rampredict",
            "timetrain",
            "timepredict",
            "mec",
            "ias",
            "nf",
        ]
        for bench in (
            "iaml_ranger",
            "iaml_rpart",
            "iaml_glmnet",
            "iaml_xgboost",
            "iaml_super",
        )
    },
}

# Select your preferred target variable for the iaml_* and rbv2_* benches.
iaml = "f1"
rbv2 = "acc"

# Metric that each bench is optimized for.
TARGETMETRIC = {
    "lcbench": "val_accuracy",
    "fcnet": "valid_mse",
    "nb301": "val_accuracy",
    **dict.fromkeys(
        (
            "rbv2_svm",
            "rbv2_ranger",
            "rbv2_rpart",
            "rbv2_glmnet",
            "rbv2_xgboost",
            "rbv2_aknn",
            "rbv2_super",
        ),
        rbv2,
    ),
    **dict.fromkeys(
        (
            "iaml_ranger",
            "iaml_rpart",
            "iaml_glmnet",
            "iaml_xgboost",
            "iaml_super",
        ),
        iaml,
    ),
}

def get_n_trials(dimension: int) -> int:
    """Return the trial budget for a search space with ``dimension`` dimensions.

    The budget is ``20 + 40 * sqrt(dimension)``, rounded up to the next
    integer.
    """
    raw_budget = 20 + 40 * np.sqrt(dimension)
    return int(np.ceil(raw_budget))

def write_yahpo_taskconfig(
        objective_function_cfg: DictConfig,
        fid_type: str | None,
        lower: float | None,
        upper: float | None,
        objective_function_approximation: str,
        task_id: str,
        filename: Path,
        benchmark_id: str,
        metric: list[str] | str,
        time_budget: float | None = None # minutes
) -> None:
    """Build the carps task config for one YAHPO objective function and write it as YAML.

    The objective function is instantiated once (with a fixed seed) to read
    its configuration space; the search-space info derived from it determines
    the trial budget and the task metadata.

    Args:
        objective_function_cfg: Hydra config of the objective function. It is
            stored unchanged in the written task config.
        fid_type: Fidelity type, or ``None`` for a blackbox (non-multifidelity) task.
        lower: Minimum budget of the fidelity, ``None`` without fidelity.
        upper: Maximum budget of the fidelity, ``None`` without fidelity.
        objective_function_approximation: Stored in the task metadata.
        task_id: Name of the task.
        filename: Destination YAML file; missing parent directories are created.
        benchmark_id: Stored as ``benchmark_id`` in the written config.
        metric: Objective name(s). A single name may be given as a plain string.
        time_budget: Time budget forwarded to the optimization resources, in minutes.
    """
    # # Suppress "cite me" print statement
    # cfg = config_dict.get_item(objective_function_cfg.bench)
    # cfg.config["citation"] = None
    # config_dict.update({objective_function_cfg.bench: cfg})

    # A single metric may arrive as a bare string. Wrap it so that the output
    # space counts objectives (not characters) and stores a list of names.
    objectives = [metric] if isinstance(metric, str) else list(metric)

    # Instantiate on a copy with a concrete seed; the config that gets written
    # keeps whatever seed entry the caller put in (e.g. an interpolation).
    objective_function_cfg_tmp = objective_function_cfg.copy()
    objective_function_cfg_tmp.seed = 234
    # Silence anything printed to stdout during instantiation.
    with contextlib.redirect_stdout(None):
        objective_function = instantiate(objective_function_cfg_tmp)
    input_space = InputSpace(
        configuration_space=objective_function.configspace,
        fidelity_space=FidelitySpace(is_multifidelity=fid_type is not None, fidelity_type=fid_type, min_budget=lower, max_budget=upper)
    )
    output_space = OutputSpace(
        n_objectives=len(objectives),
        objectives=objectives,
    )
    search_space_kwargs = get_search_space_info(configspace=objective_function.configspace)
    optimization_resources = OptimizationResources(
        n_trials=get_n_trials(search_space_kwargs["dimensions"]),
        time_budget=time_budget,
        n_workers=1,
    )
    task_metadata = TaskMetadata(
        has_constraints=False,
        domain="ML",
        objective_function_approximation=objective_function_approximation,
        has_virtual_time=False,
        deterministic=False,
        **search_space_kwargs
    )
    cfg = DictConfig({
        "benchmark_id": benchmark_id,
        "task_id": "${task.name}",
        "task": {
            "_target_": "carps.utils.task.Task",
            "name": task_id,
            "seed": "${seed}",
            "objective_function": objective_function_cfg,
            "input_space": get_dict_input_space(input_space),
            "output_space": get_dict_output_space(output_space),
            "optimization_resources": get_dict_opt_resources(optimization_resources),
            "metadata": get_dict_metadata(task_metadata),
        },
    })

    # Prepend the hydra package directive so the file is merged at the global level.
    yaml_str = OmegaConf.to_yaml(cfg=cfg)
    yaml_str = "# @package _global_\n" + yaml_str
    filename.parent.mkdir(exist_ok=True, parents=True)
    filename.write_text(yaml_str)

In [None]:
# Read as a global by write_y and forwarded as the `time_budget` argument of
# write_yahpo_taskconfig.
time_budget = None

# NOTE(review): `n` is not referenced in the visible part of this cell --
# confirm whether it is still needed.
n = 0

def write_y(instance, fid_type, bench):
    """Write the YAHPO task config for one (bench, instance, fidelity) combination.

    Args:
        instance: Instance id within the bench.
        fid_type: Fidelity type, or ``None`` for the blackbox variant.
        bench: YAHPO scenario name.

    Blackbox configs are written directly to ``target_path_blackbox``;
    multifidelity configs go to the ``all`` subfolder of
    ``target_path_multifidelity``.
    """
    is_blackbox = fid_type is None

    # Fidelity bounds: none for blackbox, per-bench for epochs, otherwise
    # looked up by fidelity type.
    if is_blackbox:
        lower = upper = None
    elif fid_type == "epoch":
        lower, upper = LOWER["epochs"][bench], UPPER["epochs"][bench]
    else:
        lower, upper = LOWER[fid_type], UPPER[fid_type]

    if is_blackbox:
        filename = target_path_blackbox / f"cfg_{bench}_{instance}.yaml"
    else:
        filename = target_path_multifidelity / "all" / f"cfg_{bench}_{instance}_{fid_type}.yaml"

    metric = TARGETMETRIC[bench]
    objective_function_cfg = DictConfig({
        "_target_": problem_class,
        "bench": bench,
        "instance": instance,
        "budget_type": fid_type,
        "metric": metric,
        "seed": "${seed}",
    })

    write_yahpo_taskconfig(
        objective_function_cfg=objective_function_cfg,
        fid_type=fid_type,
        lower=lower,
        upper=upper,
        objective_function_approximation="surrogate",
        task_id=f"yahpo/{bench}/{instance}/{fid_type}",
        filename=filename,
        time_budget=time_budget,
        benchmark_id="YAHPO",
        metric=metric,
    )

from multiprocessing import Pool, Manager

# with Progress(redirect_stdout=False) as progress:
#     task = progress.add_task("[cyan]Processing algo on dataset", total=len(COMBIS))
#     for bench, instance_list in COMBIS.items():
#         fidelity_list = FIDELITY_TYPES[bench] + [None]
#         combos = list(itertools.product(instance_list, fidelity_list))
#         with Pool() as p:
#             p.starmap(write_y, itertools.product(instance_list, fidelity_list, [bench])) 

#         progress.update(task, advance=1)  # Advance the task by 1 unit

def worker_task(instance, fidelity, bench, progress_queue):
    """Generate one config via ``write_y`` and report completion on the queue.

    Args:
        instance: Instance identifier passed through to ``write_y``.
        fidelity: Fidelity type (or ``None``) passed through to ``write_y``.
        bench: Scenario name passed through to ``write_y``.
        progress_queue: Queue that receives a ``1`` once the config is written.
    """
    write_y(instance=instance, fid_type=fidelity, bench=bench)
    # One token per finished job; the parent counts them to advance its progress bar.
    progress_queue.put(1)

# Generate all YAHPO configs, scenario by scenario, with an outer progress bar
# over scenarios and one inner bar per scenario.
with Progress(redirect_stdout=False) as progress:
    overall_task = progress.add_task("[cyan]Processing algo on dataset", total=len(COMBIS))

    # The manager provides a queue that can be handed to the pool worker.
    with Manager() as manager:
        progress_queue = manager.Queue()
        for bench, instance_list in COMBIS.items():
            # Every fidelity type of the scenario plus the variant without fidelity.
            fidelity_list = FIDELITY_TYPES[bench] + [None]
            combos = list(itertools.product(instance_list, fidelity_list))

            inner_task = progress.add_task(f"[green]Processing {bench}", total=len(combos))

            # starmap blocks until every job is finished; the pool has a
            # single worker process, so the jobs run one after another.
            job_args = [(inst, fid, bench, progress_queue) for inst, fid in combos]
            with Pool(processes=1) as pool:
                pool.starmap(worker_task, job_args)

            # The queue is drained only after the pool is done, so the inner
            # bar jumps to completion once per scenario.
            while not progress_queue.empty():
                progress_queue.get()
                progress.update(inner_task, advance=1)

            progress.update(overall_task, advance=1)

Output()

In [2]:
# YAHPO Gym single-objective collection.
# The generation loops in this file read the "scenario", "instance", "target"
# and "budget" keys of each entry; "id" and "rho" are not read by the code
# visible here.
# NOTE(review): "rho" presumably is a surrogate-quality score taken from the
# YAHPO Gym paper — confirm before relying on it.
yahpo_gym_so_collection = [
    {"id": 1, "scenario": "lcbench", "instance": "167168", "target": "val_accuracy", "rho": 0.94, "budget": 126},
    {"id": 2, "scenario": "lcbench", "instance": "189873", "target": "val_accuracy", "rho": 0.97, "budget": 126},
    {"id": 3, "scenario": "lcbench", "instance": "189906", "target": "val_accuracy", "rho": 0.97, "budget": 126},
    {"id": 4, "scenario": "nb301", "instance": "CIFAR10", "target": "val_accuracy", "rho": 0.98, "budget": 250},
    {"id": 5, "scenario": "rbv2_glmnet", "instance": "375", "target": "acc", "rho": 0.80, "budget": 90},
    {"id": 6, "scenario": "rbv2_glmnet", "instance": "458", "target": "acc", "rho": 0.85, "budget": 90},
    {"id": 7, "scenario": "rbv2_ranger", "instance": "16", "target": "acc", "rho": 0.93, "budget": 134},
    {"id": 8, "scenario": "rbv2_ranger", "instance": "42", "target": "acc", "rho": 0.98, "budget": 134},
    {"id": 9, "scenario": "rbv2_rpart", "instance": "14", "target": "acc", "rho": 0.92, "budget": 110},
    {"id": 10, "scenario": "rbv2_rpart", "instance": "40499", "target": "acc", "rho": 0.97, "budget": 110},
    {"id": 11, "scenario": "rbv2_super", "instance": "1053", "target": "acc", "rho": 0.31, "budget": 267},
    {"id": 12, "scenario": "rbv2_super", "instance": "1457", "target": "acc", "rho": 0.70, "budget": 267},
    {"id": 13, "scenario": "rbv2_super", "instance": "1063", "target": "acc", "rho": 0.57, "budget": 267},
    {"id": 14, "scenario": "rbv2_super", "instance": "1479", "target": "acc", "rho": 0.36, "budget": 267},
    {"id": 15, "scenario": "rbv2_super", "instance": "15", "target": "acc", "rho": 0.75, "budget": 267},
    {"id": 16, "scenario": "rbv2_super", "instance": "1468", "target": "acc", "rho": 0.77, "budget": 267},
    {"id": 17, "scenario": "rbv2_xgboost", "instance": "12", "target": "acc", "rho": 0.93, "budget": 170},
    {"id": 18, "scenario": "rbv2_xgboost", "instance": "1501", "target": "acc", "rho": 0.89, "budget": 170},
    {"id": 19, "scenario": "rbv2_xgboost", "instance": "16", "target": "acc", "rho": 0.91, "budget": 170},
    {"id": 20, "scenario": "rbv2_xgboost", "instance": "40499", "target": "acc", "rho": 0.96, "budget": 170},
]
# Output directory for the single-objective configs (target_path is set outside this cell).
target_path_soc = target_path / "SO"

# YAHPO Gym multi-objective collection.
# Same entry layout as the single-objective collection, except that "target"
# is a list of objective names. The same (scenario, instance) pair may appear
# more than once with a different set of targets (e.g. ids 7 and 9).
yahpo_gym_mo_collection = [
    {"id": 1, "scenario": "iaml_glmnet", "instance": "1489", "target": ["mmce", "nf"], "rho": 0.86, "budget": 77},
    {"id": 2, "scenario": "iaml_glmnet", "instance": "1067", "target": ["mmce", "nf"], "rho": 0.73, "budget": 77},
    {"id": 3, "scenario": "iaml_ranger", "instance": "1489", "target": ["mmce", "nf", "ias"], "rho": 0.93, "budget": 134},
    {"id": 4, "scenario": "iaml_ranger", "instance": "1067", "target": ["mmce", "nf", "ias"], "rho": 0.92, "budget": 134},
    {"id": 5, "scenario": "iaml_super", "instance": "1489", "target": ["mmce", "nf", "ias"], "rho": 0.82, "budget": 232},
    {"id": 6, "scenario": "iaml_super", "instance": "1067", "target": ["mmce", "nf", "ias"], "rho": 0.82, "budget": 232},
    {"id": 7, "scenario": "iaml_xgboost", "instance": "40981", "target": ["mmce", "nf", "ias"], "rho": 0.88, "budget": 165},
    {"id": 8, "scenario": "iaml_xgboost", "instance": "1489", "target": ["mmce", "nf", "ias"], "rho": 0.92, "budget": 165},
    {"id": 9, "scenario": "iaml_xgboost", "instance": "40981", "target": ["mmce", "nf", "ias", "rammodel"], "rho": 0.89, "budget": 165},
    {"id": 10, "scenario": "iaml_xgboost", "instance": "1489", "target": ["mmce", "nf", "ias", "rammodel"], "rho": 0.92, "budget": 165},
    {"id": 11, "scenario": "lcbench", "instance": "167152", "target": ["val_accuracy", "val_cross_entropy"], "rho": 0.98, "budget": 126},
    {"id": 12, "scenario": "lcbench", "instance": "167185", "target": ["val_accuracy", "val_cross_entropy"], "rho": 0.91, "budget": 126},
    {"id": 13, "scenario": "lcbench", "instance": "189873", "target": ["val_accuracy", "val_cross_entropy"], "rho": 0.93, "budget": 126},
    {"id": 14, "scenario": "rbv2_ranger", "instance": "6", "target": ["acc", "memory"], "rho": 0.90, "budget": 134},
    {"id": 15, "scenario": "rbv2_ranger", "instance": "40979", "target": ["acc", "memory"], "rho": 0.73, "budget": 134},
    {"id": 16, "scenario": "rbv2_ranger", "instance": "375", "target": ["acc", "memory"], "rho": 0.85, "budget": 134},
    {"id": 17, "scenario": "rbv2_rpart", "instance": "41163", "target": ["acc", "memory"], "rho": 0.85, "budget": 110},
    {"id": 18, "scenario": "rbv2_rpart", "instance": "1476", "target": ["acc", "memory"], "rho": 0.80, "budget": 110},
    {"id": 19, "scenario": "rbv2_rpart", "instance": "40499", "target": ["acc", "memory"], "rho": 0.83, "budget": 110},
    {"id": 20, "scenario": "rbv2_super", "instance": "1457", "target": ["acc", "memory"], "rho": 0.66, "budget": 267},
    {"id": 21, "scenario": "rbv2_super", "instance": "6", "target": ["acc", "memory"], "rho": 0.68, "budget": 267},
    {"id": 22, "scenario": "rbv2_super", "instance": "1053", "target": ["acc", "memory"], "rho": 0.45, "budget": 267},
    {"id": 23, "scenario": "rbv2_xgboost", "instance": "28", "target": ["acc", "memory"], "rho": 0.80, "budget": 170},
    {"id": 24, "scenario": "rbv2_xgboost", "instance": "182", "target": ["acc", "memory"], "rho": 0.79, "budget": 170},
    {"id": 25, "scenario": "rbv2_xgboost", "instance": "12", "target": ["acc", "memory"], "rho": 0.76, "budget": 170},
]
# Output directory for the multi-objective configs (target_path is set outside this cell).
target_path_moc = target_path / "MO"



# Write one task config without fidelity for every entry of the
# single-objective ("so") and multi-objective ("mo") collections.
for _target_path, collection, identifier in zip(
    [target_path_soc, target_path_moc],
    [yahpo_gym_so_collection, yahpo_gym_mo_collection],
    ["so", "mo"],
    strict=True,  # the three literal lists have the same length
):
    for entry in collection:
        # These tasks never use a fidelity, so there are no fidelity bounds
        # to look up; both stay None.
        fid_type = None
        lower = None
        upper = None

        bench = entry["scenario"]
        instance = entry["instance"]
        budget = entry["budget"]

        # Single-objective entries store the target as a bare string;
        # normalise to a list so both collections are handled alike.
        metric = entry["target"]
        if not isinstance(metric, list):
            metric = [metric]

        # fid_type is None here, so the id ends in the literal "None".
        task_id = f"yahpo/{identifier}/{bench}/{instance}/{fid_type}"

        # NOTE(review): the collection's "budget" value is forwarded as
        # `time_budget` — confirm it is meant as a time budget and not as a
        # number of trials.
        write_yahpo_taskconfig(
            objective_function_cfg=DictConfig({
                "_target_": problem_class,
                "bench": bench,
                "instance": instance,
                "budget_type": fid_type,
                "metric": metric,
                "seed": "${seed}",
            }),
            fid_type=fid_type,
            lower=lower,
            upper=upper,
            objective_function_approximation="surrogate",
            task_id=task_id,
            filename=_target_path / f"cfg_{bench}_{instance}.yaml",
            time_budget=budget,
            benchmark_id="YAHPO",
            metric=metric,
        )


In [3]:
# Multi-fidelity variants of the single-objective collection: one config per
# (collection entry, fidelity type of its scenario) pair.
target_path_mf_soc = base_path / "YAHPO" / "multifidelity" / "SO"

for entry in yahpo_gym_so_collection:
    for fid_type in FIDELITY_TYPES[entry["scenario"]]:
        bench, instance, budget = entry["scenario"], entry["instance"], entry["budget"]

        # The target is stored as a bare string; wrap it so `metric` is always a list.
        target = entry["target"]
        metric = target if isinstance(target, list) else [target]

        # Epoch bounds are kept per scenario under the "epochs" key; all other
        # fidelity types are looked up directly by name.
        if fid_type == "epoch":
            lower, upper = LOWER["epochs"][bench], UPPER["epochs"][bench]
        else:
            lower, upper = LOWER[fid_type], UPPER[fid_type]

        task_id = f"yahpo/multifidelity/so/{fid_type}/{bench}/{instance}/{fid_type}"
        problem_class = "carps.objective_functions.yahpo.YahpoObjectiveFunction"

        # All fidelity types share one directory; the fidelity type is part of
        # the file name instead.
        budget_type_path = target_path_mf_soc
        fn = budget_type_path / f"cfg_{bench}_{instance}_{fid_type}_mf.yaml"

        objective_function_cfg = DictConfig({
            "_target_": problem_class,
            "bench": bench,
            "instance": instance,
            "budget_type": fid_type,
            "metric": metric,
            "seed": "${seed}",
        })
        write_yahpo_taskconfig(
            objective_function_cfg=objective_function_cfg,
            fid_type=fid_type,
            lower=lower,
            upper=upper,
            objective_function_approximation="surrogate",
            task_id=task_id,
            filename=fn,
            time_budget=budget,
            benchmark_id="YAHPO",
            metric=metric,
        )

# YAHPO-MFMO

In [4]:
# YAHPO Gym multi-fidelity multi-objective collection: the multi-objective
# entries are reused, and one config is written per fidelity type of each
# entry's scenario.

yahpo_gym_mfmo_collection = yahpo_gym_mo_collection
target_path_mfmo = target_path / "MOMF"

for entry in yahpo_gym_mfmo_collection:
    for fid_type in FIDELITY_TYPES[entry["scenario"]]:
        bench, instance, budget = entry["scenario"], entry["instance"], entry["budget"]

        # Make sure `metric` is a list even if an entry stores a bare string.
        target = entry["target"]
        metric = target if isinstance(target, list) else [target]

        # Epoch bounds are kept per scenario under the "epochs" key; all other
        # fidelity types are looked up directly by name.
        if fid_type == "epoch":
            lower, upper = LOWER["epochs"][bench], UPPER["epochs"][bench]
        else:
            lower, upper = LOWER[fid_type], UPPER[fid_type]

        task_id = f"yahpo/MOMF/{fid_type}/{bench}/{instance}/{fid_type}"
        problem_class = "carps.objective_functions.yahpo.YahpoObjectiveFunction"

        # All fidelity types share one directory; the fidelity type is part of
        # the file name instead.
        budget_type_path = target_path_mfmo
        fn = budget_type_path / f"cfg_{bench}_{instance}_{fid_type}_mf.yaml"

        objective_function_cfg = DictConfig({
            "_target_": problem_class,
            "bench": bench,
            "instance": instance,
            "budget_type": fid_type,
            "metric": metric,
            "seed": "${seed}",
        })
        write_yahpo_taskconfig(
            objective_function_cfg=objective_function_cfg,
            fid_type=fid_type,
            lower=lower,
            upper=upper,
            objective_function_approximation="surrogate",
            task_id=task_id,
            filename=fn,
            time_budget=budget,
            benchmark_id="YAHPO",
            metric=metric,
        )

# MFPBench

In [5]:
import itertools
import os
from dataclasses import asdict
from pathlib import Path
from carps.utils.generate_tasks import get_dict_input_space, get_dict_metadata, get_dict_opt_resources, get_dict_output_space

import numpy as np
from carps.utils.task import InputSpace, OptimizationResources, OutputSpace, Task, TaskMetadata, get_search_space_info, FidelitySpace
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf


def get_n_trials(dimension: int) -> int:
    """Return the trial budget for a search space with ``dimension`` parameters.

    Uses the formula from the YAHPO paper: ``ceil(20 + 40 * sqrt(dimension))``.

    Args:
        dimension: Number of dimensions of the search space.

    Returns:
        The number of trials as a plain ``int``.
    """
    raw_budget = 20 + 40 * np.sqrt(dimension)
    return int(np.ceil(raw_budget))

# Static settings for the MFPBench task configs.

# Fields listed as already filled in.
# NOTE(review): not read by the code visible in this cell — confirm it is still needed.
specified_keys = ["n_trials", "n_objectives", "is_multifidelity", "fidelity_type", "min_budget", "max_budget"]

# Existing configs below the MFPBench folder (relative to the current working directory).
problem_path = Path("MFPBench")
data_dir = "carps/benchmark_data/mfpbench"
config_fns = list(problem_path.glob("**/*.yaml"))

problem_class = "carps.objective_functions.mfpbench.MFPBenchObjectiveFunction"

# Template kwargs for pd1 objective functions; "benchmark" and "metric" are
# overwritten per task by the generation loop.
obj_fun_kwargs_pd1 = {
    "benchmark_name": "pd1",
    "metric": ["valid_error_rate"],
    "benchmark": "imagenet_resnet_512",
    "budget_type": "epoch",
    "prior": None,
    "perturb_prior": None,
    "benchmark_kwargs": {
        "datadir": data_dir,
    },
}
pd1_benchmarks = [
    "translatewmt_xformer_64",
    "cifar100_wideresnet_2048",
    "imagenet_resnet_512",
    "lm1b_transformer_2048",
]

# Template kwargs for mfh objective functions; "benchmark" and "metric" are
# overwritten per task by the generation loop.
obj_fun_kwargs_mfh = {
    "benchmark_name": "mfh",
    "metric": ["value"],
    "benchmark": "mfh3_terrible",
    "budget_type": "z",
    "prior": None,
    "perturb_prior": None,
    "benchmark_kwargs": {
        "bias": None,
        "noise": None,
    },
}
# TODO check whether seeds goes into benchmark

# All combinations such as "mfh3_good" ... "mfh6_terrible".
mfh_benchmarks = [
    f"{a}_{b}"
    for a, b in itertools.product(["mfh3", "mfh6"], ["good", "moderate", "bad", "terrible"])
]

# "has_virtual_time" used to be listed twice (False, then True); a dict literal
# keeps only the last value, so the effective value True is spelled out once.
# NOTE(review): the generation loop passes has_virtual_time=False to
# TaskMetadata — confirm which value is intended.
general_info = {
    "objective_function_approximation": "surrogate",
    "has_virtual_time": True,
    "deterministic": True,
}
domain = {
    "mfpbench/mfh": "synthetic",
    "mfpbench/jahs": "JAHS",
    "mfpbench/pd1": "DL",
}


def get_n_trials_pd1(x: int) -> int:
    """Return the fixed trial budget of 100 used for pd1 tasks; ``x`` is ignored."""
    return 100


# Objective sets: each inner list becomes one task per benchmark.
objectives_pd1 = [["valid_error_rate"], ["valid_error_rate", "train_cost"]]
objectives_mfh = [["value"]]

# Fidelity description per benchmark name.
fidelity_info = {
    "translatewmt_xformer_64": {
        "fidelity_type": "epoch",
        "min_budget": 1,
        "max_budget": 19,
    },
    "cifar100_wideresnet_2048": {
        "fidelity_type": "epoch",
        "min_budget": 1,
        "max_budget": 199,
    },
    "imagenet_resnet_512": {
        "fidelity_type": "epoch",
        "min_budget": 1,
        "max_budget": 99,
    },
    "lm1b_transformer_2048": {
        "fidelity_type": "epoch",
        "min_budget": 1,
        "max_budget": 74,
    },
}
fidelity_info_mfh = {
    "fidelity_type": "z",
    "min_budget": 1,
    "max_budget": 100,
}
# Every mfh benchmark refers to the same fidelity description object.
for bench in mfh_benchmarks:
    fidelity_info[bench] = fidelity_info_mfh
# Generate one task config per (benchmark, objective set) of the pd1 and mfh
# benchmark families.
for benchmarks, objectives, obj_fun_kwargs in zip(
    [pd1_benchmarks, mfh_benchmarks],
    [objectives_pd1, objectives_mfh],
    [obj_fun_kwargs_pd1, obj_fun_kwargs_mfh],
    strict=True,
):
    for benchmark in benchmarks:
        for metric in objectives:
            # Shallow copy so the per-task overrides do not modify the template dict.
            kwargs = obj_fun_kwargs.copy()
            kwargs["benchmark"] = benchmark
            kwargs["metric"] = metric
            kwargs["_target_"] = problem_class

            # Only single-objective tasks get the benchmark's fidelity
            # description; multi-objective tasks use a default FidelitySpace.
            if len(metric) == 1:
                fidelity_space = FidelitySpace(
                    is_multifidelity=kwargs["budget_type"] is not None,
                    **fidelity_info[benchmark],
                )
            else:
                fidelity_space = FidelitySpace()

            # Step up from whichever of the known directories we are in before
            # instantiating (data_dir is a relative path), then go back to
            # carps/configs/task where the configs are written. The finally
            # clause performs the return even if instantiation fails, so a
            # failure does not leave the session in the wrong directory.
            if os.getcwd().endswith("carps"):
                os.chdir("..")
            if os.getcwd().endswith("carps/configs"):
                os.chdir("../..")
            if os.getcwd().endswith("carps/configs/task"):
                os.chdir("../../..")
            try:
                obj_fun = instantiate(kwargs)
            finally:
                os.chdir("carps/configs/task")

            search_space_kwargs = get_search_space_info(configspace=obj_fun.configspace)

            # pd1 tasks use a fixed budget, all others the dimension-based formula.
            n_dimensions = len(obj_fun.configspace)
            if benchmark in pd1_benchmarks:
                n_trials = get_n_trials_pd1(n_dimensions)
            else:
                n_trials = get_n_trials(n_dimensions)

            optimization_resources = OptimizationResources(
                n_trials=n_trials,
                time_budget=None,
                n_workers=1,
            )
            output_space = OutputSpace(
                n_objectives=len(metric),
                objectives=metric,
            )
            input_space = InputSpace(
                configuration_space=obj_fun.configspace,
                fidelity_space=fidelity_space,
            )

            identifier = "pd1" if benchmark in pd1_benchmarks else "mfh"
            no_identifier = "MO" if len(metric) > 1 else "SO"
            config_fn = Path("MFPBench") / no_identifier / identifier / f"{benchmark}.yaml"
            task_id = f"mfpbench/{no_identifier}/{identifier}/{benchmark}"

            task_metadata = TaskMetadata(
                has_constraints=False,
                domain=domain[f"mfpbench/{identifier}"],
                objective_function_approximation="surrogate",
                has_virtual_time=False,
                deterministic=True,
                **search_space_kwargs,
            )

            cfg = DictConfig({
                "benchmark_id": "MFPBench",
                "task_id": "${task.name}",
                "task": {
                    "_target_": "carps.utils.task.Task",
                    "name": task_id,
                    "seed": "${seed}",
                    "objective_function": kwargs,
                    "input_space": get_dict_input_space(input_space),
                    "output_space": get_dict_output_space(output_space),
                    "optimization_resources": get_dict_opt_resources(optimization_resources),
                    "metadata": get_dict_metadata(task_metadata),
                },
            })

            # Create the target folder first; writing fails if it does not exist.
            config_fn.parent.mkdir(parents=True, exist_ok=True)
            # The package header matches what the YAHPO and Pymoo writers in
            # this file prepend, so the top-level keys land in the global config.
            yaml_str = "# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg)
            config_fn.write_text(yaml_str)


# Pymoo

In [None]:
import shutil
from dataclasses import asdict
from pathlib import Path
from carps.utils.generate_tasks import get_dict_input_space, get_dict_metadata, get_dict_opt_resources, get_dict_output_space

import numpy as np
from carps.utils.task import InputSpace, OptimizationResources, OutputSpace, TaskMetadata, get_search_space_info, FidelitySpace
from hydra.utils import instantiate
from omegaconf import DictConfig, OmegaConf


def get_n_trials(dimension: int) -> int:
    """Compute the number of trials for a ``dimension``-dimensional search space.

    Formula from the YAHPO paper: ``ceil(20 + 40 * sqrt(dimension))``.

    Args:
        dimension: Number of dimensions of the search space.

    Returns:
        The trial budget, rounded up and converted to a plain ``int``.
    """
    scaled_root = 40 * np.sqrt(dimension)
    return int(np.ceil(20 + scaled_root))

# Base output directory; the generation loop appends the task id (which starts
# with the benchmark id) to build each file path.
target_path = Path() # / 'Pymoo' the benchmark name will be added later on

benchmark_id = "Pymoo"
problem_class = "carps.objective_functions.pymoo.PymooObjectiveFunction"
# NOTE(review): `seed` is not read by the generation loop visible below
# (it instantiates with a hard-coded seed) — confirm whether it is still needed.
seed = 1

# Remove previously generated configs so stale files do not survive a re-run;
# a missing folder is ignored.
shutil.rmtree("Pymoo", ignore_errors=True)

# Problem names grouped by "<objective class>/<constraint class>"; the group
# key becomes part of the task id and therefore of the output path.
pymoo_problems = {
    "MO/unconstraint": ["kursawe", "zdt1", "zdt2", "zdt3", "zdt4", "zdt6", "omnitest", "sympart", "sympart_rotated"],
    "ManyO/unconstraint": ["dtlz1", "dtlz2", "dtlz3", "dtlz4", "dtlz5", "dtlz6", "dtlz7", "dtlz1^-1",
              "convex_dtlz2", "convex_dtlz4", "sdtlz1",
              "wfg1", "wfg2", "wfg3", "wfg4", "wfg5", "wfg6", "wfg7", "wfg8", "wfg9"],
    "SO/unconstraint": ["sphere", "ackley", "rastrigin", "rosenbrock", "griewank",
                        "himmelblau", "zakharov",
                        "schwefel"]
}
# NOTE(review): `fid_type` is not read by the generation loop visible below — confirm.
fid_type = None

# Extra problem kwargs applied to every problem whose name starts with "wfg".
wfg_problem_kwargs = {"n_var": 10, "n_obj": 5}

# Build and write one task config per pymoo problem.
for problem_type, problem_names in pymoo_problems.items():
    for problem_name in problem_names:
        print(problem_type, problem_name)
        is_wfg = problem_name.startswith("wfg")
        problem_kwargs = wfg_problem_kwargs if is_wfg else {}

        objective_function_cfg = DictConfig(
            {
                "_target_": problem_class,
                "problem_name": problem_name,
                "problem_kwargs": problem_kwargs,
                "seed": "${seed}",
            },
        )

        # Instantiate a copy with a concrete seed to inspect the problem; the
        # config that is written keeps the "${seed}" interpolation.
        objective_function_cfg_tmp = objective_function_cfg.copy()
        objective_function_cfg_tmp.seed = 234
        objective_function = instantiate(objective_function_cfg_tmp)
        search_space_kwargs = get_search_space_info(configspace=objective_function.configspace)

        # wfg tasks carry their n_var / n_obj settings in the id; for all other
        # problems "dtlz1^-1" is rewritten to "dtlz1_inv" in the id.
        if is_wfg:
            task_id = f"{benchmark_id}/{problem_type}/{problem_name}_{problem_kwargs['n_var']}_{problem_kwargs['n_obj']}"
        else:
            task_id = f"{benchmark_id}/{problem_type}/{problem_name}".replace("dtlz1^-1", "dtlz1_inv")

        n_trials = get_n_trials(search_space_kwargs["dimensions"])
        n_objectives = objective_function._problem.n_obj
        objectives = [f"objective_{i}" for i in range(n_objectives)]

        input_space = InputSpace(
            configuration_space=objective_function.configspace,
            fidelity_space=FidelitySpace(),
        )
        output_space = OutputSpace(n_objectives=n_objectives, objectives=objectives)
        optimization_resources = OptimizationResources(n_trials=n_trials, time_budget=None, n_workers=1)

        task_metadata = TaskMetadata(
            has_constraints=False,
            domain="synthetic",
            objective_function_approximation="real",
            has_virtual_time=True,
            deterministic=True,
            **search_space_kwargs,
        )

        task_section = {
            "_target_": "carps.utils.task.Task",
            "seed": "${seed}",
            "objective_function": objective_function_cfg,
            "input_space": get_dict_input_space(input_space),
            "output_space": get_dict_output_space(output_space),
            "optimization_resources": get_dict_opt_resources(optimization_resources),
            "metadata": get_dict_metadata(task_metadata),
        }
        cfg = DictConfig({
            "benchmark_id": benchmark_id,
            "task_id": task_id,
            "task": task_section,
        })

        # The task id doubles as the relative path of the YAML file.
        fn = target_path / (task_id + ".yaml")
        fn.parent.mkdir(exist_ok=True, parents=True)
        fn.write_text("# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg))
        print(cfg)
        print(fn)

# Index Configs
Final step: run `index_configs()` once all task configs above have been generated.

In [None]:
from carps.utils.index_configs import index_configs

# Run after all generator cells above.
# NOTE(review): presumably indexes the config files written by those cells —
# confirm against carps.utils.index_configs.
index_configs()