# BBOB

In [1]:
# BBOB
from pathlib import Path
from carps.utils.task import Task, get_search_space_info
from dataclasses import asdict

# Seeds 1..10.
# NOTE(review): `seeds` is not referenced in the visible part of this cell.
seeds = range(1, 11)

# Config should match Problem init

# Root directory under which all generated problem configs are written.
base_path = Path("carps/configs/problem")

# Problem ids
benchmark_id_bbob = "BBOB"

from omegaconf import DictConfig, OmegaConf

import shutil

# Start from a clean output directory: any previously generated BBOB configs
# are deleted before the directory is re-created.
target_path = base_path / benchmark_id_bbob
if target_path.is_dir():
    shutil.rmtree(str(target_path))
target_path.mkdir(parents=True, exist_ok=True)

# Dotted path written as the Hydra `_target_` of every generated problem config.
problem_class = "carps.benchmarks.bbob.BBOBProblem"

# The cross product of these three lists defines the generated problems.
fids = list(range(1, 25))  # function ids 1..24
instances = [0]
dimensions = [2, 5, 10]


def get_n_trials(dim: int) -> int:
    """Return the trial budget for a problem with ``dim`` dimensions.

    The budget scales linearly: 25 trials per dimension.
    """
    trials_per_dimension = 25
    return dim * trials_per_dimension

def get_n_initial_design(dim: int) -> int:
    """Return the initial design size for a problem with ``dim`` dimensions.

    The size scales linearly: 3 points per dimension.
    """
    points_per_dimension = 3
    return dim * points_per_dimension

# Write one Hydra problem config per (function id, instance, dimension).
for fid in fids:
    for inst in instances:
        for dim in dimensions:
            problem_id = f"noiseless/{dim}/{fid}/{inst}"

            # The task declares a search space of `dim` float hyperparameters
            # and nothing else (no other types, conditions, forbiddens, priors).
            search_space_info = {
                "search_space_n_categoricals": 0,
                "search_space_n_ordinals": 0,
                "search_space_n_integers": 0,
                "search_space_n_floats": dim,
                "search_space_has_conditionals": False,
                "search_space_has_forbiddens": False,
                "search_space_has_priors": False,
            }
            task = Task(
                dimensions=dim,
                n_trials=get_n_trials(dim),
                time_budget=None,
                n_workers=1,
                n_objectives=1,
                objectives=["quality"],
                is_multifidelity=False,
                fidelity_type=None,
                min_budget=None,
                max_budget=None,
                has_constraints=False,
                domain="synthetic",
                objective_function_approximation="real",
                has_virtual_time=False,
                **search_space_info,
            )

            # Arguments used to instantiate the class named by `problem_class`.
            problem_cfg = {
                "_target_": problem_class,
                "dimension": dim,
                "fid": fid,
                "instance": inst,
                "seed": "${seed}",  # get the seed from global cfg
            }
            cfg = DictConfig({
                "benchmark_id": benchmark_id_bbob,
                "problem_id": problem_id,
                "problem": problem_cfg,
                "task": asdict(task),
            })

            # The dimension appears twice in the file name; this pattern is
            # kept unchanged so existing references to the files stay valid.
            fn = target_path / f"cfg_{dim}_{fid}_{dim}_{inst}.yaml"
            # The header line places the config content in Hydra's global package.
            yaml_str = "# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg)
            fn.write_text(yaml_str)
            print(cfg)



{'benchmark_id': 'BBOB', 'problem_id': 'noiseless/2/1/0', 'problem': {'_target_': 'carps.benchmarks.bbob.BBOBProblem', 'dimension': 2, 'fid': 1, 'instance': 0, 'seed': '${seed}'}, 'task': {'dimensions': 2, 'n_trials': 50, 'time_budget': None, 'n_workers': 1, 'n_objectives': 1, 'objectives': ['quality'], 'is_multifidelity': False, 'fidelity_type': None, 'min_budget': None, 'max_budget': None, 'has_constraints': False, 'domain': 'synthetic', 'objective_function_approximation': 'real', 'has_virtual_time': False, 'search_space_n_categoricals': 0, 'search_space_n_ordinals': 0, 'search_space_n_integers': 0, 'search_space_n_floats': 2, 'search_space_has_conditionals': False, 'search_space_has_forbiddens': False, 'search_space_has_priors': False}}
{'benchmark_id': 'BBOB', 'problem_id': 'noiseless/5/1/0', 'problem': {'_target_': 'carps.benchmarks.bbob.BBOBProblem', 'dimension': 5, 'fid': 1, 'instance': 0, 'seed': '${seed}'}, 'task': {'dimensions': 5, 'n_trials': 125, 'time_budget': None, 'n_wor

# HPOB

In [4]:
# HPOB
from operator import attrgetter
import re
from pathlib import Path
from omegaconf import DictConfig, OmegaConf
import shutil
import pandas as pd
import numpy as np
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from carps.utils.task import Task, get_search_space_info
from dataclasses import asdict

# Root directory under which all generated problem configs are written.
base_path = Path("carps/configs/problem")

# Problem ids
benchmark_id = "HPOB"

# Directory that is scanned for the available surrogate files.
surrogates_dir = Path('carps/benchmark_data/HPO-B/saved-surrogates')

# File names of all entries matching `surrogate-*`. `map` is lazy, so this is
# a one-shot iterator that can be consumed only once.
all_res = map(attrgetter('name'), surrogates_dir.glob('surrogate-*'))

# Start from a clean output directory: any previously generated HPOB configs
# are deleted before the directory is re-created.
target_path = base_path / benchmark_id
if target_path.is_dir():
    shutil.rmtree(str(target_path))
target_path.mkdir(parents=True, exist_ok=True)

# Dotted path written as the Hydra `_target_` of every generated problem config.
problem_class = "carps.benchmarks.hpo_b.HPOBProblem"
# Trial budget written into every generated task.
n_trials = 100

# Extract the digit groups from every surrogate file name. Each name must
# contain exactly two groups, unpacked below as (model id, dataset id).
# The pattern is a raw string: "\d" in a plain literal is an invalid escape.
combos_mid_did = [re.findall(r'\d+', res) for res in all_res]
combos_mid_did = pd.DataFrame([{"model_id": mid, "dataset_id": did} for mid, did in combos_mid_did])
print("models", combos_mid_did["model_id"].unique())
print("dataset", combos_mid_did["dataset_id"].unique())

# Fixed seed so the sub-selection below is reproducible.
rng = np.random.default_rng(seed=498415)
n_models = 10
n_datasets = 5
# check how many datasets per model id
# (`size()` counts the rows per group without the deprecation warning that
# `apply(len)` triggers for operating on the grouping column)
n_d_per_model = combos_mid_did.groupby("model_id").size()
# filter only those model ids with enough datasets
n_d_per_model = n_d_per_model[n_d_per_model >= n_datasets]
# select 10 of those models
# NOTE(review): `choice` samples WITH replacement by default, so duplicate
# model ids are possible; the assert below would then fail. This is left
# unchanged on purpose, because passing `replace=False` would alter the
# seeded selection and therefore the generated "subset" configs.
subselected_models = rng.choice(n_d_per_model.index, size=n_models)
combos_mid_did_reduced = combos_mid_did[combos_mid_did["model_id"].isin(subselected_models)]
# select 5 datasets for each of the 10 models
subselected_combos = combos_mid_did_reduced.groupby("model_id").sample(n=n_datasets, replace=False, random_state=rng)
# Sanity check: exactly n_models distinct models with n_datasets datasets each.
assert len(subselected_combos) == (n_models * n_datasets)


    

def to_yaml(combos_mid_did: pd.DataFrame, identifier: str = "all"):
    """Write one Hydra problem config per row of ``combos_mid_did``.

    Every row is unpacked positionally into ``(model_id, dataset_id)``. The
    problem is instantiated once per row to derive the search space info that
    is stored in the task section. Files are written to
    ``target_path / identifier / cfg_<model_id>_<dataset_id>.yaml``.

    Args:
        combos_mid_did: Frame whose rows hold a model id and a dataset id,
            in that order.
        identifier: Name of the output sub-directory; also used as a segment
            of the problem id.
    """
    from carps.benchmarks.hpo_b import HPOBProblem

    for _, (model_id, dataset_id) in combos_mid_did.iterrows():
        # Instantiate the problem only to inspect its configuration space.
        problem = HPOBProblem(
            dataset_id=dataset_id,
            model_id=model_id,
            surrogates_dir="carps/benchmark_data/HPO-B/saved-surrogates",
        )
        search_space_kwargs = get_search_space_info(configspace=problem.configspace)

        task_kwargs = {
            "n_trials": n_trials,
            "time_budget": None,
            "n_workers": 1,
            "n_objectives": 1,
            "objectives": ["quality"],
            "is_multifidelity": False,
            "fidelity_type": None,
            "min_budget": None,
            "max_budget": None,
            "has_constraints": False,
            "domain": "ML",
            "objective_function_approximation": "surrogate",
            "has_virtual_time": False,
        }
        task = Task(**task_kwargs, **search_space_kwargs)

        # Arguments used to instantiate the class named by `problem_class`.
        problem_cfg = {
            "_target_": problem_class,
            "model_id": model_id,
            "dataset_id": dataset_id,
            "surrogates_dir": str(surrogates_dir),
        }
        cfg = DictConfig({
            "benchmark_id": benchmark_id,
            "problem_id": f'hpob/{identifier}/{model_id}/{dataset_id}',
            "problem": problem_cfg,
            "task": asdict(task),
        })

        fn = target_path / f"{identifier}/cfg_{model_id}_{dataset_id}.yaml"
        fn.parent.mkdir(parents=True, exist_ok=True)
        # The header line places the config content in Hydra's global package.
        fn.write_text("# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg))

# Configs for every (model, dataset) combination found among the surrogates ...
to_yaml(combos_mid_did=combos_mid_did, identifier="all")
# ... and for the randomly sub-selected combinations.
to_yaml(combos_mid_did=subselected_combos, identifier="subset")

models ['5890' '6766' '4796' '5891' '7286' '5965' '6458' '6767' '7609' '6136'
 '6794' '3490' '5889' '7190' '5636' '5926' '5923' '6140' '243' '7607'
 '5859' '2566' '5527' '7188' '5860' '7189' '5971' '5920' '5435' '506'
 '685' '5968' '5970' '5458' '4006' '680' '2799' '2793' '5918' '5886'
 '5988' '6447' '5237' '6156' '5972' '5921' '6154' '3894' '534' '5978'
 '248' '7680' '678' '6073' '5704' '7604' '6007' '5623' '5922' '833' '5624'
 '151' '3737' '5906' '6183' '679' '694' '5604' '6212' '673' '6762' '5969'
 '6105' '5526' '681' '682' '688' '6139' '674' '3442' '6213' '5489' '6345'
 '214' '6376' '695' '5499' '2039' '6075' '3960' '690' '829' '6856' '6765'
 '5253' '5963' '3994' '6024' '6493' '124' '5295' '6131' '7200' '6347'
 '6461' '6182' '6322' '6365' '6308' '245' '3434' '6137' '633' '5919'
 '5964' '5960' '684' '6134' '5502' '5813' '423' '3489' '5788' '6215'
 '6741' '6507' '697' '6190' '158' '5218' '2553' '5503' '683' '6285' '2010'
 '247' '3502' '5626' '2277' '935' '6124' '2614' '2073' '7290' '

  n_d_per_model = combos_mid_did.groupby(["model_id"]).apply(len)


# HPOBench

In [2]:
from pathlib import Path
from omegaconf import DictConfig, OmegaConf
import shutil
try:
    from carps.benchmarks.hpo_bench import HPOBenchProblem
except ModuleNotFoundError:
    print("HPOBench is not installed. This means the task information can only be partly retrieved.")
    HPOBenchProblem = None
from ConfigSpace.hyperparameters import CategoricalHyperparameter
from carps.utils.task import Task, get_search_space_info
from dataclasses import asdict
from hydra.utils import instantiate, get_class


# Root directory under which all generated problem configs are written.
base_path = Path("carps/configs/problem")

# Set up directories
benchmark_id_hpobench = "HPOBench"
target_path = base_path / benchmark_id_hpobench
# Start from a clean output directory: previously generated HPOBench configs
# are deleted before the directories are re-created.
if target_path.is_dir():
    shutil.rmtree(str(target_path))
# Black-box and multi-fidelity configs are written to separate sub-directories.
target_path_blackbox = target_path / 'blackbox'
target_path_multifidelity = target_path / 'multifidelity'
target_path.mkdir(parents=True, exist_ok=True)
target_path_blackbox.mkdir(parents=True, exist_ok=True)
target_path_multifidelity.mkdir(parents=True, exist_ok=True)

# Dotted path written as the Hydra `_target_` of every generated problem config.
problem_class = "carps.benchmarks.hpo_bench.HPOBenchProblem"


# ML
# Model keys; each one indexes FIDELITY_TYPES and, where applicable, the
# per-model entries of UPPER / LOWER.
MODELS = ["lr", "nn", "rf", "svm", "xgboost"]

# Task ids for which configs are generated. TASK_IDS_LARGE is used in
# addition to TASK_IDS for every model except "nn".
TASK_IDS = [10101, 53, 146818, 146821, 9952, 146822, 31, 3917]
TASK_IDS_LARGE = [168912, 3, 167119, 12, 146212, 168911, 9981, 167120, 14965, 146606, 7592, 9977]

# Choices according to https://arxiv.org/pdf/2109.06716.pdf, page 22
# Maximum fidelity value per fidelity type: 'subsample' holds one shared
# value, 'iter' and 'n_estimators' hold one value per model.
UPPER = {'subsample': 1.0,
         'iter':
             {
                 'lr': 1000,
                 'nn': 243,
         },
         'n_estimators': {
             'xgboost': 2000,
             'rf': 512,
         }}
# Minimum fidelity value per fidelity type; same layout as UPPER.
LOWER = {'subsample': 0.1,
         'iter':
             {
                 'lr': 10,
                 'nn': 3,
         },
         'n_estimators': {
             'xgboost': 50,
             'rf': 16,
         }}

# Fidelity types for which a multi-fidelity config is generated, per model.
FIDELITY_TYPES = {'lr': ['iter', 'subsample'],
                  'nn': ['iter', 'subsample'],
                  'rf': ['n_estimators', 'subsample'],
                  'svm': ['subsample'],
                  'xgboost': ['n_estimators', 'subsample']}

# Budget for black-box tasks (number of trials) ...
n_trials = 100
# ... and for multi-fidelity tasks (time budget).
time_budget = 720 # minutes

# Write one config per (model, task id, fidelity type). The fidelity type
# `None` yields the black-box variant, every other value a multi-fidelity one.
for model in MODELS:
    # task_ids_large should be used for all models except the neural network
    tids = TASK_IDS if model == "nn" else TASK_IDS + TASK_IDS_LARGE
    for tid in tids:
        for fid_type in FIDELITY_TYPES[model] + [None]:

            # Fidelity bounds: per-model for 'n_estimators' / 'iter', shared
            # for 'subsample', and absent for the black-box variant.
            if fid_type == 'n_estimators':
                lower = LOWER['n_estimators'][model]
                upper = UPPER['n_estimators'][model]
            elif fid_type == 'iter':
                lower = LOWER['iter'][model]
                upper = UPPER['iter'][model]
            elif fid_type == 'subsample':
                lower = LOWER[fid_type]
                upper = UPPER[fid_type]
            else:
                lower = None
                upper = None

            is_multifidelity = fid_type is not None
            benchtype = "real" if is_multifidelity else "tab"
            problemtype = "mf" if is_multifidelity else "bb"
            problem_id = f"hpobench/{problemtype}/{benchtype}/ml/{model}/{tid}"
            if is_multifidelity:
                problem_id += f"/{fid_type}"

            # Reset on every iteration. Without this, a missing HPOBench
            # installation raised a NameError at the Task(...) call instead of
            # producing the partial task information announced at import time.
            # NOTE(review): assumes Task provides defaults for the
            # search-space fields - TODO confirm.
            search_space_kwargs = {}
            if HPOBenchProblem is not None:
                # Instantiate the problem only to inspect its configuration space.
                problem = HPOBenchProblem(seed=1, model=model, task_id=tid, problem=None, budget_type=fid_type)
                search_space_kwargs = get_search_space_info(configspace=problem.configspace)

            # Black-box tasks are limited by trials, multi-fidelity tasks by time.
            task = Task(
                n_trials=None if is_multifidelity else n_trials,
                time_budget=time_budget if is_multifidelity else None,
                n_workers=1,
                n_objectives=1,
                objectives=["quality"],
                is_multifidelity=is_multifidelity,
                fidelity_type=fid_type,
                min_budget=lower,
                max_budget=upper,
                has_constraints=False,
                domain="ML",
                objective_function_approximation="tabular" if benchtype == "tab" else benchtype,
                has_virtual_time=False,
                **search_space_kwargs
            )

            cfg = DictConfig({
                "benchmark_id": benchmark_id_hpobench,
                "problem_id": problem_id,
                "problem": {
                        "_target_": problem_class,
                        "model": model,
                        "task_id": tid,
                        "budget_type": fid_type,
                        "seed": "${seed}",  # get the seed from global cfg
                },
                "task": asdict(task),
            })
            if fid_type is None:
                fn = target_path_blackbox / "tab" / f"cfg_ml_{model}_{tid}.yaml"
            else:
                fn = target_path_multifidelity / f"cfg_ml_{model}_{tid}_{fid_type}.yaml"
            fn.parent.mkdir(exist_ok=True, parents=True)
            yaml_str = OmegaConf.to_yaml(cfg=cfg)
            # The header line places the config content in Hydra's global package.
            yaml_str = "# @package _global_\n" + yaml_str
            fn.write_text(yaml_str)

# Surrogates
target_path_surrogate = target_path_blackbox / "surr"
MODELS = ["ParamNet", "SVM"]
DATASETS = ["Adult", "Higgs", "Letter", "Mnist", "Optdigits", "Poker"]
generated_svm_once = False
for model in MODELS:
    for dataset in DATASETS:
        benchtype = "surr"

        if model == "SVM":  # 2 float HPs
            # The SVM target does not depend on the dataset: write a single
            # "default" config and skip all remaining datasets.
            if generated_svm_once:
                continue
            generated_svm_once = True
            dataset = "default"
            n_trials = 60
            dimension = 2
            n_integers, n_floats = 0, 2
            target = "hpobench.container.benchmarks.surrogates.svm_benchmark.SurrogateSVMBenchmark"
        elif model == "ParamNet":  # 8 HPs, 1 int, 7 float
            n_trials = 240
            dimension = 8
            n_integers, n_floats = 1, 7
            target = f"hpobench.container.benchmarks.surrogates.{model.lower()}_benchmark.{model}{dataset}OnStepsBenchmark"

        problem_id = f"hpobench/bb/{benchtype}/{model}/{dataset}"

        # Search space description, hard-coded per model.
        search_space_kwargs = {
            "dimensions": dimension,
            "search_space_n_categoricals": 0,
            "search_space_n_ordinals": 0,
            "search_space_n_integers": n_integers,
            "search_space_n_floats": n_floats,
            "search_space_has_conditionals": False,
            "search_space_has_forbiddens": False,
            "search_space_has_priors": False,
        }

        task = Task(
            n_trials=n_trials,
            time_budget=None,
            n_workers=1,
            n_objectives=1,
            objectives=["quality"],
            is_multifidelity=False,
            fidelity_type=None,
            min_budget=None,
            max_budget=None,
            has_constraints=False,
            domain="ML",
            objective_function_approximation="surrogate",
            has_virtual_time=True,
            **search_space_kwargs
        )

        # The surrogate benchmark is itself a nested Hydra target that is
        # passed to the problem class as its `problem` argument.
        surrogate_cfg = {
            "_target_": target,
            "rng": "${seed}",
        }
        cfg = DictConfig({
            "benchmark_id": benchmark_id_hpobench,
            "problem_id": problem_id,
            "problem": {
                "_target_": problem_class,
                "problem": surrogate_cfg,
                "seed": "${seed}",  # get the seed from global cfg
            },
            "task": asdict(task),
        })

        fn = target_path_surrogate / f"cfg_surr_{model}_{dataset}.yaml"
        fn.parent.mkdir(exist_ok=True, parents=True)
        # The header line places the config content in Hydra's global package.
        yaml_str = "# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg)
        fn.write_text(yaml_str)



# YAHPO

In [2]:
import shutil
from pathlib import Path
from omegaconf import DictConfig, OmegaConf

# Root directory under which all generated problem configs are written.
base_path = Path("carps/configs/problem")

# Benchmark name -> list of instance names (all stored as strings). One set of
# configs is generated for every (benchmark, instance) pair.
COMBIS = {'lcbench': ['3945', '7593', '34539', '126025', '126026', '126029', '146212', '167104',
                      '167149', '167152', '167161', '167168', '167181', '167184', '167185',
                      '167190', '167200', '167201', '168329', '168330', '168331', '168335',
                      '168868', '168908', '168910', '189354', '189862', '189865', '189866',
                      '189873', '189905', '189906', '189908', '189909'],
          'fcnet': ['fcnet_naval_propulsion', 'fcnet_protein_structure',
                    'fcnet_slice_localization', 'fcnet_parkinsons_telemonitoring'],
          'nb301': ['CIFAR10'],
          'rbv2_svm': ['40981', '4134', '1220', '40978', '40966', '40536', '41156', '458',
                       '41157', '40975', '40994', '1468', '6332', '40670', '151', '1475',
                       '1476', '1478', '1479', '41212', '1480', '1053', '1067', '1056', '12',
                       '1487', '1068', '32', '470', '312', '38', '40982', '50', '41216', '307',
                       '40498', '181', '1464', '41164', '16', '1461', '41162', '6', '14',
                       '1494', '54', '375', '1590', '23', '41163', '1111', '41027', '40668',
                       '41138', '4135', '4538', '40496', '4534', '40900', '1457', '11', '1462',
                       '41142', '40701', '29', '37', '23381', '188', '41143', '1063', '3', '18',
                       '40979', '22', '1515', '334', '24', '1493', '28', '1050', '1049',
                       '40984', '40685', '42', '44', '46', '1040', '41146', '377', '40499',
                       '1497', '60', '40983', '4154', '469', '31', '41278', '1489', '1501',
                       '15', '300', '1485', '1486', '1510', '182', '41169'],
          'rbv2_ranger': ['4135', '40981', '4134', '1220', '4154', '4538', '40978', '375',
                          '40496', '40966', '4534', '40900', '40536', '41156', '1590', '1457',
                          '458', '469', '41157', '11', '1461', '1462', '1464', '15', '40975',
                          '41142', '40701', '40994', '23', '1468', '40668', '29', '31', '6332',
                          '37', '40670', '23381', '151', '188', '41164', '1475', '1476', '1478',
                          '1479', '41212', '1480', '41143', '1053', '41027', '1067', '1063',
                          '3', '6', '1485', '1056', '12', '14', '16', '18', '40979', '22',
                          '1515', '334', '24', '1486', '41278', '28', '1487', '1068', '1050',
                          '1049', '32', '1489', '470', '1494', '182', '312', '40984', '1501',
                          '40685', '38', '42', '44', '46', '40982', '1040', '41146', '377',
                          '40499', '50', '54', '41216', '307', '1497', '60', '1510', '40983',
                          '40498', '181', '41138', '41163', '1111', '41159', '300', '41162',
                          '23517', '41165', '4541', '41161', '41166', '40927', '41150', '23512',
                          '41168', '1493', '40996', '554', '40923', '41169'],
          'rbv2_rpart': ['41138', '4135', '40981', '4134', '40927', '1220', '4154', '40923',
                         '41163', '40996', '4538', '40978', '375', '1111', '40496', '40966',
                         '41150', '4534', '40900', '40536', '41156', '1590', '1457', '458',
                         '469', '41157', '11', '1461', '1462', '1464', '15', '40975', '41142',
                         '40701', '40994', '23', '1468', '40668', '29', '31', '6332', '37',
                         '4541', '40670', '23381', '151', '188', '41164', '1475', '1476',
                         '41159', '1478', '41169', '23512', '1479', '41212', '1480', '300',
                         '41168', '41143', '1053', '41027', '1067', '1063', '41162', '3', '6',
                         '1485', '1056', '12', '14', '16', '18', '40979', '22', '1515', '554',
                         '334', '24', '1486', '23517', '1493', '28', '1487', '1068', '1050',
                         '1049', '32', '1489', '470', '1494', '41161', '41165', '182', '312',
                         '40984', '1501', '40685', '38', '42', '44', '46', '40982', '1040',
                         '41146', '377', '40499', '50', '54', '41166', '307', '1497', '60',
                         '1510', '40983', '40498', '181'],
          'rbv2_glmnet': ['41138', '4135', '40981', '4134', '1220', '4154', '41163', '4538',
                          '40978', '375', '1111', '40496', '40966', '41150', '4534', '40900',
                          '40536', '41156', '1590', '1457', '458', '469', '41157', '11', '1461',
                          '1462', '1464', '15', '40975', '41142', '40701', '40994', '23',
                          '1468', '40668', '29', '31', '6332', '37', '4541', '40670', '23381',
                          '151', '188', '41164', '1475', '1476', '41159', '1478', '41169',
                          '23512', '1479', '41212', '1480', '300', '41168', '41143', '1053',
                          '41027', '1067', '1063', '41162', '3', '6', '1485', '1056', '12',
                          '14', '16', '18', '40979', '22', '1515', '334', '24', '1486', '23517',
                          '41278', '1493', '28', '1487', '1068', '1050', '1049', '32', '1489',
                          '470', '1494', '41161', '182', '312', '40984', '1501', '40685', '38',
                          '42', '44', '46', '40982', '1040', '41146', '377', '40499', '50',
                          '54', '41216', '41166', '307', '1497', '60', '1510', '40983', '40498',
                          '181', '554'],
          'rbv2_xgboost': ['16', '40923', '41143', '470', '1487', '40499', '40966', '41164',
                           '1497', '40975', '1461', '41278', '11', '54', '300', '40984', '31',
                           '1067', '1590', '40983', '41163', '41165', '182', '1220', '41159',
                           '41169', '42', '188', '1457', '1480', '6332', '181', '1479', '40670',
                           '40536', '41138', '41166', '6', '14', '29', '458', '1056', '1462',
                           '1494', '40701', '12', '1493', '44', '307', '334', '40982', '41142',
                           '38', '1050', '469', '23381', '41157', '15', '4541', '23', '4134',
                           '40927', '40981', '41156', '3', '1049', '40900', '1063', '23512',
                           '40979', '1040', '1068', '41161', '22', '1489', '41027', '24',
                           '4135', '23517', '1053', '1468', '312', '377', '1515', '18', '1476',
                           '1510', '41162', '28', '375', '1464', '40685', '40996', '41146',
                           '41216', '40668', '41212', '32', '60', '4538', '40496', '41150',
                           '37', '46', '554', '1475', '1485', '1501', '1111', '4534', '41168',
                           '151', '4154', '40978', '40994', '50', '1478', '1486', '40498'],
          'rbv2_aknn': ['41138', '40981', '4134', '40927', '1220', '4154', '41163', '40996',
                        '4538', '40978', '375', '1111', '40496', '40966', '41150', '4534',
                        '40900', '40536', '41156', '1590', '1457', '458', '469', '41157', '11',
                        '1461', '1462', '1464', '15', '40975', '41142', '40701', '40994', '23',
                        '1468', '40668', '29', '31', '6332', '37', '4541', '40670', '23381',
                        '151', '188', '41164', '1475', '1476', '41159', '1478', '41169',
                        '23512', '1479', '41212', '1480', '300', '41168', '41143', '1053',
                        '41027', '1067', '1063', '41162', '3', '6', '1485', '1056', '12', '14',
                        '16', '18', '40979', '22', '1515', '554', '334', '24', '1486', '23517',
                        '41278', '1493', '28', '1487', '1068', '1050', '1049', '32', '1489',
                        '470', '1494', '41161', '41165', '182', '312', '40984', '1501', '40685',
                        '38', '42', '44', '46', '40982', '1040', '41146', '377', '40499', '50',
                        '54', '41216', '41166', '307', '1497', '60', '1510', '40983', '40498',
                        '181', '40923'],
          'rbv2_super': ['41138', '40981', '4134', '1220', '4154', '41163', '4538', '40978',
                         '375', '1111', '40496', '40966', '4534', '40900', '40536', '41156',
                         '1590', '1457', '458', '469', '41157', '11', '1461', '1462', '1464',
                         '15', '40975', '41142', '40701', '40994', '23', '1468', '40668', '29',
                         '31', '6332', '37', '40670', '23381', '151', '188', '41164', '1475',
                         '1476', '1478', '41169', '1479', '41212', '1480', '300', '41143',
                         '1053', '41027', '1067', '1063', '41162', '3', '6', '1485', '1056',
                         '12', '14', '16', '18', '40979', '22', '1515', '334', '24', '1486',
                         '1493', '28', '1487', '1068', '1050', '1049', '32', '1489', '470',
                         '1494', '182', '312', '40984', '1501', '40685', '38', '42', '44', '46',
                         '40982', '1040', '41146', '377', '40499', '50', '54', '307', '1497',
                         '60', '1510', '40983', '40498', '181'],
          'iaml_ranger': ['40981', '41146', '1489', '1067'],
          'iaml_rpart': ['40981', '41146', '1489', '1067'],
          'iaml_glmnet': ['40981', '41146', '1489', '1067'],
          'iaml_xgboost': ['40981', '41146', '1489', '1067'],
          'iaml_super': ['40981', '41146', '1489', '1067']}

# Maximum fidelity value per fidelity type: 'trainsize' and 'repl' hold one
# shared value, 'epochs' holds one value per benchmark.
UPPER = {'trainsize': 1.0,
         'repl': 10,
         'epochs': {
             'lcbench': 52,
             'fcnet': 100,
             'nb301': 98,
         }}
# Minimum fidelity value per fidelity type; same layout as UPPER.
LOWER = {'trainsize': 0.03,
         'repl': 1,
         'epochs': {
             'lcbench': 1,
             'fcnet': 1,
             'nb301': 1,
         }}

# Fidelity types available per benchmark.
# NOTE: the fidelity type is spelled 'epoch' here, while its bounds are stored
# under the key 'epochs' in UPPER / LOWER; lookups must map between the two.
FIDELITY_TYPES = {'lcbench': ['epoch'],
                  'fcnet': ['epoch'],
                  'nb301': ['epoch'],
                  'rbv2_svm': ['trainsize', 'repl'],
                  'rbv2_ranger': ['trainsize', 'repl'],
                  'rbv2_rpart': ['trainsize', 'repl'],
                  'rbv2_glmnet': ['trainsize', 'repl'],
                  'rbv2_xgboost': ['trainsize', 'repl'],
                  'rbv2_aknn': ['trainsize', 'repl'],
                  'rbv2_super': ['trainsize', 'repl'],
                  'iaml_ranger': ['trainsize'],
                  'iaml_rpart': ['trainsize'],
                  'iaml_glmnet': ['trainsize'],
                  'iaml_xgboost': ['trainsize'],
                  'iaml_super': ['trainsize']}

# Metric names listed per benchmark. The target metric chosen for each
# benchmark in TARGETMETRIC is one of the names listed here.
METRICS = {'lcbench': ['time', 'val_accuracy', 'val_cross_entropy', 'val_balanced_accuracy',
                       'test_cross_entropy', 'test_balanced_accuracy'],
           'fcnet': ['valid_loss', 'valid_mse', 'runtime', 'n_params'],
           'nb301': ['val_accuracy', 'runtime'],
           'rbv2_svm': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain', 'timepredict',
                        'memory'],
           'rbv2_ranger': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain',
                           'timepredict', 'memory'],
           'rbv2_rpart': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain', 'timepredict',
                          'memory'],
           'rbv2_glmnet': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain',
                           'timepredict', 'memory'],
           'rbv2_xgboost': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain',
                            'timepredict', 'memory'],
           'rbv2_aknn': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain', 'timepredict',
                         'memory'],
           'rbv2_super': ['acc', 'bac', 'auc', 'brier', 'f1', 'logloss', 'timetrain', 'timepredict',
                          'memory'],
           'iaml_ranger': ['mmce', 'f1', 'auc', 'logloss', 'ramtrain', 'rammodel', 'rampredict',
                           'timetrain', 'timepredict', 'mec', 'ias', 'nf'],
           'iaml_rpart': ['mmce', 'f1', 'auc', 'logloss', 'ramtrain', 'rammodel', 'rampredict',
                          'timetrain', 'timepredict', 'mec', 'ias', 'nf'],
           'iaml_glmnet': ['mmce', 'f1', 'auc', 'logloss', 'ramtrain', 'rammodel', 'rampredict',
                           'timetrain', 'timepredict', 'mec', 'ias', 'nf'],
           'iaml_xgboost': ['mmce', 'f1', 'auc', 'logloss', 'ramtrain', 'rammodel', 'rampredict',
                            'timetrain', 'timepredict', 'mec', 'ias', 'nf'],
           'iaml_super': ['mmce', 'f1', 'auc', 'logloss', 'ramtrain', 'rammodel', 'rampredict',
                          'timetrain', 'timepredict', 'mec', 'ias', 'nf']}

# Select your preferred target variable. `iaml` and `rbv2` are shared by all
# benchmarks of the respective family, so changing one value switches the
# target metric for the whole family.
iaml = 'f1'
rbv2 = 'acc'
# Benchmark name -> metric that is passed to the problem as its target.
TARGETMETRIC = {'lcbench': 'val_accuracy',
                'fcnet': 'valid_mse',
                'nb301': 'val_accuracy',
                'rbv2_svm': rbv2,
                'rbv2_ranger': rbv2,
                'rbv2_rpart': rbv2,
                'rbv2_glmnet': rbv2,
                'rbv2_xgboost': rbv2,
                'rbv2_aknn': rbv2,
                'rbv2_super': rbv2,
                'iaml_ranger': iaml,
                'iaml_rpart': iaml,
                'iaml_glmnet': iaml,
                'iaml_xgboost': iaml,
                'iaml_super': iaml}

# Per benchmark: whether smaller values are better.
# NOTE(review): presumably refers to the metric selected in TARGETMETRIC, so
# the flags must be revisited when a target metric is changed - confirm.
# NOTE(review): not referenced in the visible part of this notebook.
LOWER_IS_BETTER = {
    'lcbench': False,
    'fcnet': True,
    'nb301': False,
    'rbv2_svm': False,
    'rbv2_ranger': False,
    'rbv2_rpart': False,
    'rbv2_glmnet': False,
    'rbv2_xgboost': False,
    'rbv2_aknn': False,
    'rbv2_super': False,
    'iaml_ranger': False,
    'iaml_rpart': False,
    'iaml_glmnet': False,
    'iaml_xgboost': False,
    'iaml_super': False,

}

In [13]:
try:
    from carps.benchmarks.yahpo import YahpoProblem
except ModuleNotFoundError:
    print("YAHPO-Gym is not installed. This means the task information can only be partly retrieved.")
    YahpoProblem = None
from carps.utils.task import Task, get_search_space_info
from dataclasses import asdict


# Start from a clean YAHPO config directory with one subfolder for blackbox
# problems and one for multi-fidelity problems.
target_path = base_path / 'YAHPO'
if target_path.is_dir():
    shutil.rmtree(str(target_path))
target_path_blackbox = target_path / 'blackbox'
target_path_multifidelity = target_path / 'multifidelity'
for _directory in (target_path, target_path_blackbox, target_path_multifidelity):
    _directory.mkdir(parents=True, exist_ok=True)

problem_class = "carps.benchmarks.yahpo.YahpoProblem"

n_trials = 100
time_budget = 720  # minutes

# One config per (scenario, instance, fidelity type); the extra `None` fidelity
# type yields the blackbox variant of each problem.
for bench, instance_list in COMBIS.items():
    for instance in instance_list:
        for fid_type in [*FIDELITY_TYPES[bench], None]:
            is_blackbox = fid_type is None

            # Fidelity bounds: epoch bounds are stored per scenario, all other
            # fidelity types share one bound, and blackbox problems have none.
            if is_blackbox:
                lower = upper = None
            elif fid_type == 'epoch':
                lower, upper = LOWER['epochs'][bench], UPPER['epochs'][bench]
            else:
                lower, upper = LOWER[fid_type], UPPER[fid_type]

            problem_id = f"yahpo/{bench}/{instance}/{fid_type}"
            target_metric = TARGETMETRIC[bench]

            # Search-space statistics are only available when YAHPO is importable.
            search_space_kwargs = {}
            if YahpoProblem is not None:
                problem = YahpoProblem(bench=bench, instance=instance, budget_type=fid_type, metric=target_metric)
                search_space_kwargs = get_search_space_info(configspace=problem.configspace)

            # Blackbox problems are limited by trials, multi-fidelity ones by time.
            task = Task(
                n_trials=n_trials if is_blackbox else None,
                time_budget=None if is_blackbox else time_budget,
                n_workers=1,
                n_objectives=1,
                objectives=["quality"],
                is_multifidelity=not is_blackbox,
                fidelity_type=fid_type,
                min_budget=lower,
                max_budget=upper,
                has_constraints=False,
                domain="ML",
                objective_function_approximation="surrogate",
                has_virtual_time=True,
                **search_space_kwargs
            )

            problem_cfg = {
                '_target_': problem_class,
                'bench': bench,
                'instance': instance,
                'budget_type': fid_type,
                'metric': target_metric,
            }
            cfg = DictConfig({
                'benchmark_id': 'YAHPO',
                'problem_id': problem_id,
                'problem': problem_cfg,
                'task': asdict(task),
            })

            if is_blackbox:
                fn = target_path_blackbox / f"cfg_{bench}_{instance}.yaml"
            else:
                fn = target_path_multifidelity / "all" / f"cfg_{bench}_{instance}_{fid_type}.yaml"
            fn.parent.mkdir(exist_ok=True, parents=True)
            # The header line is prepended verbatim to the YAML dump.
            fn.write_text("# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg))
            print(cfg)

{'OpenML_task_id': OpenML_task_id, Type: Constant, Value: 3945, 'batch_size': batch_size, Type: UniformInteger, Range: [16, 512], Default: 91, on log-scale, 'learning_rate': learning_rate, Type: UniformFloat, Range: [0.00010000000000000009, 0.10000000000000002], Default: 0.0031622777, on log-scale, 'max_dropout': max_dropout, Type: UniformFloat, Range: [0.0, 1.0], Default: 0.5, 'max_units': max_units, Type: UniformInteger, Range: [64, 1024], Default: 256, on log-scale, 'momentum': momentum, Type: UniformFloat, Range: [0.1, 0.99], Default: 0.545, 'num_layers': num_layers, Type: UniformInteger, Range: [1, 5], Default: 3, 'weight_decay': weight_decay, Type: UniformFloat, Range: [1e-05, 0.1], Default: 0.050005}
{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/lcbench/3945/epoch', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'lcbench', 'instance': '3945', 'budget_type': 'epoch', 'metric': 'val_accuracy'}, 'task': {'n_trials': None, 'time_budget': 720, 'n_workers': 1

In [15]:
# YAHPO Gym single-objective collection.
# Each row is (scenario, instance, target, rho, budget); the "id" of an entry
# is its 1-based position in the table.
_SO_COLLECTION_ROWS = (
    ("lcbench", "167168", "val_accuracy", 0.94, 126),
    ("lcbench", "189873", "val_accuracy", 0.97, 126),
    ("lcbench", "189906", "val_accuracy", 0.97, 126),
    ("nb301", "CIFAR10", "val_accuracy", 0.98, 250),
    ("rbv2_glmnet", "375", "acc", 0.80, 90),
    ("rbv2_glmnet", "458", "acc", 0.85, 90),
    ("rbv2_ranger", "16", "acc", 0.93, 134),
    ("rbv2_ranger", "42", "acc", 0.98, 134),
    ("rbv2_rpart", "14", "acc", 0.92, 110),
    ("rbv2_rpart", "40499", "acc", 0.97, 110),
    ("rbv2_super", "1053", "acc", 0.31, 267),
    ("rbv2_super", "1457", "acc", 0.70, 267),
    ("rbv2_super", "1063", "acc", 0.57, 267),
    ("rbv2_super", "1479", "acc", 0.36, 267),
    ("rbv2_super", "15", "acc", 0.75, 267),
    ("rbv2_super", "1468", "acc", 0.77, 267),
    ("rbv2_xgboost", "12", "acc", 0.93, 170),
    ("rbv2_xgboost", "1501", "acc", 0.89, 170),
    ("rbv2_xgboost", "16", "acc", 0.91, 170),
    ("rbv2_xgboost", "40499", "acc", 0.96, 170),
)
yahpo_gym_so_collection = [
    {
        "id": position,
        "scenario": scenario_name,
        "instance": instance_name,
        "target": target_name,
        "rho": rho_value,
        "budget": budget_value,
    }
    for position, (scenario_name, instance_name, target_name, rho_value, budget_value)
    in enumerate(_SO_COLLECTION_ROWS, start=1)
]
# Directory that receives the configs of the single-objective collection.
target_path_soc = target_path.joinpath("SO")

# YAHPO Gym multi-objective collection.
# Each row is (scenario, instance, targets, rho, budget); the "id" of an entry
# is its 1-based position in the table. Targets are copied into a fresh list
# per entry.
_MO_COLLECTION_ROWS = (
    ("iaml_glmnet", "1489", ("mmce", "nf"), 0.86, 77),
    ("iaml_glmnet", "1067", ("mmce", "nf"), 0.73, 77),
    ("iaml_ranger", "1489", ("mmce", "nf", "ias"), 0.93, 134),
    ("iaml_ranger", "1067", ("mmce", "nf", "ias"), 0.92, 134),
    ("iaml_super", "1489", ("mmce", "nf", "ias"), 0.82, 232),
    ("iaml_super", "1067", ("mmce", "nf", "ias"), 0.82, 232),
    ("iaml_xgboost", "40981", ("mmce", "nf", "ias"), 0.88, 165),
    ("iaml_xgboost", "1489", ("mmce", "nf", "ias"), 0.92, 165),
    ("iaml_xgboost", "40981", ("mmce", "nf", "ias", "rammodel"), 0.89, 165),
    ("iaml_xgboost", "1489", ("mmce", "nf", "ias", "rammodel"), 0.92, 165),
    ("lcbench", "167152", ("val_accuracy", "val_cross_entropy"), 0.98, 126),
    ("lcbench", "167185", ("val_accuracy", "val_cross_entropy"), 0.91, 126),
    ("lcbench", "189873", ("val_accuracy", "val_cross_entropy"), 0.93, 126),
    ("rbv2_ranger", "6", ("acc", "memory"), 0.90, 134),
    ("rbv2_ranger", "40979", ("acc", "memory"), 0.73, 134),
    ("rbv2_ranger", "375", ("acc", "memory"), 0.85, 134),
    ("rbv2_rpart", "41163", ("acc", "memory"), 0.85, 110),
    ("rbv2_rpart", "1476", ("acc", "memory"), 0.80, 110),
    ("rbv2_rpart", "40499", ("acc", "memory"), 0.83, 110),
    ("rbv2_super", "1457", ("acc", "memory"), 0.66, 267),
    ("rbv2_super", "6", ("acc", "memory"), 0.68, 267),
    ("rbv2_super", "1053", ("acc", "memory"), 0.45, 267),
    ("rbv2_xgboost", "28", ("acc", "memory"), 0.80, 170),
    ("rbv2_xgboost", "182", ("acc", "memory"), 0.79, 170),
    ("rbv2_xgboost", "12", ("acc", "memory"), 0.76, 170),
)
yahpo_gym_mo_collection = [
    {
        "id": position,
        "scenario": scenario_name,
        "instance": instance_name,
        "target": list(target_names),
        "rho": rho_value,
        "budget": budget_value,
    }
    for position, (scenario_name, instance_name, target_names, rho_value, budget_value)
    in enumerate(_MO_COLLECTION_ROWS, start=1)
]
from collections import Counter

# Directory that receives the configs of the multi-objective collection.
target_path_moc = target_path / "MO"

# Write one config file per entry of the single-objective ("so") and
# multi-objective ("mo") collections. All of them are blackbox problems.
for _target_path, collection, identifier in zip(
    [target_path_soc, target_path_moc],
    [yahpo_gym_so_collection, yahpo_gym_mo_collection],
    ["so", "mo"],
):
    # A collection may list the same (scenario, instance) pair several times
    # with different target sets (the multi-objective collection does so for
    # iaml_xgboost on instances 40981 and 1489). Naming configs only by
    # scenario and instance would make the later entry silently overwrite the
    # earlier one, so such pairs are detected up front and disambiguated below.
    occurrences = Counter((entry["scenario"], entry["instance"]) for entry in collection)

    for entry in collection:
        # No fidelity is used for the collections, hence no fidelity bounds.
        fid_type = None
        lower = None
        upper = None

        bench = entry["scenario"]
        instance = entry["instance"]
        # NOTE(review): each entry also carries a "budget" value that is not
        # used here; n_trials comes from the notebook-wide setting. Confirm
        # that this is intended.
        metric = entry["target"]
        if not isinstance(metric, list):
            metric = [metric]

        problem_id = f"yahpo/{identifier}/{bench}/{instance}/{fid_type}"
        file_stem = f"cfg_{bench}_{instance}"
        if occurrences[(bench, instance)] > 1:
            # Only colliding entries get the target names appended, so every
            # unique entry keeps its previous problem id and file name.
            target_tag = "_".join(metric)
            problem_id = f"{problem_id}/{target_tag}"
            file_stem = f"{file_stem}_{target_tag}"

        # Search-space statistics are only available when YAHPO is importable.
        # Only the configuration space is read from this instance.
        search_space_kwargs = {}
        if YahpoProblem is not None:
            problem = YahpoProblem(bench=bench, instance=instance, budget_type=fid_type, metric=TARGETMETRIC[bench])
            search_space_kwargs = get_search_space_info(configspace=problem.configspace)

        # Blackbox task: limited by number of trials, no time budget.
        task = Task(
            n_trials=n_trials,
            time_budget=None,
            n_workers=1,
            n_objectives=len(metric),
            objectives=metric,
            is_multifidelity=False,
            fidelity_type=fid_type,
            min_budget=lower,
            max_budget=upper,
            has_constraints=False,
            domain="ML",
            objective_function_approximation="surrogate",
            has_virtual_time=True,
            **search_space_kwargs
        )

        cfg = DictConfig({
            'benchmark_id': 'YAHPO',
            'problem_id': problem_id,
            'problem': {
                '_target_': problem_class,
                'bench': bench,
                'instance': instance,
                'budget_type': fid_type,
                'metric': metric,
            },
            'task': asdict(task)
        })

        fn = _target_path / f"{file_stem}.yaml"
        fn.parent.mkdir(exist_ok=True, parents=True)
        yaml_str = OmegaConf.to_yaml(cfg=cfg)
        # The header line is prepended verbatim to the YAML dump.
        yaml_str = "# @package _global_\n" + yaml_str
        fn.write_text(yaml_str)
        print(cfg)


{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/so/lcbench/167168/None', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'lcbench', 'instance': '167168', 'budget_type': None, 'metric': ['val_accuracy']}, 'task': {'n_trials': 100, 'time_budget': None, 'n_workers': 1, 'n_objectives': 1, 'objectives': ['val_accuracy'], 'is_multifidelity': False, 'fidelity_type': None, 'min_budget': None, 'max_budget': None, 'has_constraints': False, 'domain': 'ML', 'objective_function_approximation': 'surrogate', 'has_virtual_time': True, 'dimensions': 7, 'search_space_n_categoricals': 0, 'search_space_n_ordinals': 0, 'search_space_n_integers': 3, 'search_space_n_floats': 4, 'search_space_has_conditionals': False, 'search_space_has_forbiddens': False, 'search_space_has_priors': False}}
{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/so/lcbench/189873/None', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'lcbench', 'instance': '189873', 'budget_type': None, 'm

{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/so/nb301/CIFAR10/None', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'nb301', 'instance': 'CIFAR10', 'budget_type': None, 'metric': ['val_accuracy']}, 'task': {'n_trials': 100, 'time_budget': None, 'n_workers': 1, 'n_objectives': 1, 'objectives': ['val_accuracy'], 'is_multifidelity': False, 'fidelity_type': None, 'min_budget': None, 'max_budget': None, 'has_constraints': False, 'domain': 'ML', 'objective_function_approximation': 'surrogate', 'has_virtual_time': True, 'dimensions': 34, 'search_space_n_categoricals': 34, 'search_space_n_ordinals': 0, 'search_space_n_integers': 0, 'search_space_n_floats': 0, 'search_space_has_conditionals': True, 'search_space_has_forbiddens': False, 'search_space_has_priors': False}}
{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/so/rbv2_glmnet/375/None', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'rbv2_glmnet', 'instance': '375', 'budget_type': None, '

In [16]:
# Multi-fidelity variants of the single-objective collection: one config per
# entry and fidelity type, stored in a subfolder named after the fidelity type.
target_path_mf_soc = base_path / "YAHPO" / "multifidelity" / "SO"

for entry in yahpo_gym_so_collection:
    for fid_type in FIDELITY_TYPES[entry["scenario"]]:
        bench, instance, budget = entry["scenario"], entry["instance"], entry["budget"]
        target = entry["target"]
        metric = target if isinstance(target, list) else [target]

        # Epoch bounds are stored per scenario; other fidelity types share one bound.
        uses_epochs = fid_type == "epoch"
        lower = LOWER["epochs"][bench] if uses_epochs else LOWER[fid_type]
        upper = UPPER["epochs"][bench] if uses_epochs else UPPER[fid_type]

        problem_id = f"yahpo/multifidelity/so/{fid_type}/{bench}/{instance}/{fid_type}"
        problem_class = "carps.benchmarks.yahpo.YahpoProblem"

        # Search-space statistics are only available when YAHPO is importable.
        search_space_kwargs = {}
        if YahpoProblem is not None:
            problem = YahpoProblem(bench=bench, instance=instance, budget_type=fid_type, metric=TARGETMETRIC[bench])
            search_space_kwargs = get_search_space_info(configspace=problem.configspace)

        # With a fidelity type the task is limited by time, otherwise by trials.
        has_fidelity = fid_type is not None
        task = Task(
            n_trials=None if has_fidelity else n_trials,
            time_budget=time_budget if has_fidelity else None,
            n_workers=1,
            n_objectives=len(metric),
            objectives=metric,
            is_multifidelity=has_fidelity,
            fidelity_type=fid_type,
            min_budget=lower,
            max_budget=upper,
            has_constraints=False,
            domain="ML",
            objective_function_approximation="surrogate",
            has_virtual_time=True,
            **search_space_kwargs
        )

        problem_cfg = {
            '_target_': problem_class,
            'bench': bench,
            'instance': instance,
            'budget_type': fid_type,
            'metric': metric
        }
        cfg = DictConfig({
            'benchmark_id': 'YAHPO',
            'problem_id': problem_id,
            'problem': problem_cfg,
            'task': asdict(task)
        })

        fn = target_path_mf_soc / fid_type / f"cfg_{bench}_{instance}_mf.yaml"
        fn.parent.mkdir(exist_ok=True, parents=True)
        # The header line is prepended verbatim to the YAML dump.
        fn.write_text("# @package _global_\n" + OmegaConf.to_yaml(cfg=cfg))
        print(cfg)

{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/multifidelity/so/epoch/lcbench/167168/epoch', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'lcbench', 'instance': '167168', 'budget_type': 'epoch', 'metric': ['val_accuracy']}, 'task': {'n_trials': None, 'time_budget': 720, 'n_workers': 1, 'n_objectives': 1, 'objectives': ['val_accuracy'], 'is_multifidelity': True, 'fidelity_type': 'epoch', 'min_budget': 1, 'max_budget': 52, 'has_constraints': False, 'domain': 'ML', 'objective_function_approximation': 'surrogate', 'has_virtual_time': True, 'dimensions': 7, 'search_space_n_categoricals': 0, 'search_space_n_ordinals': 0, 'search_space_n_integers': 3, 'search_space_n_floats': 4, 'search_space_has_conditionals': False, 'search_space_has_forbiddens': False, 'search_space_has_priors': False}}
{'benchmark_id': 'YAHPO', 'problem_id': 'yahpo/multifidelity/so/epoch/lcbench/189873/epoch', 'problem': {'_target_': 'carps.benchmarks.yahpo.YahpoProblem', 'bench': 'lcbench', 'in