# Randomly Sample Instance Set
Run this script once for each benchmark and commit the resulting instance set config files.
The subsampling fraction should roughly match the share of instances that SELECTOR picks at lower thresholds.
By default, 10% of each instance set is sampled.

In [3]:
from pathlib import Path
from rich import print as printr
from omegaconf import OmegaConf
from hydra.utils import instantiate
import numpy as np
from dacbench.abstract_benchmark import AbstractBenchmark

# ---------------------------------------------------------------------------
# USER SETTINGS: benchmark config and the instance set to subsample
# ---------------------------------------------------------------------------
benchmark_cfg_fn = "../instance_dac/configs/benchmark/sigmoid.yaml"
instance_set_cfg_fn = "../instance_dac/configs/inst/sigmoid/2D3M_train.yaml"

# One subsampled instance set is produced per seed; each keeps `fraction`
# of the source instances.
fraction = 0.1
seeds = [234235, 43534, 411, 799, 32544]
# ---------------------------------------------------------------------------

# Load both configs (benchmark first, then instance set), overlay the
# instance-set config onto the benchmark config and build the benchmark.
bench_cfg, iset_cfg = map(OmegaConf.load, (benchmark_cfg_fn, instance_set_cfg_fn))
bench_cfg = OmegaConf.merge(bench_cfg, iset_cfg)
benchmark = instantiate(bench_cfg.benchmark)

def random_sample_instance_set(benchmark: AbstractBenchmark, seed: int, fraction: float) -> dict:
    """Draw a random subset of the benchmark's instance set without replacement.

    Parameters
    ----------
    benchmark : AbstractBenchmark
        Benchmark to sample from. ``read_instance_set()`` is called on it and
        the instances are then taken from ``benchmark.config["instance_set"]``.
    seed : int
        Seed for the NumPy random generator, so the same seed yields the same
        subset.
    fraction : float
        Fraction of instances to keep. The subset size is
        ``int(fraction * n_instances)``, i.e. truncated towards zero.

    Returns
    -------
    dict
        The selected ``{instance_id: instance}`` pairs, in the order drawn.
        Keys are the original key objects of the instance set.
    """
    benchmark.read_instance_set()
    instance_set = benchmark.config["instance_set"]
    rng = np.random.default_rng(seed=seed)

    keys = list(instance_set)
    n_selected = int(fraction * len(keys))

    # Sample positions instead of the keys themselves. Passing the keys to
    # NumPy would coerce them to array scalars, which then had to be cast back
    # with int() and therefore only worked for integer ids. Indexing back into
    # `keys` preserves the original key type for any kind of id.
    positions = rng.choice(len(keys), size=n_selected, replace=False)
    return {keys[i]: instance_set[keys[i]] for i in positions}

# Create one randomly subsampled instance set per seed and store each as its
# own instance set config file.
for seed in seeds:
    instance_set_new = random_sample_instance_set(benchmark=benchmark, seed=seed, fraction=fraction)

    print("Size of new instance set:", len(instance_set_new))

    # Fraction and seed are part of the id so every sample gets a unique name.
    instance_set_id = iset_cfg.instance_set_id + f"_{fraction}_{seed}"

    cfg = OmegaConf.create({
        "instance_set_id": instance_set_id,
        "instance_set_selection": "random",
        # Record how the subset was drawn so it can be reproduced.
        "selector": {
            "seed": seed,
            "fraction": fraction,
        },
        "source_instance_set_id": iset_cfg.instance_set_id,
        "benchmark": {
            "config": {
                "instance_set": instance_set_new
            }
        },
    })
    yaml_str = OmegaConf.to_yaml(cfg)
    # Prepend the Hydra `@package _global_` header line to the YAML body.
    yaml_str = f"# @package _global_\n\n{yaml_str}"

    target_fn = Path("../instance_dac/configs/inst/") / bench_cfg.benchmark_id.lower() / "random" / f"{instance_set_id}.yaml"
    target_fn.parent.mkdir(exist_ok=True, parents=True)
    # Write with an explicit encoding so the file does not depend on the
    # platform default, and report success only after the write has happened.
    target_fn.write_text(yaml_str, encoding="utf-8")
    print("Wrote", target_fn)

Size of new instance set: 30
Wrote ../instance_dac/configs/inst/sigmoid/random/2D3M_train_0.1_234235.yaml
Size of new instance set: 30
Wrote ../instance_dac/configs/inst/sigmoid/random/2D3M_train_0.1_43534.yaml
Size of new instance set: 30
Wrote ../instance_dac/configs/inst/sigmoid/random/2D3M_train_0.1_411.yaml
Size of new instance set: 30
Wrote ../instance_dac/configs/inst/sigmoid/random/2D3M_train_0.1_799.yaml
Size of new instance set: 30
Wrote ../instance_dac/configs/inst/sigmoid/random/2D3M_train_0.1_32544.yaml


In [4]:
# Sanity check: load the most recently written instance set config (the last
# `target_fn` of the loop), overlay it onto the benchmark config, build the
# environment and show the instance set it ends up with.
cfg, bcfg = map(OmegaConf.load, (target_fn, benchmark_cfg_fn))
cfg = OmegaConf.merge(bcfg, cfg)
benchmark = instantiate(cfg.benchmark)
env = benchmark.get_environment()
for shown in (len(env.instance_set), env.instance_set):
    printr(shown)