Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
65 changes: 58 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -302,14 +302,65 @@ Submit all agents for a given seed and portfolio:
bash runner.sh
```

Individual SLURM scripts:
Each script accepts positional arguments: `SEED [PORTFOLIO...]` (RL-DAS takes only `SEED`).

| Script | Agent |
|---|---|
| `ppo_study.slurm` | PPO |
| `rl_das_study.slurm` | RL-DAS |
| `exp_das_study.slurm` | Exp-DAS |
| `baselines.slurm` | Baselines |
```bash
sbatch baselines.slurm 42 CPSO NM TDE
sbatch ppo_study.slurm 42 CPSO NM TDE
sbatch rl_das_study.slurm 42
sbatch exp_das_study.slurm 42 CPSO NM TDE
```

### `baselines.slurm`

Single job (no array). Runs all baseline agent types (`random`, `fixed:*`, `single:*`, oracle) across all dimensions.

### `ppo_study.slurm` — array 0–9

| Task | CV mode | Dimensions |
|------|---------|------------|
| 0 | LOIO | 2 |
| 1 | LOIO | 3 |
| 2 | LOIO | 5 |
| 3 | LOIO | 10 |
| 4 | LOPO | 2 |
| 5 | LOPO | 3 |
| 6 | LOPO | 5 |
| 7 | LOPO | 10 |
| 8 | LOIO | 2, 3, 5, 10 (multi-dim) |
| 9 | LOPO | 2, 3, 5, 10 (multi-dim) |

### `rl_das_study.slurm` — array 0–7

Fixed DE portfolio (`NL_SHADE_RSP / MADDE / JDE21`). One model per dimension.

| Task | CV mode | Dimension |
|------|---------|-----------|
| 0 | LOIO | 2 |
| 1 | LOIO | 3 |
| 2 | LOIO | 5 |
| 3 | LOIO | 10 |
| 4 | LOPO | 2 |
| 5 | LOPO | 3 |
| 6 | LOPO | 5 |
| 7 | LOPO | 10 |

### `exp_das_study.slurm` — array 0–11

| Task | CV mode | Dimensions |
|------|---------|------------|
| 0 | LOIO | 2, 5, 10 (multi-dim) |
| 1 | LOPO | 2, 5, 10 (multi-dim) |
| 2 | LOIO | 2, 3, 5, 10 (multi-dim) |
| 3 | LOPO | 2, 3, 5, 10 (multi-dim) |
| 4 | LOIO | 2 |
| 5 | LOPO | 2 |
| 6 | LOIO | 3 |
| 7 | LOPO | 3 |
| 8 | LOIO | 5 |
| 9 | LOPO | 5 |
| 10 | LOIO | 10 |
| 11 | LOPO | 10 |

---

Expand Down
32 changes: 18 additions & 14 deletions agents/exponential_das/trainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,11 @@

import numpy as np
import torch
from tqdm import tqdm

from agents.exponential_das.agent import ExpDASAgent
from das.env.das_env import DASEnv
from das.training.common import compute_run_stats
from das.training.common import compute_run_stats, get_ioh_optimum


def train(
Expand Down Expand Up @@ -64,7 +65,8 @@ def train(
episode_rewards: list[float] = []
best_test_reward = -np.inf

for ep in range(1, total_episodes + 1):
pbar = tqdm(range(1, total_episodes + 1), desc=f"train {name}", unit="ep")
for ep in pbar:
obs, info = train_env.reset()
done = False
step_idx = 0
Expand Down Expand Up @@ -117,17 +119,22 @@ def train(
)
entry["mean_test_reward"] = mean_test_r
mean_train_r = float(np.mean(episode_rewards[-eval_interval:]))
print(
f"Ep {ep:5d}/{total_episodes}"
f" train={mean_train_r:.4f}"
f" test={mean_test_r:.4f}"
f" entropy={agent.entropy_coef:.4f}"
f" lr={agent.current_lr:.2e}"
f" kl={agent.last_kl:.4f}"
pbar.set_postfix(
train=f"{mean_train_r:.4f}",
test=f"{mean_test_r:.4f}",
ent=f"{agent.entropy_coef:.3f}",
kl=f"{agent.last_kl:.4f}",
)
if mean_test_r > best_test_reward:
best_test_reward = mean_test_r
agent.save(os.path.join(save_dir, f"{name}_best.pt"))
else:
mean_train_r = float(
np.mean(episode_rewards[-min(eval_interval, len(episode_rewards)) :])
)
pbar.set_postfix(
train=f"{mean_train_r:.4f}", ent=f"{agent.entropy_coef:.3f}"
)

if ep % save_interval == 0:
ckpt = os.path.join(save_dir, f"{name}_ep{ep}.pt")
Expand All @@ -148,13 +155,10 @@ def evaluate(
env: DASEnv,
agent: ExpDASAgent,
n_episodes: int = 20,
global_optima: dict[str, float] | None = None,
) -> list[dict]:
"""Run the agent deterministically and return per-episode results."""
if global_optima is None:
global_optima = {}
results = []
for _ in range(n_episodes):
for _ in tqdm(range(n_episodes), desc="evaluate", unit="ep", leave=False):
obs, info = env.reset()
problem_id = info.get("problem_id", "")
done = False
Expand All @@ -170,7 +174,7 @@ def evaluate(
fitness_history.extend(step_info.get("fitness_history_step", []))

max_fe = step_info.get("n_fe", 0)
global_minimum = global_optima.get(problem_id, 0.0)
global_minimum = get_ioh_optimum(problem_id)
stats = compute_run_stats(fitness_history, max_fe, global_minimum)
results.append(
{
Expand Down
4 changes: 2 additions & 2 deletions agents/rl_das/env.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def _pop_features(


class RLDASEnv(gym.Env):
"""RL-DAS environment wrapping BBOB problems via a cocoex Suite.
"""RL-DAS environment wrapping optimization problems via an IOHSuite.

Uses a Population object as shared warm-started state across all DE
sub-optimizers (matching the original RL-DAS design).
Expand All @@ -176,7 +176,7 @@ class RLDASEnv(gym.Env):
problem_ids:
BBOB problem IDs to cycle through (one per episode).
suite:
cocoex Suite object.
IOHSuite object.
optimizers:
List of instantiated DE optimizer objects (NL_SHADE_RSP, JDE21, MadDE).
dim:
Expand Down
59 changes: 43 additions & 16 deletions baselines.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,15 +36,20 @@
import os
import warnings

import cocoex
import numpy as np
from tqdm import tqdm

from das.env.das_env import DASEnv
from das.env.ioh_suite import IOHSuite
from das.optimizers.portfolio import get_portfolio
from das.utils import set_seed
from das.env.bbob_splits import ALL_DIMS, get_train_test_split
from das.training.common import compute_run_stats, load_global_optima
from das.training.common import (
compute_run_stats,
get_ioh_optimum,
ERT_TARGETS,
_ert_key,
)

warnings.filterwarnings("ignore")

Expand Down Expand Up @@ -95,7 +100,6 @@ def collect_env_results(
suite,
optimizers: list,
cfg: dict,
global_optima: dict[str, float],
) -> list[dict]:
"""Run policy_fn on every problem in test_ids via DASEnv."""
env = DASEnv(
Expand All @@ -112,7 +116,7 @@ def collect_env_results(
for problem_id in tqdm(test_ids, desc=f" {agent_tag}", smoothing=0.0):
step_info, fitness_history = run_episode(env, policy_fn)
max_fe = step_info.get("n_fe", 0)
global_minimum = global_optima.get(problem_id, 0.0)
global_minimum = get_ioh_optimum(problem_id)
stats = compute_run_stats(fitness_history, max_fe, global_minimum)
results.append({problem_id: {**stats, "agent": agent_tag}})
env.close()
Expand Down Expand Up @@ -164,13 +168,12 @@ def collect_single_results(
suite,
fe_multiplier: int,
n_individuals: int,
global_optima: dict[str, float],
) -> list[dict]:
"""Run the optimizer independently on every problem in test_ids."""
results = []
for problem_id in tqdm(test_ids, desc=f" {agent_tag}", smoothing=0.0):
problem = suite.get_problem(problem_id)
global_minimum = global_optima.get(problem_id, 0.0)
global_minimum = get_ioh_optimum(problem_id)
stats = run_single_algorithm(
optimizer_class, problem, fe_multiplier, n_individuals, global_minimum
)
Expand Down Expand Up @@ -212,6 +215,8 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list
],
"aocc": best_m["aocc"],
"final_fitness": best_m["final_fitness"],
"hitting_times": best_m.get("hitting_times", {}),
"max_fe": best_m.get("max_fe", 0),
"agent": "oracle-best",
"best_agent": best_m["agent"],
}
Expand All @@ -225,6 +230,8 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list
],
"aocc": worst_m["aocc"],
"final_fitness": worst_m["final_fitness"],
"hitting_times": worst_m.get("hitting_times", {}),
"max_fe": worst_m.get("max_fe", 0),
"agent": "oracle-worst",
"worst_agent": worst_m["agent"],
}
Expand All @@ -241,12 +248,29 @@ def compute_oracle(all_results: dict[str, list[dict]]) -> tuple[list[dict], list
# ------------------------------------------------------------------ #


def _ert_for_target(records: list[dict], target_key: str) -> float | None:
"""ERT = total_FEs / n_successful_runs (unsuccessful runs contribute max_fe)."""
total_fe = 0
n_succ = 0
for r in records:
m = next(iter(r.values()))
ht = m.get("hitting_times", {}).get(target_key)
mfe = m.get("max_fe", 0)
if ht is not None:
total_fe += ht
n_succ += 1
else:
total_fe += mfe
return float(total_fe / n_succ) if n_succ > 0 else None


def summarise(tag: str, records: list[dict]) -> dict:
fitnesses = [next(iter(r.values()))["final_fitness"] for r in records]
aocc_vals = [next(iter(r.values()))["aocc"] for r in records]
auoc_vals = [
next(iter(r.values()))["area_under_optimization_curve"] for r in records
]
ert = {_ert_key(t): _ert_for_target(records, _ert_key(t)) for t in ERT_TARGETS}
return {
"agent": tag,
"n_problems": len(fitnesses),
Expand All @@ -256,6 +280,7 @@ def summarise(tag: str, records: list[dict]) -> dict:
"worst_final_fitness": float(np.max(fitnesses)),
"mean_aocc": float(np.mean(aocc_vals)),
"mean_auoc": float(np.mean(auoc_vals)),
"ert": ert,
}


Expand All @@ -266,16 +291,23 @@ def save_results(records: list[dict], path: str) -> None:


def print_summary(summaries: list[dict]) -> None:
_ERT_PRINT_TARGET = "1e-04"
width = max(len(s["agent"]) for s in summaries) + 2
header = f" {'Agent':<{width}} {'Mean fitness':>14} {'Median fitness':>14} {'Mean AUOC':>14}"
header = (
f" {'Agent':<{width}} {'Mean fitness':>14} {'Median fitness':>14}"
f" {'Mean AUOC':>14} {'ERT(1e-04)':>12}"
)
print(header)
print(" " + "-" * (len(header) - 2))
for s in summaries:
ert_val = s.get("ert", {}).get(_ERT_PRINT_TARGET)
ert_str = f"{ert_val:>12.1f}" if ert_val is not None else f"{'inf':>12}"
print(
f" {s['agent']:<{width}} "
f"{s['mean_final_fitness']:>14.4e} "
f"{s['median_final_fitness']:>14.4e} "
f"{s['mean_auoc']:>14.4e}"
f"{s['mean_auoc']:>14.4e} "
f"{ert_str}"
)


Expand Down Expand Up @@ -317,7 +349,7 @@ def parse_args():
p.add_argument("-s", "--n-checkpoints", type=int, default=10)
p.add_argument("-x", "--cdb", type=float, default=1.0)
p.add_argument("-O", "--reward-option", type=int, default=1, choices=[1, 2, 3, 4])
p.add_argument("-n", "--n-individuals", type=int, default=100)
p.add_argument("-n", "--n-individuals", type=int, default=None)
p.add_argument("--seed", type=int, default=42)
return p.parse_args()

Expand All @@ -334,11 +366,8 @@ def main():

optimizers = get_portfolio(args.portfolio)
opt_names = args.portfolio
cocoex.utilities.MiniPrint()
suite = cocoex.Suite("bbob", "", "")
suite = IOHSuite()
_, test_ids = get_train_test_split(args.mode, args.dims)
global_optima = load_global_optima()

cfg = {
"fe_multiplier": args.fe_multiplier,
"n_checkpoints": args.n_checkpoints,
Expand Down Expand Up @@ -384,7 +413,7 @@ def main():

if tag == "random":
records = collect_env_results(
tag, random_policy, test_ids, suite, optimizers, cfg, global_optima
tag, random_policy, test_ids, suite, optimizers, cfg
)

elif tag.startswith("fixed:"):
Expand All @@ -402,7 +431,6 @@ def main():
suite,
optimizers,
cfg,
global_optima,
)

elif tag.startswith("single:"):
Expand All @@ -415,7 +443,6 @@ def main():
suite,
args.fe_multiplier,
args.n_individuals,
global_optima,
)

else:
Expand Down
Loading
Loading