In [1]:
import warnings
# Suppress all warnings so they do not clutter the notebook output.
warnings.filterwarnings('ignore')
from pathlib import Path
import pandas as pd

import seaborn
# Apply seaborn's "talk" context preset to subsequent plots.
seaborn.set_context("talk")

In [2]:
from dacbench.benchmarks import ModCMABenchmark

# First steps: running an episode

### Creating a benchmark object
Benchmarks are environments created by a benchmark object.
First, we take a look at that object and the configuration it holds:

In [3]:
# Build the benchmark object, then print every configuration entry as "key: value".
benchCMA = ModCMABenchmark()
cma_config = benchCMA.config
for name in cma_config.keys():
    value = cma_config[name]
    print(f"{name}: {value}")

config_space: Configuration space object:
  Hyperparameters:
    0_active, Type: Categorical, Choices: {True, False}, Default: True
    1_elitist, Type: Categorical, Choices: {True, False}, Default: True
    2_orthogonal, Type: Categorical, Choices: {True, False}, Default: True
    3_sequential, Type: Categorical, Choices: {True, False}, Default: True
    4_threshold_convergence, Type: Categorical, Choices: {True, False}, Default: True
    5_step_size_adaption, Type: Categorical, Choices: {csa, tpa, msr, xnes, m-xnes, lp-xnes, psr}, Default: csa
    6_mirrored, Type: Categorical, Choices: {None, mirrored, mirrored pairwise}, Default: None
    7_base_sampler, Type: Categorical, Choices: {gaussian, sobol, halton}, Default: gaussian
    8_weights_option, Type: Categorical, Choices: {default, equal, 1/2^lambda}, Default: default
    90_local_restart, Type: Categorical, Choices: {None, IPOP, BIPOP}, Default: None
    91_bound_correction, Type: Categorical, Choices: {None, saturate, unif_res

### Getting the benchmark environment
Now we can get the default benchmark setting like this:

In [4]:
# Ask the benchmark object for its benchmark environment, passing seed=1.
env = benchCMA.get_benchmark(seed=1)

### Running the benchmark
To execute a run, first reset the environment. It will return an initial state as well as a dictionary that may or may not contain some meta-information:

In [5]:
# Reset the environment to start an episode, then show the initial state
# followed by the accompanying info dictionary.
state, info = env.reset()
for reset_output in (state, info):
    print(reset_output)

[ 10.   2. 100.  11.   0.]
{}


Then we can run steps until the algorithm run is done:

In [6]:
# Roll out a single episode: keep stepping with actions sampled from the
# action space until the environment signals termination or truncation,
# accumulating the reward along the way.
cum_reward = 0
terminated = truncated = False
while not terminated and not truncated:
    action = env.action_space.sample()
    state, reward, terminated, truncated, info = env.step(action)
    cum_reward += reward
print(f"Episode 1/1...........................................Reward: {cum_reward}")

Episode 1/1...........................................Reward: -141433.68262540782


## Test run the environment for step size
Change the config so that step_size = True

In [None]:
# Build a second benchmark object with step_size=True and print every
# configuration entry as "key: value".
benchStepCMA = ModCMABenchmark(step_size=True)
step_config = benchStepCMA.config
for name in step_config.keys():
    value = step_config[name]
    print(f"{name}: {value}")

In [7]:
# NOTE(review): the environment assigned here is overwritten two lines below.
# The call is left in place because it may also update benchStepCMA.config
# (e.g. the seed) — confirm before removing or reordering.
step_env = benchStepCMA.get_benchmark(seed=1)
# Set step_size = True in the config before the environment is built.
benchStepCMA.config.step_size = True
# Build the environment used by the following cells from the current config.
step_env = benchStepCMA.get_environment()

In [8]:
# Reset the step-size environment to start an episode, then show the initial
# state followed by the accompanying info dictionary.
state, info = step_env.reset()
for reset_output in (state, info):
    print(reset_output)

[ 10.   2. 100.  11.   0.]
{}


In [9]:
# Roll out a single episode on the step-size environment: keep stepping with
# actions sampled from the action space until the environment signals
# termination or truncation, accumulating the reward along the way.
cum_reward = 0
terminated = truncated = False
while not terminated and not truncated:
    action = step_env.action_space.sample()
    state, reward, terminated, truncated, info = step_env.step(action)
    cum_reward += reward
print(f"Episode 1/1...........................................Reward: {cum_reward}")

Episode 1/1...........................................Reward: -518064.4693024508
