In [1]:
from datetime import datetime

import numpy as np
import torch
from gym.wrappers import (
    FilterObservation,
    FlattenObservation,
    FrameStack,
    RecordVideo,
    RescaleAction,
    TimeLimit,
)
from stable_baselines3.common.env_util import unwrap_wrapper

from bayesopt import calculate_objective, get_next_samples, scale_action, get_new_bound
from ea_optimize import (
    ARESEADOOCS,
    CallbackList,
    OptimizeFunctionCallback,
    report_ea_optimization_to_logbook,
)
from utils import (
    FilterAction,
    NotVecNormalize,
    PolishedDonkeyCompatibility,
    RecordEpisode,
    send_to_elog,
)


initializing ocelot...


## Preamble

Remarks: I would set a higher max_step for BO, maybe 100/150?

In [2]:
config = config = {
    "action_mode": "direct_unidirectional_quads",
    "gamma": 0.99,
    # "filter_action": [0, 1, 3],
    "filter_action": None,
    "filter_observation": None,
    "frame_stack": None,
    "incoming_mode": "random",
    "incoming_values": None,
    "magnet_init_mode": "constant",
    "magnet_init_values": np.array([10, -10, 0, 10, 0]),
    "misalignment_mode": "constant",
    "misalignment_values": np.zeros(8),
    "n_envs": 40,
    "normalize_observation": True,
    "normalize_reward": True,
    "rescale_action": (-3, 3),
    "reward_mode": "logl1",
    "sb3_device": "auto",
    "target_beam_mode": "constant",
    "target_beam_values": np.zeros(4),
    "target_mu_x_threshold": 1e-5,
    "target_mu_y_threshold": 1e-5,
    "target_sigma_x_threshold": 1e-5,
    "target_sigma_y_threshold": 1e-5,
    "threshold_hold": 5,
    "time_limit": 50000,
    "vec_env": "subproc",
    "w_done": 0.0,
    "w_mu_x": 1.0,
    "w_mu_x_in_threshold": 0.0,
    "w_mu_y": 1.0,
    "w_mu_y_in_threshold": 0.0,
    "w_on_screen": 100.0,
    "w_sigma_x": 1.0,
    "w_sigma_x_in_threshold": 0.0,
    "w_sigma_y": 1.0,
    "w_sigma_y_in_threshold": 0.0,
    "w_time": 0.0,
}


In [7]:
# define a similar optimize function as in ea_optimize.py
from bayesopt import get_new_bound
from ea_optimize import BaseCallback


def optimize(
    target_mu_x,
    target_sigma_x,
    target_mu_y,
    target_sigma_y,
    target_mu_x_threshold=3.3198e-6,
    target_mu_y_threshold=3.3198e-6,
    target_sigma_x_threshold=3.3198e-6,
    target_sigma_y_threshold=3.3198e-6,
    max_steps=100,
    model_name="BO",
    logbook=False,
    callback=BaseCallback(),
    stepsize=0.1,  # comparable to RL env
    obj_function="logmae",
    acquisition="EI",
    init_x=None,
    init_samples=5,
    filter_action=None,
    set_to_best=True,  # set back to best found setting after opt.
):
    # TODO move to an init_callback function
    if callback is None:
        callback = BaseCallback()
    elif isinstance(callback, list):
        callback = CallbackList(callback)

    # Create the environment
    env = ARESEADOOCS(
        action_mode=config["action_mode"],
        magnet_init_mode=config["magnet_init_mode"],
        magnet_init_values=config["magnet_init_values"],
        reward_mode=config["reward_mode"],
        target_beam_mode=config["target_beam_mode"],
        target_beam_values=np.array(
            [target_mu_x, target_sigma_x, target_mu_y, target_sigma_y]
        ),
        target_mu_x_threshold=target_mu_x_threshold,
        target_mu_y_threshold=target_mu_y_threshold,
        target_sigma_x_threshold=target_sigma_x_threshold,
        target_sigma_y_threshold=target_sigma_y_threshold,
        threshold_hold=1,
        w_done=config["w_done"],
        w_mu_x=config["w_mu_x"],
        w_mu_x_in_threshold=config["w_mu_x_in_threshold"],
        w_mu_y=config["w_mu_y"],
        w_mu_y_in_threshold=config["w_mu_y_in_threshold"],
        w_on_screen=config["w_on_screen"],
        w_sigma_x=config["w_sigma_x"],
        w_sigma_x_in_threshold=config["w_sigma_x_in_threshold"],
        w_sigma_y=config["w_sigma_y"],
        w_sigma_y_in_threshold=config["w_sigma_y_in_threshold"],
        w_time=config["w_time"],
    )
    if max_steps is not None:
        env = TimeLimit(env, max_steps)
    if callback is not None:
        env = OptimizeFunctionCallback(env, callback)
    env = RecordEpisode(env)
    if config["filter_observation"] is not None:
        env = FilterObservation(env, config["filter_observation"])
    if config["filter_action"] is not None:
        env = FilterAction(env, config["filter_action"], replace=0)
    env = FlattenObservation(env)
    if config["frame_stack"] is not None:
        env = FrameStack(env, config["frame_stack"])
    if config["rescale_action"] is not None:
        env = RescaleAction(
            env, config["rescale_action"][0], config["rescale_action"][1]
        )
    env = RecordVideo(env, video_folder=f"recordings_real/{datetime.now():%Y%m%d%H%M}")
    # env = NotVecNormalize(env, f"models/{model_name}/vec_normalize.pkl")

    callback.env = env

    # Actual optimisation
    t_start = datetime.now()
    observation = env.reset()
    beam_image_before = env.get_beam_image()
    done = False

    # Initialization
    x_dim = env.action_space.shape[0]
    # bounds = torch.tensor(
    #     np.array([env.action_space.low, env.action_space.high]), dtype=torch.float32
    # )
    if init_x is not None:  # From fix starting points
        X = torch.tensor(init_x.reshape(-1, x_dim), dtype=torch.float32)
    else:  # Random Initialization-5.7934
        action_i = scale_action(env, observation, filter_action)
        X = torch.tensor([action_i], dtype=torch.float32)
        bounds = get_new_bound(env, action_i, stepsize)
        for i in range(init_samples - 1):
            new_action = np.random.uniform(low=bounds[0], high=bounds[1])
            X = torch.cat([X, torch.tensor([new_action])])
    # Sample initial Ys to build GP
    Y = torch.empty((X.shape[0], 1))
    for i, action in enumerate(X):
        action = action.detach().numpy()
        print(f"Collecting initial Ys step {i} at {action = }")
        observation, reward, done, info = env.step(action)
        print(f"{reward = }")
        objective = calculate_objective(env, observation, reward, obj=obj_function)
        Y[i] = torch.tensor(objective)

    # Actual BO Loop
    jans_i = 0
    while not done:
        current_action = X[-1].detach().numpy()
        bounds = get_new_bound(env, current_action, stepsize)
        action_t = get_next_samples(
            X, Y, Y.max(), bounds, n_points=1, acquisition=acquisition
        )
        action = action_t.detach().numpy().flatten()
        print(f"Actual optimisation exploring step {jans_i} {action = }")
        jans_i += 1
        observation, reward, done, info = env.step(action)
        print(f"{reward = }")
        objective = calculate_objective(env, observation, reward, obj=obj_function)

        # append data
        X = torch.cat([X, action_t])
        Y = torch.cat([Y, torch.tensor([[objective]], dtype=torch.float32)])

    # Set back to 
    if set_to_best:
        action = X[Y.argmax()].detach().numpy()
        observation, reward, done, info = env.step(action)

    t_end = datetime.now()

    recording = unwrap_wrapper(env, RecordEpisode)
    if logbook:
        report_ea_optimization_to_logbook(
            model_name,
            t_start,
            t_end,
            recording.observations,
            recording.infos,
            beam_image_before,
            target_mu_x_threshold,
            target_sigma_x_threshold,
            target_mu_y_threshold,
            target_sigma_y_threshold,
        )

    env.close()


## Actual BO

In [8]:
# hopefully this would run :)

optimize(
    target_mu_x=0.0,    # 0.4249e-3,
    target_sigma_x=0.0,
    target_mu_y=0.0,    # 1.1048e-3,
    target_sigma_y=0.0,
    target_mu_x_threshold=3.16e-6,
    target_mu_y_threshold=3.16e-6,
    target_sigma_x_threshold=3.16e-6,
    target_sigma_y_threshold=3.16e-6,
    max_steps=10,
    model_name="BO",
    logbook=True,
    callback=None,
    obj_function="logmae",
    acquisition="EI",
    init_x=None,
    init_samples=5,
    filter_action=None,
)


Collecting initial Ys step 0 at action = array([-2.16666651,  2.16666651,  0.        , -2.16666651,  0.        ])
reward = -69.20180975184063
Collecting initial Ys step 1 at action = array([-2.15454164,  1.88808345, -0.21170113, -2.01253051, -0.14593008])
reward = -67.22633474395708
Collecting initial Ys step 2 at action = array([-2.17611868,  2.2183891 , -0.06440592, -2.0814038 , -0.09654788])
reward = -68.75366571387622
Collecting initial Ys step 3 at action = array([-1.97387185,  2.40328593, -0.11946695, -1.98243373,  0.24651946])
reward = -69.103769746625
Collecting initial Ys step 4 at action = array([-1.92396953,  2.35696903, -0.04196954, -2.41072866,  0.19019112])
reward = -70.77952618758502


AttributeError: 'numpy.ndarray' object has no attribute 'isinf'

In [None]:
# hopefully this would run :)

# optimize(
#     target_mu_x=0.4249e-3,
#     target_sigma_x=0.0,
#     target_mu_y=1.1048e-3,
#     target_sigma_y=0.0,
#     target_mu_x_threshold=3.16e-6,
#     target_mu_y_threshold=3.16e-6,
#     target_sigma_x_threshold=3.16e-6,
#     target_sigma_y_threshold=3.16e-6,
#     max_steps=100,
#     model_name="BO",
#     logbook=True,
#     callback=None,
#     obj_function='logmae',
#     acquisition="EI",
#     init_x = None,
#     init_samples = 5,
#     filter_action = None,
# )
