In [1]:
from collections import namedtuple
from datetime import datetime
import functools
import json
import logging
import os
from pathlib import Path

from gym.wrappers import RescaleAction, TimeLimit
import numpy as np
import pandas as pd
from stable_baselines3 import TD3
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from tqdm import tqdm

from environments import ARESEASequential, ResetActuators, ResetActuatorsToDFD
from mail import send_mail

initializing ocelot...


In [2]:
# Setup logging (to console and to a timestamped file in ./logs)
def timestamp():
    """Return the current local time formatted as ``yymmdd_HHMMSS``."""
    return datetime.now().strftime("%y%m%d_%H%M%S")


logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Re-running this cell must not stack further handlers on the same logger,
# otherwise every message is emitted once per previous execution of the cell.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

# One formatter shared by the console and the file handler.
formatter = logging.Formatter("%(asctime)s - %(levelname)-7.7s: %(message)s")

stream_handler = logging.StreamHandler()
stream_handler.setLevel(logging.DEBUG)
stream_handler.setFormatter(formatter)
logger.addHandler(stream_handler)

path = os.getcwd()  # Check path and directory before running
folder = 'logs'
directory = os.path.join(path, folder)
Path(directory).mkdir(parents=True, exist_ok=True)
description = 'evaluate_machine'
logpath = os.path.join(directory, f'{timestamp()}_{description}.log')

file_handler = logging.FileHandler(logpath)
file_handler.setLevel(logging.DEBUG)
file_handler.setFormatter(formatter)
logger.addHandler(file_handler)

In [3]:
def load_sequential(model_name, max_episode_steps=30, measure_beam="us", init="dfd"):
    """Load a trained TD3 agent together with its normalised evaluation environment.

    Parameters
    ----------
    model_name : str
        Name of the sub-directory of ``models/`` holding ``model`` and
        ``vec_normalize.pkl``.
    max_episode_steps : int
        Step limit passed to the ``TimeLimit`` wrapper.
    measure_beam : str
        Forwarded to the environment as ``backendargs["measure_beam"]``.
    init : {"dfd", "zero", "random"}
        Selects the reset wrapper: ``"dfd"`` wraps the environment in
        ``ResetActuatorsToDFD(env, k1=10)``, ``"zero"`` in ``ResetActuators(env)``,
        and ``"random"`` adds no reset wrapper.

    Returns
    -------
    ModelSetup
        Named tuple ``(name, env, model, max_episode_steps, measure_beam)``.

    Raises
    ------
    ValueError
        If ``init`` is not one of the supported values.
    """
    known_inits = ("dfd", "zero", "random")
    if init not in known_inits:
        # Previously an unknown value silently behaved like "random"; on the real
        # machine backend a typo must not change the reset behaviour unnoticed.
        raise ValueError(f"Unknown init {init!r}, expected one of {known_inits}")

    ModelSetup = namedtuple("ModelSetup", ["name","env","model","max_episode_steps","measure_beam"])

    log_dir = f"models/{model_name}"

    def make_env():
        env = ARESEASequential(
            backend="machine",
            backendargs={"measure_beam": measure_beam}
        )
        if init == "dfd":
            env = ResetActuatorsToDFD(env, k1=10)
        elif init == "zero":
            env = ResetActuators(env)
        # init == "random": no reset wrapper is applied.
        env = TimeLimit(env, max_episode_steps=max_episode_steps)
        env = RescaleAction(env, -1, 1)
        return env

    env = DummyVecEnv([make_env])
    # Restore the normalisation statistics saved with the model and freeze them:
    # no statistics updates and no reward normalisation during evaluation.
    env = VecNormalize.load(f"{log_dir}/vec_normalize.pkl", env)
    env.training = False
    env.norm_reward = False

    model = TD3.load(f"{log_dir}/model")

    return ModelSetup(model_name, env, model, max_episode_steps, measure_beam)

In [4]:
def pack_dataframe(fn):
    """Decorate an episode runner so that it returns a ``pandas.DataFrame``.

    ``fn(setup, problem)`` must return ``(observations, rewards, beam_images)``,
    where every observation has at least 13 entries. ``rewards`` holds one entry
    fewer than ``observations`` because the first observation has no reward.

    The resulting frame has one row per observation with the columns ``step``,
    the 13 observation components, ``reward`` (``NaN`` in the first row),
    ``beam_image`` and the constant columns ``model_name``,
    ``max_episode_steps`` and ``measure_beam`` taken from ``setup``.
    """
    # Column name for each observation component, in observation-vector order.
    observation_columns = (
        "q1", "q2", "cv", "q3", "ch",
        "mup_x", "mup_y", "sigmap_x", "sigmap_y",
        "mu_x", "mu_y", "sigma_x", "sigma_y",
    )

    @functools.wraps(fn)  # keep the wrapped function's name and docstring
    def wrapper(setup, problem):
        observations, rewards, beam_images = fn(setup, problem)
        observations = np.array(observations)

        df = pd.DataFrame(np.arange(len(observations)), columns=["step"])
        for index, column in enumerate(observation_columns):
            df[column] = observations[:, index]
        # The initial observation precedes any action and therefore has no reward.
        df["reward"] = [np.nan, *rewards]
        df["beam_image"] = beam_images

        df["model_name"] = setup.name
        df["max_episode_steps"] = setup.max_episode_steps
        df["measure_beam"] = setup.measure_beam

        return df

    return wrapper

In [5]:
@pack_dataframe
def run(setup, problem):
    """Roll out the agent for one episode on ``problem`` and record the trajectory.

    ``setup`` provides ``env``, ``model`` and ``max_episode_steps``. ``problem`` is
    a mapping that may contain ``"initial"`` and/or ``"desired"``; when present
    they are written to ``next_initial`` / ``next_desired`` of the unwrapped
    environment before the reset.

    Returns the lists ``(observations, rewards, beam_images)``. Observations are
    unnormalised, and there is one more observation and beam image than rewards.
    """
    vec_env = setup.env
    agent = setup.model

    def base_env():
        # Unwrapped environment of the first (index 0) sub-environment.
        return vec_env.get_attr("unwrapped")[0]

    def current_beam_image():
        # Most recent beam image held by the environment's backend.
        return vec_env.get_attr("backend")[0].last_beam_image

    # Hand the problem's optional settings to the environment before resetting.
    for key, attribute in (("initial", "next_initial"), ("desired", "next_desired")):
        if key in problem:
            setattr(base_env(), attribute, problem[key])

    obs = vec_env.reset()
    observations = [vec_env.unnormalize_obs(obs).squeeze()]
    beam_images = [current_beam_image()]
    rewards = []

    # NOTE(review): presumably keeps the actuators where they are on the next
    # reset -- confirm against the environment implementation.
    base_env().next_initial = "stay"

    with tqdm(total=setup.max_episode_steps) as progress:
        finished = False
        while not finished:
            action, _ = agent.predict(obs, deterministic=True)
            obs, step_reward, finished, info = vec_env.step(action)

            observations.append(vec_env.unnormalize_obs(obs).squeeze())
            rewards.append(step_reward.squeeze())
            beam_images.append(current_beam_image())

            progress.update(1)

    # Replace the last stored observation with the terminal observation reported
    # in the info dict (presumably because the vectorised env has already reset
    # by the time the final step returns -- TODO confirm).
    terminal = info[0]["terminal_observation"].squeeze()
    observations[-1] = vec_env.unnormalize_obs(terminal)

    return observations, rewards, beam_images

In [6]:
def _select_problems(problems, n):
    """Return the subset of ``problems`` selected by ``n`` (None, int or tuple)."""
    if n is None:
        return problems
    if isinstance(n, int):
        return problems[:n]
    if isinstance(n, tuple):
        return problems[n[0]:n[1]]
    raise ValueError(
        f"n must be None, an int or a (start, stop) tuple, got {type(n).__name__}: {n!r}"
    )


def evaluate(model_name, directory, method=None, description=None, init="dfd", n=None):
    """Run one agent on the problems in ``problems_3.json`` and pickle each result.

    Parameters
    ----------
    model_name : str
        Name of the model, passed to ``load_sequential``.
    directory : str
        Output directory for the result pickles; created if missing.
    method, description : optional
        When not ``None``, stored in the ``method`` / ``description`` columns of
        every result frame.
    init : str
        Passed to ``load_sequential``.
    n : None, int or tuple
        ``None`` runs all problems, an int runs the first ``n`` problems and a
        ``(start, stop)`` tuple runs ``problems[start:stop]``.

    Raises
    ------
    ValueError
        If ``n`` is neither ``None``, an int nor a tuple.

    After every problem the result is written to
    ``{directory}/{model_name}_{i:03d}_{timestamp}.pkl`` and a notification mail
    is sent.
    """
    setup = load_sequential(model_name, init=init)

    with open("problems_3.json", "r") as f:
        problems = _select_problems(json.load(f), n)

    Path(directory).mkdir(parents=True, exist_ok=True)

    # NOTE(review): i counts within the selected slice, not within
    # problems_3.json, so n=(22, 300) labels its first problem as 0.
    for i, problem in enumerate(tqdm(problems)):
        logger.info(f"Agent {model_name} running problem {i}:\n  Desired = {problem['desired']}")

        # Taken before the run, so the file name carries the start time. Named so
        # that it does not shadow the module-level ``timestamp`` callable.
        started_at = datetime.now().strftime("%Y%m%d%H%M%S")
        result = run(setup, problem=problem)
        result["problem"] = i
        result["model"] = setup.name
        if method is not None:
            result["method"] = method
        if description is not None:
            result["description"] = description
        result.to_pickle(f"{directory}/{model_name}_{i:03d}_{started_at}.pkl")

        send_mail(
            f"MSK-IPC AA: Agent {model_name} finished running problem {i}",
            ["oliver.stein@desy.de","jan.kaiser@desy.de"]
        )

## The Actual Evaluation Run Starts Here

In [7]:
# Slice of problems_3.json to run: problems[22:300].
n = (22, 300)
# Result pickles of this study are written here.
directory = "machine_studies/next_to_27_polished_donkey"
# What to run: the agents to evaluate plus the labels stored with every result.
todo = dict(
    method="resettodfd",
    description="Reset to DFD (with Adjusted Initial)",
    models=["polished-donkey-996"],  # , "polar-lake-997", "still-deluge-998"],
    init="dfd",
)

for model in todo["models"]:
    evaluate(
        model,
        directory,
        n=n,
        init=todo["init"],
        method=todo["method"],
        description=todo["description"],
    )

# Final notification once every listed model has been evaluated.
send_mail(
    "MSK-IPC AA: The polished donkey has finished. It's very shiny now.",
    ["oliver.stein@desy.de","jan.kaiser@desy.de"]
)

  0%|                                                                                                                 | 0/1 [00:00<?, ?it/s]2021-11-25 17:22:42,533 - INFO   : Agent polished-donkey-996 running problem 0:
  Desired = [0.0, 0.0, 0.0, 0.0]
2021-11-25 17:22:42,543 - DEBUG  : Setting actuators to [ 10. -10.  10.   0.   0.]

  0%|                                                                                                                | 0/30 [00:00<?, ?it/s][A2021-11-25 17:23:08,131 - DEBUG  : Setting actuators to [ 7.10696903e+00 -1.13961761e+01  8.38059762e+00 -2.98292101e-04
  5.99991136e-04]

  3%|███▍                                                                                                    | 1/30 [00:08<04:04,  8.42s/it][A2021-11-25 17:23:16,550 - DEBUG  : Setting actuators to [ 7.08170894e+00 -1.20983442e+01  8.41890181e+00 -3.91527256e-04
  1.16632283e-03]

2021-11-25 17:23:34,022 - DEBUG  : Setting actuators to [ 7.12641575e+00 -1.26077245e+01  8.57510

Exception: Error count was above 0 for more than 10 minutes