In [1]:
from collections import namedtuple
from datetime import datetime
import functools
import json
from pathlib import Path

from gym.wrappers import RescaleAction, TimeLimit
import numpy as np
import pandas as pd
from stable_baselines3 import TD3
from stable_baselines3.common.vec_env import DummyVecEnv, VecNormalize
from tqdm import tqdm

from environments import ARESEASequential

In [2]:
def load_sequential(model_name, max_episode_steps=50, measure_beam="us"):
    """Load a saved TD3 agent together with its evaluation environment.

    Everything is read from ``models/<model_name>``: the ``VecNormalize``
    wrapper state from ``vec_normalize.pkl`` and the agent from ``model``.

    Parameters
    ----------
    model_name : str
        Name of the sub-directory of ``models/`` holding the saved files.
    max_episode_steps : int, optional
        Step limit handed to the ``TimeLimit`` wrapper.
    measure_beam : str, optional
        Forwarded to the environment as ``backendargs["measure_beam"]``.

    Returns
    -------
    ModelSetup
        Named tuple with the fields ``name``, ``env``, ``model``,
        ``max_episode_steps`` and ``measure_beam``.
    """
    model_dir = f"models/{model_name}"

    def build_env():
        # Wrapping order: base environment -> step limit -> actions rescaled to [-1, 1].
        base_env = ARESEASequential(
            backend="machine",
            backendargs={"measure_beam": measure_beam},
        )
        limited_env = TimeLimit(base_env, max_episode_steps=max_episode_steps)
        return RescaleAction(limited_env, -1, 1)

    vec_env = VecNormalize.load(
        f"{model_dir}/vec_normalize.pkl",
        DummyVecEnv([build_env]),
    )
    # Evaluation mode: do not update the saved statistics and do not normalise rewards.
    vec_env.training = False
    vec_env.norm_reward = False

    agent = TD3.load(f"{model_dir}/model")

    setup_type = namedtuple(
        "ModelSetup",
        ["name", "env", "model", "max_episode_steps", "measure_beam"],
    )
    return setup_type(
        name=model_name,
        env=vec_env,
        model=agent,
        max_episode_steps=max_episode_steps,
        measure_beam=measure_beam,
    )

In [7]:
def pack_dataframe(fn):
    """Decorate an episode runner so that it returns a ``pandas.DataFrame``.

    The wrapped function is called as ``fn(setup, problem)`` and must return
    ``(observations, rewards, beam_images)``:

    * ``observations`` -- sequence of observation vectors with at least 13
      components, one per recorded step (including the initial one),
    * ``rewards`` -- any iterable of per-step rewards, one entry fewer than
      ``observations`` because the initial observation has no reward,
    * ``beam_images`` -- one entry per recorded step.

    The resulting frame has one row per step with the columns ``step``, the
    13 named observation components, ``reward``, ``beam_image`` and the
    metadata columns ``model_name``, ``max_episode_steps`` and
    ``measure_beam`` copied from ``setup``.
    """
    # Column name for each observation component, in index order.
    observation_columns = (
        "q1", "q2", "cv", "q3", "ch",
        "mup_x", "mup_y", "sigmap_x", "sigmap_y",
        "mu_x", "mu_y", "sigma_x", "sigma_y",
    )

    @functools.wraps(fn)  # keep the runner's __name__ / __doc__ on the wrapper
    def wrapper(setup, problem):
        observations, rewards, beam_images = fn(setup, problem)
        observations = np.array(observations)

        df = pd.DataFrame(np.arange(len(observations)), columns=["step"])
        for index, column in enumerate(observation_columns):
            df[column] = observations[:, index]

        # The initial observation has no reward, so pad with NaN. Unpacking
        # (instead of ``[np.nan] + rewards``) also works when ``rewards`` is a
        # NumPy array, where ``+`` would broadcast-add rather than concatenate.
        df["reward"] = [np.nan, *rewards]
        df["beam_image"] = beam_images

        df["model_name"] = setup.name
        df["max_episode_steps"] = setup.max_episode_steps
        df["measure_beam"] = setup.measure_beam

        return df

    return wrapper

In [8]:
@pack_dataframe
def run(setup, problem):
    """Roll out one episode of ``setup.model`` on ``setup.env``.

    Parameters
    ----------
    setup : ModelSetup
        Provides ``env`` (a vectorised, normalised environment), ``model`` and
        ``max_episode_steps`` (used only as the progress-bar total).
    problem : dict
        May contain ``"initial"`` and/or ``"desired"``; when present they are
        written to ``next_initial`` / ``next_desired`` on the unwrapped
        environment before the reset.

    Returns
    -------
    tuple
        ``(observations, rewards, beam_images)`` -- un-normalised observations
        and beam images for the initial state and every step, and one reward
        per step. The ``pack_dataframe`` decorator turns this into a DataFrame.
    """
    vec_env = setup.env
    agent = setup.model

    # Pre-load the requested start / target onto the underlying environment.
    for key, attribute in (("initial", "next_initial"), ("desired", "next_desired")):
        if key in problem:
            setattr(vec_env.get_attr("unwrapped")[0], attribute, problem[key])

    def latest_beam_image():
        return vec_env.get_attr("backend")[0].last_beam_image

    current = vec_env.reset()
    observations = [vec_env.unnormalize_obs(current).squeeze()]
    beam_images = [latest_beam_image()]
    rewards = []

    with tqdm(total=setup.max_episode_steps) as progress:
        while True:
            action, _ = agent.predict(current, deterministic=True)
            current, reward, done, info = vec_env.step(action)

            observations.append(vec_env.unnormalize_obs(current).squeeze())
            rewards.append(reward.squeeze())
            # NOTE(review): if the vectorised env has already auto-reset by
            # now, the image recorded on the final step may belong to the
            # reset rather than the terminal state -- confirm against backend.
            beam_images.append(latest_beam_image())

            progress.update(1)

            if done:
                break

    # On the final step the returned observation is not the terminal one
    # (vectorised envs reset automatically); the true last observation is
    # delivered through the info dict instead.
    observations[-1] = vec_env.unnormalize_obs(info[0]["terminal_observation"].squeeze())

    return observations, rewards, beam_images

In [9]:
def evaluate(model_name, directory, method=None, k=None):
    """Run one trained model on the problems in ``problems.json``.

    Each episode is pickled to
    ``<directory>/<model_name>_<problem index>_<timestamp>.pkl`` as soon as it
    finishes, so an interrupted evaluation keeps the episodes completed so far.

    Parameters
    ----------
    model_name : str
        Model to load via ``load_sequential``.
    directory : str or path-like
        Output directory for the per-episode pickles; created if missing.
    method : str, optional
        Label stored in a ``method`` column of the returned frame.
    k : int, optional
        Evaluate only the first ``k`` problems; ``None`` evaluates all.

    Returns
    -------
    pandas.DataFrame
        All episodes concatenated, with the extra columns ``problem``,
        ``model`` and (if given) ``method``. Row indices restart at 0 for each
        episode. Empty when no problems were evaluated.
    """
    setup = load_sequential(model_name)

    with open("problems.json", "r") as f:
        # ``[:None]`` is the full slice, so ``k=None`` needs no special case.
        problems = json.load(f)[:k]

    Path(directory).mkdir(parents=True, exist_ok=True)

    results = []
    for i, problem in enumerate(tqdm(problems)):
        # ``run`` treats both keys as optional, so do not assume they exist here.
        print(f"Running problem:\n  Initial = {problem.get('initial')}\n  Desired = {problem.get('desired')}")
        timestamp = datetime.now().strftime("%Y%m%d%H%M%S")
        result = run(setup, problem=problem)
        result["problem"] = i
        result.to_pickle(f"{directory}/{model_name}_{i}_{timestamp}.pkl")
        results.append(result)

    # ``pd.concat`` raises on an empty list (e.g. ``k=0`` or no problems).
    evaluation = pd.concat(results) if results else pd.DataFrame()
    evaluation["model"] = setup.name
    if method is not None:
        evaluation["method"] = method

    # Previously the combined frame was built and then silently discarded.
    return evaluation

## Run the Evaluation

In [10]:
# Only the first k problems from problems.json are evaluated per model.
k = 3
# Per-episode result pickles are written into this directory.
directory = "machine_eval_test"

# Training-method label -> names of the trained models to evaluate for it.
todos = {
    "Trained with Random Initial and No Misalignments for 600k Steps": [
        "bright-rain-963",
        "lyric-wave-964",
        "pleasant-wood-965",
    ],
    "Trained 600k With Initial Actuators Set to Zero": [
        "faithful-meadow-975",
        "amber-mountain-976",
        "ruby-water-977",
    ],
    "Quadrupole and Screen Misalignments (Up to 400 Micrometers in Both Directions)": [
        "ethereal-firefly-972",
        "royal-planet-973",
        "clear-armadillo-974",
    ],
    "Training for 6M Steps": [
        "visionary-blaze-969",
        "vibrant-leaf-970",
        "electric-sun-971",
    ],
}

for method, model_names in todos.items():
    for model_name in model_names:
        evaluate(model_name, directory, method=method, k=k)

  0%|          | 0/3 [00:00<?, ?it/s]

Running problem:
  Initial = [0.0, 0.0, 0.0, 0.0, 0.0]
  Desired = [0.0010023001814261079, -0.0010743754683062434, 0.0003546174557413906, 0.000328763882862404]


 28%|██▊       | 14/50 [02:01<05:12,  8.67s/it]
  0%|          | 0/3 [02:09<?, ?it/s]


KeyboardInterrupt: 