In [1]:
import sys

# Make the directory two levels up importable; presumably this is the
# repository root that contains the `src` package imported below (the path is
# relative to the kernel's working directory — confirm when moving the notebook).
# The membership guard keeps the cell idempotent: re-running it no longer
# appends a duplicate entry to sys.path on every execution.
if "../../" not in sys.path:
    sys.path.append("../../")

In [2]:
import matplotlib
import matplotlib.pyplot as plt
import scienceplots
import seaborn as sns
from icecream import ic

from src.eval import (
    Study,
    plot_best_beam_parameter_error_box,
    plot_best_mae_box,
    plot_final_beam_parameter_error_box,
    plot_final_mae_box,
    problem_aligned,
)

In [3]:
# Notebook-wide path configuration. Both paths are relative, so they resolve
# against the kernel's working directory.

# Root directory of the study data; the simulation studies are loaded from
# sub-directories below it.
DATA_DIR = "../../data/bo_vs_rl"

# Directory name for figures (presumably the output location for saved plots —
# confirm against the cells that use it).
FIG_DIR = "figures"

In [4]:
# Load the three ES simulation studies in one pass. Each entry pairs the study
# directory with the display name passed to `Study.load`; every study is
# restricted to runs matching the "*problem_*" pattern.
es_sim, es_reviewer_gain_1, es_reviewer_gain_2 = (
    Study.load(study_dir, name=label, runs="*problem_*")
    for study_dir, label in (
        (f"{DATA_DIR}/simulation/es_with_decay", "ES (Sim)"),
        (f"{DATA_DIR}/simulation/es_reviewer_1", "ES (Rev. 1)"),
        (f"{DATA_DIR}/simulation/es_reviewer_2", "ES (Rev. 2)"),
    )
)

In [5]:
# Median best MAE of each ES study, multiplied by 1e6 and rounded to an
# integer for display.
# NOTE(review): the 1e6 factor presumably converts metres to micrometres —
# confirm the unit returned by `Study.median_best_mae`.
# `ic` labels every value with the source text of its argument, so editing the
# expressions below also changes the printed output.
ic(
    round(es_sim.median_best_mae() * 1e6),
    round(es_reviewer_gain_1.median_best_mae() * 1e6),
    round(es_reviewer_gain_2.median_best_mae() * 1e6),
)

ic| round(es_sim.median_best_mae() * 1e6): 81
    round(es_reviewer_gain_1.median_best_mae() * 1e6): 86
    round(es_reviewer_gain_2.median_best_mae() * 1e6): 111


(81, 86, 111)

In [6]:
# Median final MAE of each ES study, multiplied by 1e6 and rounded to an
# integer for display.
# NOTE(review): the 1e6 factor presumably converts metres to micrometres —
# confirm the unit returned by `Study.median_final_mae`.
ic(
    round(es_sim.median_final_mae() * 1e6),
    round(es_reviewer_gain_1.median_final_mae() * 1e6),
    round(es_reviewer_gain_2.median_final_mae() * 1e6),
)

ic| round(es_sim.median_final_mae() * 1e6): 145
    round(es_reviewer_gain_1.median_final_mae() * 1e6): 134
    round(es_reviewer_gain_2.median_final_mae() * 1e6): 154


(145, 134, 154)

In [7]:
# Mean best MAE of each ES study, multiplied by 1e6 and rounded to an integer
# for display.
# NOTE(review): the 1e6 factor presumably converts metres to micrometres —
# confirm the unit returned by `Study.mean_best_mae`.
ic(
    round(es_sim.mean_best_mae() * 1e6),
    round(es_reviewer_gain_1.mean_best_mae() * 1e6),
    round(es_reviewer_gain_2.mean_best_mae() * 1e6),
)

ic| round(es_sim.mean_best_mae() * 1e6): 111
    round(es_reviewer_gain_1.mean_best_mae() * 1e6): 103
    round(es_reviewer_gain_2.mean_best_mae() * 1e6): 134


(111, 103, 134)

In [8]:
# Median steps-to-threshold of each ES study, queried with threshold=40e-6 and
# max_steps=151 (the same arguments used by the other threshold/convergence
# cells in this notebook).
# NOTE(review): only the third value is wrapped in round(); the first two are
# printed unrounded and can show half-step medians. Confirm whether this
# asymmetry is intentional before reusing the numbers.
ic(
    es_sim.median_steps_to_threshold(threshold=40e-6, max_steps=151),
    es_reviewer_gain_1.median_steps_to_threshold(threshold=40e-6, max_steps=151),
    round(es_reviewer_gain_2.median_steps_to_threshold(threshold=40e-6, max_steps=151)),
)

ic| es_sim.median_steps_to_threshold(threshold=40e-6, max_steps=151): 120.5
    es_reviewer_gain_1.median_steps_to_threshold(threshold=40e-6, max_steps=151): 119.5
    round(es_reviewer_gain_2.median_steps_to_threshold(threshold=40e-6, max_steps=151)): 141


(120.5, 119.5, 141)

In [9]:
# Mean steps-to-threshold of each ES study (threshold=40e-6, max_steps=151),
# rounded to an integer for display.
ic(
    round(es_sim.mean_steps_to_threshold(threshold=40e-6, max_steps=151)),
    round(es_reviewer_gain_1.mean_steps_to_threshold(threshold=40e-6, max_steps=151)),
    round(es_reviewer_gain_2.mean_steps_to_threshold(threshold=40e-6, max_steps=151)),
)

ic| round(es_sim.mean_steps_to_threshold(threshold=40e-6, max_steps=151)): 107
    round(es_reviewer_gain_1.mean_steps_to_threshold(threshold=40e-6, max_steps=151)): 114
    round(es_reviewer_gain_2.mean_steps_to_threshold(threshold=40e-6, max_steps=151)): 121


(107, 114, 121)

In [10]:
# Share of runs that reached the target for each ES study (threshold=40e-6,
# max_steps=151), multiplied by 100 and rounded to an integer.
# NOTE(review): assumes `proportion_reached_target` returns a fraction in
# [0, 1], so the displayed value is a percentage — confirm.
ic(
    round(es_sim.proportion_reached_target(threshold=40e-6, max_steps=151) * 100),
    round(
        es_reviewer_gain_1.proportion_reached_target(threshold=40e-6, max_steps=151)
        * 100
    ),
    round(
        es_reviewer_gain_2.proportion_reached_target(threshold=40e-6, max_steps=151)
        * 100
    ),
)

ic| round(es_sim.proportion_reached_target(threshold=40e-6, max_steps=151) * 100): 17
    round(
        es_reviewer_gain_1.proportion_reached_target(threshold=40e-6, max_steps=151)
        * 100
    ): 15
    round(
        es_reviewer_gain_2.proportion_reached_target(threshold=40e-6, max_steps=151)
        * 100
    ): 5


(17, 15, 5)

In [11]:
# Median steps-to-convergence of each ES study (threshold=40e-6,
# max_steps=151), rounded to an integer for display.
ic(
    round(es_sim.median_steps_to_convergence(threshold=40e-6, max_steps=151)),
    round(
        es_reviewer_gain_1.median_steps_to_convergence(threshold=40e-6, max_steps=151)
    ),
    round(
        es_reviewer_gain_2.median_steps_to_convergence(threshold=40e-6, max_steps=151)
    ),
)

ic| round(es_sim.median_steps_to_convergence(threshold=40e-6, max_steps=151)): 43
    round(
        es_reviewer_gain_1.median_steps_to_convergence(threshold=40e-6, max_steps=151)
    ): 34
    round(
        es_reviewer_gain_2.median_steps_to_convergence(threshold=40e-6, max_steps=151)
    ): 43


(43, 34, 43)

In [12]:
# Mean steps-to-convergence of each ES study (threshold=40e-6, max_steps=151),
# rounded to an integer for display.
ic(
    round(es_sim.mean_steps_to_convergence(threshold=40e-6, max_steps=151)),
    round(es_reviewer_gain_1.mean_steps_to_convergence(threshold=40e-6, max_steps=151)),
    round(es_reviewer_gain_2.mean_steps_to_convergence(threshold=40e-6, max_steps=151)),
)

ic| round(es_sim.mean_steps_to_convergence(threshold=40e-6, max_steps=151)): 47
    round(es_reviewer_gain_1.mean_steps_to_convergence(threshold=40e-6, max_steps=151)): 42
    round(es_reviewer_gain_2.mean_steps_to_convergence(threshold=40e-6, max_steps=151)): 48


(47, 42, 48)

In [13]:
# Share of runs that converged for each ES study (threshold=40e-6,
# max_steps=151), multiplied by 100 and rounded to an integer.
# NOTE(review): assumes `proportion_converged` returns a fraction in [0, 1],
# so the displayed value is a percentage — confirm.
ic(
    round(es_sim.proportion_converged(threshold=40e-6, max_steps=151) * 100),
    round(
        es_reviewer_gain_1.proportion_converged(threshold=40e-6, max_steps=151) * 100
    ),
    round(
        es_reviewer_gain_2.proportion_converged(threshold=40e-6, max_steps=151) * 100
    ),
)

ic| round(es_sim.proportion_converged(threshold=40e-6, max_steps=151) * 100): 100
    round(
        es_reviewer_gain_1.proportion_converged(threshold=40e-6, max_steps=151) * 100
    ): 100
    round(
        es_reviewer_gain_2.proportion_converged(threshold=40e-6, max_steps=151) * 100
    ): 100


(100, 100, 100)