In [1]:
import copy
import glob
import json

import pandas as pd

from baybe.targets.enum import TargetMode
from benchmarks.metrics import (
    Precision,
    Recall,
    UncertaintyCurveArea,
    AverageDegree,
    OverlappingUncertaintyArea,
    FeasibilityRate,
    AverageDistance,
    MeanConvergenceRateMetric,
    AreaUnderTheCurve,
    CumulativeRegret,
    SimpleRegret,
    NormalizedAreaUnderTheCurve,
    PointsDifferedRatio,
    KuiperMetric,
    KolmogorovSmirnovMetric,
    MedianPointToPointConvergenceValue,
    MeanSlope,
    MedianGlobalBestConvergenceValue,
    PointVarianceDifferRatio,
    MeanVariance,
    ComparisonOperator,
    PatternItemRatioThreshold,
    PatternBasedAssertion,
    PatternItemPureMetricComparison,
    PointsPositionScore,
    PatternItemValueThreshold,
    ConvergenceLocationRelationship,
    PositionalRelation,
)

In [2]:
def _direct_arylation_optima():
    """Return a fresh list with the two reference records of the arylation tasks.

    The records differ only in their "Base" entry. A new list (with new dicts) is
    built on every call so the task entries never share mutable state.
    """
    return [
        {
            "Base": base,
            "Solvent": "DMAc",
            "Ligand": "SCHEMBL15068049",
            "Temp_C": 105,
            "Concentration": 0.153,
            "yield": 100.0,
        }
        for base in ("Cesium acetate", "Cesium pivalate")
    ]


def _all_ones_optimum():
    """Return a fresh single-record list with x0..x4 all set to 1.0."""
    return [dict.fromkeys(("x0", "x1", "x2", "x3", "x4"), 1.0)]


# Benchmark task name -> 4-tuple, unpacked by the consumer as
# (targetmode, minmax, target name, x_best):
#   * targetmode: TargetMode.MAX or TargetMode.MIN
#   * minmax:     pair of target values or None
#                 (presumably the attainable target range -- TODO confirm)
#   * target:     name of the target ("yield" / "degradation")
#   * x_best:     list of reference input records, or None when not specified
taskmap = {
    "default_direct_arylation": (
        TargetMode.MAX,
        (0.0, 100.0),
        "yield",
        _direct_arylation_optima(),
    ),
    "direct_arylation": (
        TargetMode.MAX,
        (0.0, 100.0),
        "yield",
        _direct_arylation_optima(),
    ),
    "holdertable": (
        TargetMode.MIN,
        (-19.2085, 0.0),
        "yield",
        # Four records: every sign combination of the same coordinate magnitudes.
        [
            {"x0": sign_x0 * 8.05502, "x1": sign_x1 * 9.66459}
            for sign_x0 in (1, -1)
            for sign_x1 in (1, -1)
        ],
    ),
    "levy": (TargetMode.MIN, (0.0, 334.6563), "yield", _all_ones_optimum()),
    "michalewicz": (
        TargetMode.MIN,
        (-4.687658, 0.0),
        "yield",
        [
            dict(
                x0=2.20290558,
                x1=1.57079628,
                x2=1.28499156,
                x3=1.92305846,
                x4=1.72046977,
            )
        ],
    ),
    "nn_levy": (TargetMode.MIN, (0.0, 334.6563), "yield", _all_ones_optimum()),
    "photo_pce_10": (TargetMode.MIN, None, "degradation", None),
}

In [3]:
# Value lists for the categorical parameters "Solvent", "Base" and "Ligand".
# The labels are reproduced verbatim; presumably they must match the labels used in
# the benchmark data -- verify before "correcting" any spelling.
categorical_parameter = dict(
    Solvent=["DMAc", "Butyornitrile", "Butyl Ester", "p-Xylene"],
    Base=[
        "Potassium acetate",
        "Potassium pivalate",
        "Cesium acetate",
        "Cesium pivalate",
    ],
    Ligand=[
        "BrettPhos",
        "CC=C2OC",
        "Di-tert-butylphenylphosphine",
        "(t-Bu)PhCPhos",
        "Tricyclohexylphosphine",
        "PPh3",
        "XPhos",
        "P(2-furyl)3",
        "Methyldiphenylphosphine",
        "1268824-69-6",
        "JackiePhos",
        "SCHEMBL15068049",
        "Me2PPh",
    ],
)

In [4]:
# Load the previously stored metric results. The context manager closes the file
# handle deterministically instead of leaving it to the garbage collector.
with open("results.json") as results_file:
    results = json.load(results_file)

In [5]:
def _canonical_column_name(name):
    """Map a raw benchmark CSV column name to the label used in this notebook.

    Names ending in ``_CumBest`` / ``_IterBest`` become "Cumulative best" /
    "Iterative best"; names ending in ``yield_Measurements`` or
    ``degradation_Measurements`` become "Measurements"; any other name is
    returned unchanged.
    """
    if name.endswith("_CumBest"):
        return "Cumulative best"
    if name.endswith("_IterBest"):
        return "Iterative best"
    if name.endswith(("yield_Measurements", "degradation_Measurements")):
        return "Measurements"
    return name


def _load_optimization_runs(csv_path):
    """Read one benchmark CSV and keep only the rows of the optimization scenario.

    Columns are renamed with ``_canonical_column_name``. The "Scenario" column is
    relabelled ("Test_Scenario" -> "Optimization Process", anything else ->
    "Random Baseline") and only the "Optimization Process" rows are returned.
    """
    runs = pd.read_csv(csv_path).rename(columns=_canonical_column_name)
    runs["Scenario"] = runs["Scenario"].apply(
        lambda label: "Optimization Process"
        if label == "Test_Scenario"
        else "Random Baseline"
    )
    return runs[runs["Scenario"] == "Optimization Process"]


# All three inputs go through the same load/rename/filter pipeline.
ref_data = _load_optimization_runs(".idea/michalewicz_0.07.0.csv")
df_explor = _load_optimization_runs(".idea/michalewicz_ucb_exploration_0.11.2.csv")
df_exploit = _load_optimization_runs(".idea/michalewicz_ucb_exploitation_0.11.2.csv")

# Description of the evaluated task: target mode, the value pair unpacked as
# `minmax`, the target name and the reference input records.
targetmode, minmax, targename, x_best = taskmap["michalewicz"]

# Metric objects keyed by the name under which their value is stored in `results`.
# The target name always comes from the task description (`targename`) so that all
# metrics agree on it. The literals 50, 0.7 and 8 are passed through unchanged;
# their meaning is defined by the metric constructors -- TODO document.
tests = {
    "Precision": Precision(
        "yield_Input_with_Measurements", 50, targename, targetmode, ref_data, x_best
    ),
    "Recall": Recall(
        "yield_Input_with_Measurements", 50, targename, targetmode, ref_data, x_best
    ),
    "UncertaintyCurveArea": UncertaintyCurveArea(targetmode, "Cumulative best"),
    "AverageDegree": AverageDegree(
        "yield_Input_with_Measurements", 50, targename, targetmode, ref_data
    ),
    "FeasibilityRate": FeasibilityRate(targetmode, "Cumulative best", minmax, 0.7),
    "AverageDistance": AverageDistance(
        "yield_Input_with_Measurements", 50, targename, targetmode, 8
    ),
    "MeanConvergenceRateMetric": MeanConvergenceRateMetric(
        targetmode, "Cumulative best", minmax
    ),
    "AreaUnderTheCurve": AreaUnderTheCurve(targetmode, "Cumulative best"),
    "CumulativeRegret": CumulativeRegret(targetmode, "Cumulative best", minmax),
    "SimpleRegret": SimpleRegret(targetmode, "Cumulative best", minmax),
    "NormalizedAreaUnderTheCurve": NormalizedAreaUnderTheCurve(
        targetmode, "Cumulative best", minmax
    ),
    "MedianPointToPointConvergenceValue": MedianPointToPointConvergenceValue(
        targetmode, "Cumulative best"
    ),
    "MeanSlope": MeanSlope(targetmode, "Cumulative best"),
    "MedianGlobalBestConvergenceValue": MedianGlobalBestConvergenceValue(
        targetmode, "Cumulative best", minmax
    ),
    "MeanVariance": MeanVariance(targetmode, "Cumulative best"),
}

# Store every metric value under
#   results["michalewicz_ucb"][<version label>]["Cumulative best"][<metric name>].
# `setdefault` creates each nesting level only when it is missing, so this also works
# when "michalewicz_ucb" already exists in results.json but lacks the 0.11.2 entries
# (the former one-shot initialisation raised KeyError in that case). Values that are
# already present for other versions are left untouched.
ucb_results = results.setdefault("michalewicz_ucb", {})
for version_label, runs in (
    ("0.11.2-explor", df_explor),
    ("0.11.2-exploit", df_exploit),
):
    metric_values = ucb_results.setdefault(version_label, {}).setdefault(
        "Cumulative best", {}
    )
    for test_name, metric in tests.items():
        metric_values[test_name] = metric.evaluate(runs)


In [6]:
# Serialize before opening the file: opening with "w" truncates "results.json", so a
# value that cannot be encoded as JSON must raise *before* that point, otherwise the
# previously accumulated results would be lost.
serialized_results = json.dumps(results, indent=4)
with open("results.json", "w") as outfile:
    outfile.write(serialized_results)