In [19]:
import copy
import glob
import json

import pandas as pd

from baybe.targets.enum import TargetMode
from benchmarks.metrics import (
    Precision,
    Recall,
    UncertaintyCurveArea,
    AverageDegree,
    OverlappingUncertaintyArea,
    FeasibilityRate,
    AverageDistance,
    MeanConvergenceRateMetric,
    AreaUnderTheCurve,
    CumulativeRegret,
    MeanDynamicConvergenceRateMetric,
    SimpleRegret,
    NormalizedAreaUnderTheCurve,
    PointsDifferedRatio,
    KuiperMetric,
    KolmogorovSmirnovMetric,
    MedianPointToPointConvergenceValue,
    MeanSlope,
    MedianGlobalBestConvergenceValue,
    PointVarianceDifferRatio,
    MeanVariance,
    ComparisonOperator,
    PatternItemRatioThreshold,
    PatternBasedAssertion,
    PatternItemPureMetricComparison,
    PointsPositionScore,
    PatternItemValueThreshold,
    ConvergenceLocationRelationship,
    PositionalRelation,
)

In [20]:
# Benchmark registry keyed by task name.
#
# Each value is a 4-tuple that the evaluation cells unpack as
# ``(targetmode, minmax, targename, x_best)``:
#   * targetmode -- ``TargetMode.MAX`` or ``TargetMode.MIN``
#   * minmax     -- pair of floats, or ``None`` (presumably the lower/upper
#                   bound of the target value -- TODO confirm)
#   * targename  -- name of the target ("yield" / "degradation")
#   * x_best     -- list of parameter configurations, or ``None`` (presumably
#                   the known optimal inputs -- TODO confirm)
taskmap = {
    # Both arylation tasks share the same mode, bounds, target and the same two
    # configurations, which differ only in the base.
    **{
        arylation_task: (
            TargetMode.MAX,
            (0.0, 100.0),
            "yield",
            [
                {
                    "Base": base,
                    "Solvent": "DMAc",
                    "Ligand": "SCHEMBL15068049",
                    "Temp_C": 105,
                    "Concentration": 0.153,
                    "yield": 100.0,
                }
                for base in ("Cesium acetate", "Cesium pivalate")
            ],
        )
        for arylation_task in ("default_direct_arylation", "direct_arylation")
    },
    "holdertable": (
        TargetMode.MIN,
        (-19.2085, 0.0),
        "yield",
        # All four sign combinations of the same (x0, x1) magnitudes.
        [
            {"x0": x0, "x1": x1}
            for x0 in (8.05502, -8.05502)
            for x1 in (9.66459, -9.66459)
        ],
    ),
    "levy": (
        TargetMode.MIN,
        (0.0, 334.6563),
        "yield",
        [{f"x{i}": 1.0 for i in range(5)}],
    ),
    "michalewicz": (
        TargetMode.MIN,
        (-4.687658, 0.0),
        "yield",
        [
            {
                f"x{i}": coordinate
                for i, coordinate in enumerate(
                    (2.20290558, 1.57079628, 1.28499156, 1.92305846, 1.72046977)
                )
            }
        ],
    ),
    "nn_levy": (
        TargetMode.MIN,
        (0.0, 334.6563),
        "yield",
        [{f"x{i}": 1.0 for i in range(5)}],
    ),
    # No bounds and no configurations are recorded for this task.
    "photo_pce_10": (TargetMode.MIN, None, "degradation", None),
}

In [21]:
def parse_result_filename(path):
    """Split a result-file path into ``(task_indicator, version)``.

    The file name is expected to look like ``<task>_<version>.<ext>``; the task
    part may itself contain underscores. Both ``/`` and ``\\`` are accepted as
    directory separators, so the parsing gives the same result regardless of
    which separator ``glob`` returns on the current platform.

    Args:
        path: Path of a result file as returned by ``glob.glob``.

    Returns:
        Tuple ``(task_indicator, version)``: the file name up to the last
        underscore, and the part after the last underscore with the extension
        (everything from the last ``.``) removed.
    """
    basename = path.replace("\\", "/").rsplit("/", 1)[-1]
    task_indicator = basename[: basename.rfind("_")]
    version = basename.split("_")[-1]
    version = version[: version.rfind(".")]
    return task_indicator, version


def canonical_column_name(column):
    """Map ``*_CumBest`` / ``*_IterBest`` columns to their generic names.

    Any other column name is returned unchanged.
    """
    if column.endswith("_CumBest"):
        return "Cumulative best"
    if column.endswith("_IterBest"):
        return "Iterative best"
    return column


# NOTE(review): the result files are read from ".idea/", which is normally an
# IDE settings directory -- confirm this is the intended data location.
# NOTE(review): the sort is lexicographic on the path, so e.g. "0.10.0" is
# ordered before "0.9.0"; the per-task lists below inherit that order.
files = sorted(glob.glob(".idea/*"))

# task name -> list of (version, optimization-run frame, random-baseline frame)
dfs = {}
for f in files:
    task_indicator, version = parse_result_filename(f)
    new_df = pd.read_csv(f).rename(columns=canonical_column_name)
    # Everything that is not the "Test_Scenario" is treated as random baseline.
    new_df["Scenario"] = new_df["Scenario"].apply(
        lambda x: "Optimization Process" if x == "Test_Scenario" else "Random Baseline"
    )
    dfs.setdefault(task_indicator, []).append(
        (
            version,
            new_df[new_df["Scenario"] == "Optimization Process"],
            new_df[new_df["Scenario"] == "Random Baseline"],
        )
    )

In [22]:
# Labels of the categorical parameters, grouped by parameter name.
# NOTE(review): "Butyornitrile" may be a misspelling of "Butyronitrile"; it is
# kept verbatim because the label may have to match the data files.
categorical_parameter = dict(
    Solvent=[
        "DMAc",
        "Butyornitrile",
        "Butyl Ester",
        "p-Xylene",
    ],
    Base=[
        "Potassium acetate",
        "Potassium pivalate",
        "Cesium acetate",
        "Cesium pivalate",
    ],
    Ligand=[
        "BrettPhos",
        "CC=C2OC",
        "Di-tert-butylphenylphosphine",
        "(t-Bu)PhCPhos",
        "Tricyclohexylphosphine",
        "PPh3",
        "XPhos",
        "P(2-furyl)3",
        "Methyldiphenylphosphine",
        "1268824-69-6",
        "JackiePhos",
        "SCHEMBL15068049",
        "Me2PPh",
    ],
)

In [23]:
# Load the stored metric values. The evaluation cells index this mapping as
# results[task][version][evaluated column][metric name].
# The context manager closes the file handle deterministically instead of
# leaving it to the garbage collector.
with open("results.json", encoding="utf-8") as results_file:
    results = json.load(results_file)

In [24]:
# Per-version evaluation: compute the distribution metrics of every stored run
# of every task and record them in `results`.
#
# This cell used to be switched off by a bare `break` as the first statement of
# the loop, which left the whole body unreachable. The explicit flag keeps it
# disabled by default while making that intent visible.
RUN_PER_VERSION_EVALUATION = False

if RUN_PER_VERSION_EVALUATION:
    for task in dfs:  # noqa: PLC0206
        print("-" * 80)
        print(task)
        print()
        targetmode, minmax, targename, x_best = taskmap[task]
        cumbest = "Cumulative best"
        iterbest = "Iterative best"
        # Only the cumulative-best column is evaluated here.
        to_eval = [cumbest]
        task_results = results.setdefault(task, {})
        # Work on deep copies so the frames stored in `dfs` stay untouched.
        for version, df, random in copy.deepcopy(dfs[task]):
            version_results = task_results.setdefault(version, {})
            for eval_row in to_eval:
                eval_df = df
                if eval_row == iterbest:
                    # Cut the run off at the iteration where the mean
                    # cumulative best reaches its optimum for the target mode.
                    mean_cumbest = eval_df.groupby("Iteration")[
                        "Cumulative best"
                    ].mean()
                    if targetmode == TargetMode.MAX:
                        cutoff_iteration = mean_cumbest.idxmax()
                    else:
                        cutoff_iteration = mean_cumbest.idxmin()
                    eval_df = eval_df[eval_df["Iteration"] <= cutoff_iteration]

                row_results = version_results.setdefault(eval_row, {})

                # "photo_pce_10" has no `minmax` entry in `taskmap`, so no
                # metric is built for it.
                tests = (
                    {
                        "KolmogorovSmirnovMetric": KolmogorovSmirnovMetric(
                            targetmode, eval_row, minmax
                        ),
                        "KuiperMetric": KuiperMetric(targetmode, eval_row, minmax),
                    }
                    if task != "photo_pce_10"
                    else {}
                )
                for test, metric in tests.items():
                    if test == "MeanDynamicConvergenceRateMetric":
                        continue
                    row_results[test] = metric.evaluate(eval_df)

                    print(f"{test}-{version}: {row_results[test]}")

In [27]:
# Version labels per task, stored as sets so membership can be tested with
# `in`. `got_worse` and `got_better` select which metric values the comparison
# cell prints; `changed_variance` is not read by the cells shown here.
# NOTE(review): these look like manually curated labels -- confirm their origin.
got_worse = {
    "default_direct_arylation": {"0.9.0"},
    "direct_arylation": {"0.9.0"},
    "holdertable": {"0.9.0"},
    "levy": {"0.11.0"},
    "michalewicz": {"0.9.0"},
    "nn_levy": {"0.11.2"},
    # `{}` would be an empty dict; use an empty set like every other entry.
    "photo_pce_10": set(),
}
got_better = {
    "default_direct_arylation": {"0.8.2", "0.10.0"},
    "direct_arylation": {"0.8.2", "0.11.1"},
    "holdertable": {"0.10.0", "0.11.0", "0.11.2"},
    "levy": {"0.8.2", "0.9.0", "0.11.2"},
    "michalewicz": {"0.8.2", "0.10.0", "0.11.0", "0.11.1", "0.11.2"},
    "nn_levy": {"0.8.2", "0.10.0", "0.9.0", "0.11.1"},
    "photo_pce_10": {"0.9.0", "0.11.1", "0.10.0"},
}
changed_variance = {
    "default_direct_arylation": {"0.10.0"},
    "direct_arylation": {"0.10.0"},
    "holdertable": {"0.11.2"},
    "levy": {"0.8.2", "0.10.0"},
    "michalewicz": {"0.9.0", "0.9.1", "0.11.0"},
    "nn_levy": {"0.8.2", "0.10.0"},
    "photo_pce_10": {"0.9.0", "0.10.0"},
}



In [28]:
# Compare every stored run of a task with the run that follows it in
# `dfs[task]`: the metrics are built from the earlier run and evaluated on the
# later one, and the value is stored under the later version in `results`.
for task in dfs:  # noqa: PLC0206
    print("-" * 80)
    print(task)
    print()
    targetmode, minmax, targename, x_best = taskmap[task]
    cumbest = "Cumulative best"
    iterbest = "Iterative best"
    to_eval = [cumbest, iterbest]
    task_results = results.setdefault(task, {})
    runs = dfs[task]
    # Walk over neighbouring (earlier, later) pairs in stored order.
    for first, second in zip(runs, runs[1:]):
        version_one = first[0]
        version_two = second[0]
        # Private copies of the optimization-run frames of both versions.
        df1 = first[1].copy()
        df2 = second[1].copy()
        for eval_row in to_eval:
            # Filter into per-column names so the truncation below never leaks
            # into the evaluation of another column.
            eval_df1, eval_df2 = df1, df2
            if eval_row == iterbest:
                mean_cumbest_one = df1.groupby("Iteration")["Cumulative best"].mean()
                mean_cumbest_two = df2.groupby("Iteration")["Cumulative best"].mean()
                # NOTE(review): MAX keeps the earlier of the two optimum
                # iterations while MIN keeps the later one -- confirm that this
                # asymmetry is intended.
                if targetmode == TargetMode.MAX:
                    cutoff_iteration = min(
                        mean_cumbest_one.idxmax(), mean_cumbest_two.idxmax()
                    )
                else:
                    cutoff_iteration = max(
                        mean_cumbest_one.idxmin(), mean_cumbest_two.idxmin()
                    )
                eval_df1 = df1[df1["Iteration"] <= cutoff_iteration]
                eval_df2 = df2[df2["Iteration"] <= cutoff_iteration]

            # Result slots of exactly the two versions being compared. The
            # lookup previously went through `version`, a stale variable left
            # over from the file-loading loop.
            slot_one = task_results.setdefault(version_one, {}).setdefault(
                eval_row, {}
            )
            slot_two = task_results.setdefault(version_two, {}).setdefault(
                eval_row, {}
            )

            # The earlier run's frame is the third constructor argument --
            # presumably the reference sample; confirm against
            # benchmarks.metrics.
            tests = {
                "KolmogorovSmirnovMetric": KolmogorovSmirnovMetric(
                    targetmode, eval_row, eval_df1
                ),
                "KuiperMetric": KuiperMetric(targetmode, eval_row, eval_df1),
            }
            for test, metric in tests.items():
                # NOTE(review): an already stored iterative-best value is
                # dropped instead of recomputed, so re-running this cell
                # alternates between deleting and computing it -- confirm.
                if eval_row == iterbest and test in slot_two:
                    del slot_two[test]
                    continue
                # -2 is the placeholder written for an earlier version that has
                # no stored value for this metric.
                slot_one.setdefault(test, -2)
                slot_two[test] = metric.evaluate(eval_df2)
                if version_one in got_worse[task] or version_two in got_better[task]:
                    print(f"{test}-{version_two}: {slot_two[test]}")

--------------------------------------------------------------------------------
default_direct_arylation

KolmogorovSmirnovMetric-0.10.0: 0.05524285829018305
KuiperMetric-0.10.0: 0.05566973227967986
--------------------------------------------------------------------------------
direct_arylation

KolmogorovSmirnovMetric-0.11.1: 7.122357919431277e-05
KuiperMetric-0.11.1: 7.122357919431277e-05
--------------------------------------------------------------------------------
holdertable

KolmogorovSmirnovMetric-0.10.0: 0.03482733260294191
KuiperMetric-0.10.0: 0.05760143994824325
KolmogorovSmirnovMetric-0.11.0: 0.06958065023980531
KuiperMetric-0.11.0: 0.06958065023980531
KolmogorovSmirnovMetric-0.11.2: 0.03484489288260266
KuiperMetric-0.11.2: 0.05750428323274702
--------------------------------------------------------------------------------
levy

KolmogorovSmirnovMetric-0.11.1: 0.06881270338925391
KuiperMetric-0.11.1: 0.07725329079093668
KolmogorovSmirnovMetric-0.11.2: 0.12373324307113204