In [3]:
import json
import os
import pathlib
import re
import subprocess

import polars as pl
import plotly.express as px

In [None]:
def get_benchmark_results(repetitions: int = 50) -> pl.DataFrame:
    """Run the ``:bench`` Bazel target and load its JSON report.

    The target is built with ``-c opt`` and executed from the parent of the
    current working directory. Individual repetitions are requested (not only
    aggregates), so the resulting frame has one row per reported run.

    Args:
        repetitions: Value passed to ``--benchmark_repetitions``.

    Returns:
        A DataFrame built from the ``"benchmarks"`` array of the JSON report
        printed on the command's stdout.

    Raises:
        RuntimeError: If the command exits with a non-zero status. The
            captured stderr is attached to the exception as a note.
    """
    command = [
        "bazelisk", "run",
        "-c", "opt",
        ":bench",
        # Everything after "--" is handed to the benchmark binary.
        "--",
        # do several runs
        "--benchmark_enable_random_interleaving=true",
        "--benchmark_report_aggregates_only=false",
        f"--benchmark_repetitions={repetitions}",
        # report in json format
        "--benchmark_format=json",
    ]

    # The argument vector is passed as-is: no shell is involved, so no
    # argument needs quoting and nothing is re-split on whitespace.
    completed = subprocess.run(
        command,
        capture_output=True,
        cwd=pathlib.Path.cwd().parent,
        text=True,
        check=False,  # the exit status is inspected below to attach stderr
    )

    if completed.returncode != 0:
        error = RuntimeError(f"Benchmark failed (exit code {completed.returncode})")
        error.add_note(completed.stderr)
        raise error
    return pl.DataFrame(json.loads(completed.stdout)["benchmarks"])


# Run the benchmark command once and keep the raw results; the processing
# cell further down reads them through the `benchmark` name.
benchmark = get_benchmark_results()
# Bare expression so the notebook renders the frame as this cell's output.
benchmark

In [None]:
def parse_run_name(run_name: str | None) -> dict[str, str | None]:
    # Example: LRUCacheBench<std, hit>::put/iterations:100/manual_time
    if (run_name is not None) and (m := re.match(r"^LRUCacheBench<([^,]+), ([^,]+), ([^>]+)>::(\w+)", run_name)):
        version, allocator, hit_mode, function = m.groups()
        return {"version": version, "allocator": allocator, "hit_mode": hit_mode, "function": function}
    return {}


# Struct schema that the parsed run name is unpacked into: four string fields.
parsed_name_dtype = pl.Struct(
    {
        "version": pl.Utf8,
        "allocator": pl.Utf8,
        "hit_mode": pl.Utf8,
        "function": pl.Utf8,
    }
)

# Keep the columns of interest, then only the LRUCacheBench rows whose
# run_type is "iteration".
lru_iterations = (
    benchmark.select(
        pl.col("run_name").alias("name"),
        "run_type",
        "real_time",
        "time_unit",
    )
    .filter(pl.col("name").str.starts_with("LRUCacheBench"))
    .filter(pl.col("run_type") == "iteration")
)

# Replace "name" with its parsed fields, add a "version : allocator" label
# column, and order the rows.
data = (
    lru_iterations.with_columns(
        pl.col("name").map_elements(parse_run_name, return_dtype=parsed_name_dtype)
    )
    .unnest("name")
    .with_columns(
        pl.concat_str(
            [pl.col("version"), pl.col("allocator")],
            separator=" : ",
        ).alias("preset")
    )
    .sort(["function", "hit_mode", "version", "allocator"])
)

# The plot title states nanoseconds, so every kept row must use that unit.
assert data.get_column("time_unit").unique().to_list() == ["ns"]

# Box plot of the measured times: hit mode on the x axis, one colour per
# preset, one facet column per function.
box_options = dict(
    x="hit_mode",
    y="real_time",
    color="preset",
    facet_col="function",
    title="Average execution time per operation, ns",
)
fig = px.box(data, **box_options)

# Unlink the y axes so each facet gets its own range, and show tick labels
# on every facet.
fig.update_yaxes(matches=None, showticklabels=True)

# Export an HTML copy and a PNG image, then render in the notebook.
fig.write_html("bench.html")
fig.write_image("bench.png", width=1920, height=1080)
fig.show()
