Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion bench_runner/__main__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import importlib
import os
import sys


Expand All @@ -7,6 +8,7 @@

COMMANDS = {
"backfill": "Schedule benchmarking a number of commits",
"bisect": "Run a bisect to find the commit that caused a regression",
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe add a comment that it won't work on Windows?

"compare": "Compare a matrix of specific results",
"find_failures": "Find the benchmarks that failed in the last weekly run",
"generate_results": "Create all of the derived artifacts from raw data",
Expand All @@ -24,6 +26,15 @@
}

if __name__ == "__main__":
# This lets pytest-cov collect coverage data in a subprocess
if "COV_CORE_SOURCE" in os.environ:
try:
from pytest_cov.embed import init

init()
except Exception:
sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.")

command = len(sys.argv) >= 2 and sys.argv[1] or ""

if command not in COMMANDS:
Expand All @@ -36,5 +47,5 @@
sys.exit(1)

sys.argv = [sys.argv[0], *sys.argv[2:]]
mod = importlib.import_module(f"bench_runner.scripts.{command}")
mod = importlib.import_module(f".{command}", "bench_runner.scripts")
Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is just due to picking the unfortunate name bisect that conflicts with the name of a stdlib module and not being careful about how things are run elsewhere.

mod.main()
11 changes: 10 additions & 1 deletion bench_runner/git.py
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ def clone(
url: str,
*,
branch: str | None = None,
depth: int = 1,
depth: int | None = 1,
) -> None:
is_hash = re.match(r"^[0-9a-f]{40}$", branch) if branch else False

Expand Down Expand Up @@ -168,3 +168,12 @@ def clone(
if depth is not None:
args += ["--depth", str(depth)]
subprocess.check_call(args)


def checkout(dirname: PathLike, ref: str) -> None:
    """Check out *ref* (branch, tag, or commit hash) in the git repository
    located at *dirname*."""
    subprocess.check_call(["git", "checkout", ref], cwd=Path(dirname))
230 changes: 230 additions & 0 deletions bench_runner/scripts/bisect.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,230 @@
import argparse
import contextlib
from pathlib import Path
import subprocess
import sys
import traceback


import numpy as np
import rich_argparse


from bench_runner import flags as mflags
from bench_runner import git
from bench_runner import result
from bench_runner.scripts import run_benchmarks
from bench_runner.scripts import workflow
from bench_runner.util import PathLike, format_seconds


def _get_result_commandline(
    benchmark: str,
    good_val: float,
    bad_val: float,
    pgo: bool,
    flags: str,
    repo: PathLike,
) -> list[str]:
    """Build the argv used by ``git bisect run`` to re-invoke this module.

    Every value is stringified because it crosses a process boundary; the
    ``__main__`` entry point of this module parses the strings back.
    """
    command = [sys.executable, "-m", "bench_runner.scripts.bisect"]
    payload = (benchmark, good_val, bad_val, pgo, flags, Path(repo).absolute())
    command.extend(str(item) for item in payload)
    return command


def parse_result(benchmark_json: PathLike) -> float:
    """Return a single timing summarizing the results in *benchmark_json*.

    The benchmark name recorded in the JSON file may differ from the name
    used to select the benchmark, so instead of looking a specific entry up
    we average the means of every benchmark present in the file.
    """
    res = result.Result.from_arbitrary_filename(benchmark_json)
    per_benchmark_means = [series.mean() for series in res.get_timing_data().values()]
    return float(np.mean(per_benchmark_means))


def get_result(
    benchmark: str,
    pgo: bool = False,
    flags: str = "",
    cpython: PathLike = Path("cpython"),
    reconfigure: bool = False,
) -> float:
    """Build CPython at its current checkout, run *benchmark*, and return
    the resulting mean timing in seconds."""
    checkout_dir = Path(cpython)

    # Jumping between commits with PGO can leave stale profile data behind,
    # so clean before every PGO build.  Also clean on the very first build
    # (reconfigure=True) in case the *previous* bisect session used PGO.
    # Best-effort (no check): a fresh clone has no Makefile yet.
    if reconfigure or pgo:
        subprocess.run(["make", "clean"], cwd=checkout_dir)

    workflow.compile_unix(
        checkout_dir, mflags.parse_flags(flags), pgo, False, reconfigure
    )
    run_benchmarks.run_benchmarks(checkout_dir / "python", benchmark)
    return parse_result(run_benchmarks.BENCHMARK_JSON)


def get_log_file() -> Path:
    """Return the path of the bisect progress log (in the current directory)."""
    log_name = "bisect_log.txt"
    return Path(log_name)


def delete_log() -> None:
    """Remove any bisect log left over from a previous run, if present."""
    log_path = get_log_file()
    if log_path.is_file():
        log_path.unlink()


def show_log() -> None:
print()
print("Bisect log:")

with get_log_file().open("r") as f:
for line in f.readlines():
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there a reason to use readlines instead of just iterating over the file?

Suggested change
for line in f.readlines():
for line in f:

print(line.strip())


def log(message: str) -> None:
    """Append *message* as a single line to the bisect progress log."""
    with get_log_file().open("a") as log_file:
        log_file.write(message + "\n")


def _main(
    benchmark: str,
    good: str,
    bad: str,
    pgo: bool = False,
    flags: str = "",
    repo: PathLike = Path("."),
    cpython: PathLike = Path("cpython"),
) -> None:
    """Drive a full ``git bisect`` session over CPython for *benchmark*.

    Times the known-good and known-bad commits first, then hands control to
    ``git bisect run``, which re-invokes this module as a subprocess (via
    ``_get_result_commandline``) for each candidate commit.

    Args:
        benchmark: Name of the benchmark to time.
        good: Commit-ish known to have the good (faster) timing.
        bad: Commit-ish known to have the bad (slower) timing.
        pgo: Build CPython with PGO when True.
        flags: Build flags string, parsed by ``mflags.parse_flags``.
        repo: The bench_runner working directory.
        cpython: Location of the CPython checkout; cloned when missing.
    """
    repo = Path(repo).absolute()
    cpython = Path(cpython).absolute()

    # Start from a clean slate; the finally block below tidies up again.
    delete_log()

    # depth=None means a *full* clone -- bisect needs the whole history.
    if not cpython.is_dir():
        git.clone(
            cpython, "https://github.com/python/cpython.git", branch="main", depth=None
        )

    # reconfigure=True on the first build so configure runs even if the
    # previous session left the tree configured differently.
    git.checkout(cpython, good)
    good_timing = get_result(benchmark, pgo, flags, cpython=cpython, reconfigure=True)
    log(f"KNOWN GOOD ({good[:7]}): {format_seconds(good_timing)}")

    git.checkout(cpython, bad)
    bad_timing = get_result(benchmark, pgo, flags, cpython=cpython)
    log(f"KNOWN BAD ({bad[:7]}): {format_seconds(bad_timing)}")

    try:
        with contextlib.chdir(cpython):
            # NOTE(review): these calls don't pass check=True, so a failed
            # `git bisect start/bad/good` is silently ignored -- confirm
            # that is intentional.
            subprocess.run(["git", "bisect", "start"])
            subprocess.run(["git", "bisect", "bad", bad])
            subprocess.run(["git", "bisect", "good", good])
            subprocess.run(
                ["git", "bisect", "run"]
                + _get_result_commandline(
                    benchmark, good_timing, bad_timing, pgo, flags, repo
                )
            )
    finally:
        # Always show and then remove the log, even when the bisect fails.
        show_log()
        delete_log()


def main():
    """User-facing entry point: parse command-line arguments and run the
    bisect via :func:`_main`.

    Fix: ``--pgo`` and ``--flags`` previously had no ``help`` text, so
    ``argparse`` rendered them bare and the defaults-showing formatter had
    no help string to append the default value to.
    """
    parser = argparse.ArgumentParser(
        description="""
        Run bisect on a benchmark to find the first regressing commit.

        A full checkout of CPython should be in the cpython directory.
        If it doesn't exist, it will be cloned.
        """,
        formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter,
    )
    parser.add_argument(
        "benchmark",
        type=str,
        help="The benchmark to run bisect on.",
    )
    parser.add_argument(
        "good",
        type=str,
        help="The good commit hash for the bisect.",
    )
    parser.add_argument(
        "bad",
        type=str,
        help="The bad commit hash for the bisect.",
    )
    parser.add_argument(
        "--pgo",
        action="store_true",
        help="Build CPython with profile-guided optimization (PGO).",
    )
    parser.add_argument(
        "--flags",
        type=str,
        default="",
        help="The build flags to use.",
    )

    args = parser.parse_args()

    _main(args.benchmark, args.good, args.bad, args.pgo, args.flags)


if __name__ == "__main__":
# This is the entry point when we are called from `git bisect run` itself
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This feels a little magical and I would've probably just made this a separate script instead, but as long as it works...


parser = argparse.ArgumentParser()
parser.add_argument("benchmark", type=str)
parser.add_argument("good_val", type=float)
parser.add_argument("bad_val", type=float)
parser.add_argument("pgo", type=str)
parser.add_argument("flags", type=str)
parser.add_argument("repo", type=str)
args = parser.parse_args()

mid_point = (args.good_val + args.bad_val) / 2.0

repo = Path(args.repo)
cpython = repo / "cpython"

try:
with contextlib.chdir(repo):
timing = get_result(
args.benchmark, args.pgo == "True", args.flags, cpython=cpython
)
except Exception as e:
# If there was any exception, display that exception and traceback and
# then abort the git bisect with -1
traceback.print_exception(e)
sys.exit(-1)

# The confidence is 0.0 at the mid-point, 1.0 at the good and bad values,
# and > 1.0 outside of that.
confidence = abs((timing - mid_point) / ((args.bad_val - args.good_val) / 2.0))

with contextlib.chdir(repo):
if timing > mid_point:
log(
f"BISECT BAD ({git.get_git_hash(cpython)[:7]}): "
f"{format_seconds(timing)} (confidence {confidence:.02f})"
)
print(f"BAD: {timing} vs. ({args.good_val}, {args.bad_val})")
sys.exit(1)
else:
log(
f"BISECT GOOD ({git.get_git_hash(cpython)[:7]}): "
f"{format_seconds(timing)} (confidence {confidence:.02f})"
)
print(f"GOOD: {timing} vs. ({args.good_val}, {args.bad_val})")
sys.exit(0)
20 changes: 9 additions & 11 deletions bench_runner/scripts/workflow.py
Original file line number Diff line number Diff line change
Expand Up @@ -130,7 +130,13 @@ def checkout_benchmarks():
)


def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) -> None:
def compile_unix(
cpython: PathLike,
flags: list[str],
pgo: bool,
pystats: bool,
reconfigure: bool = True,
) -> None:
cpython = Path(cpython)
cfg = config.get_config_for_current_runner()

Expand Down Expand Up @@ -166,7 +172,8 @@ def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool)
make_args.extend(["-j"])

with contextlib.chdir(cpython):
subprocess.check_call(["./configure", *args], env=env)
if reconfigure:
subprocess.check_call(["./configure", *args], env=env)
Copy link
Copy Markdown
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Even if you don't explicitly run configure, the Makefile may decide to rerun configure when jumping through revisions... It's probably a good idea to pass -C (or --config-cache) to configure to significantly reduce configure time.

subprocess.check_call(["make", *make_args], env=env)


Expand Down Expand Up @@ -391,13 +398,4 @@ def main():


if __name__ == "__main__":
# This lets pytest-cov collect coverage data in a subprocess
if "COV_CORE_SOURCE" in os.environ:
try:
from pytest_cov.embed import init

init()
except Exception:
sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.")

main()
20 changes: 20 additions & 0 deletions bench_runner/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,26 @@ def get_simple_platform() -> Literal["linux", "macos", "windows"]:
raise RuntimeError(f"Unsupported platform {sys.platform}.")


def format_seconds(value: float) -> str:
"""
Given a float value in seconds, formats it into a human-readable string with
the appropriate precision.
"""
_TIMEDELTA_UNITS = ("sec", "ms", "us", "ns")

for i in range(2, -9, -1):
if value >= 10.0**i:
break
else:
i = -9
Comment on lines +97 to +101
Copy link

Copilot AI Apr 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The use of magic numbers in the loop (range from 2 to -9) reduces readability. Consider defining named constants or adding inline comments to clarify the purpose of these bounds.

Suggested change
for i in range(2, -9, -1):
if value >= 10.0**i:
break
else:
i = -9
MAX_POWER_OF_TEN = 2 # Maximum power of ten to check (10^2 = 100 seconds)
MIN_POWER_OF_TEN = -9 # Minimum power of ten to check (10^-9 = 1 nanosecond)
for i in range(MAX_POWER_OF_TEN, MIN_POWER_OF_TEN - 1, -1):
if value >= 10.0**i:
break
else:
i = MIN_POWER_OF_TEN

Copilot uses AI. Check for mistakes.

precision = 2 - i % 3
k = -(i // 3) if i < 0 else 0
factor = 10 ** (k * 3)
unit = _TIMEDELTA_UNITS[k]
return f"{value * factor:.{precision}f} {unit}"


if os.getenv("GITHUB_ACTIONS") == "true":

@contextlib.contextmanager
Expand Down
12 changes: 9 additions & 3 deletions tests/test_run_benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,9 @@ def test_run_benchmarks(benchmarks_checkout, monkeypatch):
subprocess.check_call(
[
venv_python,
run_benchmarks.__file__,
"-m",
"bench_runner",
"run_benchmarks",
"benchmark",
sys.executable,
"python",
Expand Down Expand Up @@ -132,7 +134,9 @@ def test_run_benchmarks(benchmarks_checkout, monkeypatch):
returncode = subprocess.call(
[
venv_python,
run_benchmarks.__file__,
"-m",
"bench_runner",
"run_benchmarks",
"benchmark",
sys.executable,
"python",
Expand Down Expand Up @@ -164,7 +168,9 @@ def test_run_benchmarks_flags(benchmarks_checkout):
subprocess.check_call(
[
venv_python,
run_benchmarks.__file__,
"-m",
"bench_runner",
"run_benchmarks",
"benchmark",
sys.executable,
"python",
Expand Down