-
Notifications
You must be signed in to change notification settings - Fork 9
Add a helper script to bisect a regression #430
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,4 +1,5 @@ | ||
| import importlib | ||
| import os | ||
| import sys | ||
|
|
||
|
|
||
|
|
@@ -7,6 +8,7 @@ | |
|
|
||
| COMMANDS = { | ||
| "backfill": "Schedule benchmarking a number of commits", | ||
| "bisect": "Run a bisect to find the commit that caused a regression", | ||
| "compare": "Compare a matrix of specific results", | ||
| "find_failures": "Find the benchmarks that failed in the last weekly run", | ||
| "generate_results": "Create all of the derived artifacts from raw data", | ||
|
|
@@ -24,6 +26,15 @@ | |
| } | ||
|
|
||
| if __name__ == "__main__": | ||
| # This lets pytest-cov collect coverage data in a subprocess | ||
| if "COV_CORE_SOURCE" in os.environ: | ||
| try: | ||
| from pytest_cov.embed import init | ||
|
|
||
| init() | ||
| except Exception: | ||
| sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.") | ||
|
|
||
| command = len(sys.argv) >= 2 and sys.argv[1] or "" | ||
|
|
||
| if command not in COMMANDS: | ||
|
|
@@ -36,5 +47,5 @@ | |
| sys.exit(1) | ||
|
|
||
| sys.argv = [sys.argv[0], *sys.argv[2:]] | ||
| mod = importlib.import_module(f"bench_runner.scripts.{command}") | ||
| mod = importlib.import_module(f".{command}", "bench_runner.scripts") | ||
|
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This is just due to picking the unfortunate name |
||
| mod.main() | ||
| Original file line number | Diff line number | Diff line change | ||||
|---|---|---|---|---|---|---|
| @@ -0,0 +1,230 @@ | ||||||
| import argparse | ||||||
| import contextlib | ||||||
| from pathlib import Path | ||||||
| import subprocess | ||||||
| import sys | ||||||
| import traceback | ||||||
|
|
||||||
|
|
||||||
| import numpy as np | ||||||
| import rich_argparse | ||||||
|
|
||||||
|
|
||||||
| from bench_runner import flags as mflags | ||||||
| from bench_runner import git | ||||||
| from bench_runner import result | ||||||
| from bench_runner.scripts import run_benchmarks | ||||||
| from bench_runner.scripts import workflow | ||||||
| from bench_runner.util import PathLike, format_seconds | ||||||
|
|
||||||
|
|
||||||
| def _get_result_commandline( | ||||||
| benchmark: str, | ||||||
| good_val: float, | ||||||
| bad_val: float, | ||||||
| pgo: bool, | ||||||
| flags: str, | ||||||
| repo: PathLike, | ||||||
| ) -> list[str]: | ||||||
| repo = Path(repo) | ||||||
|
|
||||||
| return [ | ||||||
| sys.executable, | ||||||
| "-m", | ||||||
| "bench_runner.scripts.bisect", | ||||||
| benchmark, | ||||||
| str(good_val), | ||||||
| str(bad_val), | ||||||
| str(pgo), | ||||||
| str(flags), | ||||||
| str(repo.absolute()), | ||||||
| ] | ||||||
|
|
||||||
|
|
||||||
| def parse_result(benchmark_json: PathLike) -> float: | ||||||
| # The name of the benchmark in the JSON file may be different from the one | ||||||
| # used to select the benchmark. Therefore, just take the mean of all the | ||||||
| # benchmarks in the JSON file. | ||||||
| r = result.Result.from_arbitrary_filename(benchmark_json) | ||||||
| timing_data = r.get_timing_data() | ||||||
| return float(np.mean([x.mean() for x in timing_data.values()])) | ||||||
|
|
||||||
|
|
||||||
| def get_result( | ||||||
| benchmark: str, | ||||||
| pgo: bool = False, | ||||||
| flags: str = "", | ||||||
| cpython: PathLike = Path("cpython"), | ||||||
| reconfigure: bool = False, | ||||||
| ) -> float: | ||||||
| cpython = Path(cpython) | ||||||
|
|
||||||
| if pgo or reconfigure: | ||||||
| # Jumping around through commits with PGO can leave stale PGO data | ||||||
| # around, so we need to clean it each time. We also always do it the | ||||||
| # first time in case the *last* bisect run used pgo. | ||||||
| subprocess.run(["make", "clean"], cwd=cpython) | ||||||
|
|
||||||
| workflow.compile_unix(cpython, mflags.parse_flags(flags), pgo, False, reconfigure) | ||||||
| run_benchmarks.run_benchmarks(cpython / "python", benchmark) | ||||||
| timing = parse_result(run_benchmarks.BENCHMARK_JSON) | ||||||
|
|
||||||
| return timing | ||||||
|
|
||||||
|
|
||||||
| def get_log_file() -> Path: | ||||||
| return Path("bisect_log.txt") | ||||||
|
|
||||||
|
|
||||||
| def delete_log() -> None: | ||||||
| bisect_log = get_log_file() | ||||||
| if bisect_log.is_file(): | ||||||
| bisect_log.unlink() | ||||||
|
|
||||||
|
|
||||||
| def show_log() -> None: | ||||||
| print() | ||||||
| print("Bisect log:") | ||||||
|
|
||||||
| with get_log_file().open("r") as f: | ||||||
| for line in f.readlines(): | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is there a reason to use
Suggested change
|
||||||
| print(line.strip()) | ||||||
|
|
||||||
|
|
||||||
| def log(message: str) -> None: | ||||||
| with get_log_file().open("a") as f: | ||||||
| f.write(f"{message}\n") | ||||||
|
|
||||||
|
|
||||||
| def _main( | ||||||
| benchmark: str, | ||||||
| good: str, | ||||||
| bad: str, | ||||||
| pgo: bool = False, | ||||||
| flags: str = "", | ||||||
| repo: PathLike = Path("."), | ||||||
| cpython: PathLike = Path("cpython"), | ||||||
| ): | ||||||
| repo = Path(repo).absolute() | ||||||
| cpython = Path(cpython).absolute() | ||||||
|
|
||||||
| delete_log() | ||||||
|
|
||||||
| if not cpython.is_dir(): | ||||||
| git.clone( | ||||||
| cpython, "https://github.com/python/cpython.git", branch="main", depth=None | ||||||
| ) | ||||||
|
|
||||||
| git.checkout(cpython, good) | ||||||
| good_timing = get_result(benchmark, pgo, flags, cpython=cpython, reconfigure=True) | ||||||
| log(f"KNOWN GOOD ({good[:7]}): {format_seconds(good_timing)}") | ||||||
|
|
||||||
| git.checkout(cpython, bad) | ||||||
| bad_timing = get_result(benchmark, pgo, flags, cpython=cpython) | ||||||
| log(f"KNOWN BAD ({bad[:7]}): {format_seconds(bad_timing)}") | ||||||
|
|
||||||
| try: | ||||||
| with contextlib.chdir(cpython): | ||||||
| subprocess.run(["git", "bisect", "start"]) | ||||||
| subprocess.run(["git", "bisect", "bad", bad]) | ||||||
| subprocess.run(["git", "bisect", "good", good]) | ||||||
| subprocess.run( | ||||||
| ["git", "bisect", "run"] | ||||||
| + _get_result_commandline( | ||||||
| benchmark, good_timing, bad_timing, pgo, flags, repo | ||||||
| ) | ||||||
| ) | ||||||
| finally: | ||||||
| show_log() | ||||||
| delete_log() | ||||||
|
|
||||||
|
|
||||||
| def main(): | ||||||
| # This is the entry point for the user | ||||||
|
|
||||||
| parser = argparse.ArgumentParser( | ||||||
| description=""" | ||||||
| Run bisect on a benchmark to find the first regressing commit. | ||||||
|
|
||||||
| A full checkout of CPython should be in the cpython directory. | ||||||
| If it doesn't exist, it will be cloned. | ||||||
| """, | ||||||
| formatter_class=rich_argparse.ArgumentDefaultsRichHelpFormatter, | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "benchmark", | ||||||
| type=str, | ||||||
| help="The benchmark to run bisect on.", | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "good", | ||||||
| type=str, | ||||||
| help="The good commit hash for the bisect.", | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "bad", | ||||||
| type=str, | ||||||
| help="The bad commit hash for the bisect.", | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "--pgo", | ||||||
| action="store_true", | ||||||
| ) | ||||||
| parser.add_argument( | ||||||
| "--flags", | ||||||
| type=str, | ||||||
| default="", | ||||||
| ) | ||||||
|
|
||||||
| args = parser.parse_args() | ||||||
|
|
||||||
| _main(args.benchmark, args.good, args.bad, args.pgo, args.flags) | ||||||
|
|
||||||
|
|
||||||
| if __name__ == "__main__": | ||||||
| # This is the entry point when we are called from `git bisect run` itself | ||||||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. This feels a little magical and I would've probably just made this a separate script instead, but as long as it works... |
||||||
|
|
||||||
| parser = argparse.ArgumentParser() | ||||||
| parser.add_argument("benchmark", type=str) | ||||||
| parser.add_argument("good_val", type=float) | ||||||
| parser.add_argument("bad_val", type=float) | ||||||
| parser.add_argument("pgo", type=str) | ||||||
| parser.add_argument("flags", type=str) | ||||||
| parser.add_argument("repo", type=str) | ||||||
| args = parser.parse_args() | ||||||
|
|
||||||
| mid_point = (args.good_val + args.bad_val) / 2.0 | ||||||
|
|
||||||
| repo = Path(args.repo) | ||||||
| cpython = repo / "cpython" | ||||||
|
|
||||||
| try: | ||||||
| with contextlib.chdir(repo): | ||||||
| timing = get_result( | ||||||
| args.benchmark, args.pgo == "True", args.flags, cpython=cpython | ||||||
| ) | ||||||
| except Exception as e: | ||||||
| # If there was any exception, display that exception and traceback and | ||||||
| # then abort the git bisect with -1 | ||||||
| traceback.print_exception(e) | ||||||
| sys.exit(-1) | ||||||
|
|
||||||
| # The confidence is 0.0 at the mid-point, 1.0 at the good and bad values, | ||||||
| # and > 1.0 outside of that. | ||||||
| confidence = abs((timing - mid_point) / ((args.bad_val - args.good_val) / 2.0)) | ||||||
|
|
||||||
| with contextlib.chdir(repo): | ||||||
| if timing > mid_point: | ||||||
| log( | ||||||
| f"BISECT BAD ({git.get_git_hash(cpython)[:7]}): " | ||||||
| f"{format_seconds(timing)} (confidence {confidence:.02f})" | ||||||
| ) | ||||||
| print(f"BAD: {timing} vs. ({args.good_val}, {args.bad_val})") | ||||||
| sys.exit(1) | ||||||
| else: | ||||||
| log( | ||||||
| f"BISECT GOOD ({git.get_git_hash(cpython)[:7]}): " | ||||||
| f"{format_seconds(timing)} (confidence {confidence:.02f})" | ||||||
| ) | ||||||
| print(f"GOOD: {timing} vs. ({args.good_val}, {args.bad_val})") | ||||||
| sys.exit(0) | ||||||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -130,7 +130,13 @@ def checkout_benchmarks(): | |
| ) | ||
|
|
||
|
|
||
| def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) -> None: | ||
| def compile_unix( | ||
| cpython: PathLike, | ||
| flags: list[str], | ||
| pgo: bool, | ||
| pystats: bool, | ||
| reconfigure: bool = True, | ||
| ) -> None: | ||
| cpython = Path(cpython) | ||
| cfg = config.get_config_for_current_runner() | ||
|
|
||
|
|
@@ -166,7 +172,8 @@ def compile_unix(cpython: PathLike, flags: list[str], pgo: bool, pystats: bool) | |
| make_args.extend(["-j"]) | ||
|
|
||
| with contextlib.chdir(cpython): | ||
| subprocess.check_call(["./configure", *args], env=env) | ||
| if reconfigure: | ||
| subprocess.check_call(["./configure", *args], env=env) | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even if you don't explicitly run configure, the Makefile may decide to rerun configure when jumping through revisions... It's probably a good idea to pass |
||
| subprocess.check_call(["make", *make_args], env=env) | ||
|
|
||
|
|
||
|
|
@@ -391,13 +398,4 @@ def main(): | |
|
|
||
|
|
||
| if __name__ == "__main__": | ||
| # This lets pytest-cov collect coverage data in a subprocess | ||
| if "COV_CORE_SOURCE" in os.environ: | ||
| try: | ||
| from pytest_cov.embed import init | ||
|
|
||
| init() | ||
| except Exception: | ||
| sys.stderr.write("pytest-cov: Failed to setup subprocess coverage.") | ||
|
|
||
| main() | ||
| Original file line number | Diff line number | Diff line change | ||||||||||||||||||||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
|
@@ -87,6 +87,26 @@ def get_simple_platform() -> Literal["linux", "macos", "windows"]: | |||||||||||||||||||||||||||
| raise RuntimeError(f"Unsupported platform {sys.platform}.") | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| def format_seconds(value: float) -> str: | ||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||
| Given a float value in seconds, formats it into a human-readable string with | ||||||||||||||||||||||||||||
| the appropriate precision. | ||||||||||||||||||||||||||||
| """ | ||||||||||||||||||||||||||||
| _TIMEDELTA_UNITS = ("sec", "ms", "us", "ns") | ||||||||||||||||||||||||||||
|
|
||||||||||||||||||||||||||||
| for i in range(2, -9, -1): | ||||||||||||||||||||||||||||
| if value >= 10.0**i: | ||||||||||||||||||||||||||||
| break | ||||||||||||||||||||||||||||
| else: | ||||||||||||||||||||||||||||
| i = -9 | ||||||||||||||||||||||||||||
|
Comment on lines
+97
to
+101
|
||||||||||||||||||||||||||||
| for i in range(2, -9, -1): | |
| if value >= 10.0**i: | |
| break | |
| else: | |
| i = -9 | |
| MAX_POWER_OF_TEN = 2 # Maximum power of ten to check (10^2 = 100 seconds) | |
| MIN_POWER_OF_TEN = -9 # Minimum power of ten to check (10^-9 = 1 nanosecond) | |
| for i in range(MAX_POWER_OF_TEN, MIN_POWER_OF_TEN - 1, -1): | |
| if value >= 10.0**i: | |
| break | |
| else: | |
| i = MIN_POWER_OF_TEN |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe add a comment that it won't work on Windows?