diff --git a/.gitignore b/.gitignore index 84f6f10..82ae0c2 100644 --- a/.gitignore +++ b/.gitignore @@ -5,9 +5,6 @@ /cobertura.xml .DS_Store -# Generated benchmark results (machine-specific) -docs/PERFORMANCE.md - # Python / uv **/__pycache__/ *.egg-info/ diff --git a/CHANGELOG.md b/CHANGELOG.md index e826f64..eed0080 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -43,6 +43,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Move error and tolerance contracts into first-class modules with prelude exports - Update exact benchmarks to distinguish strict Result paths from rounded f64 paths - Document and exercise the rounded fallback pattern for RequiresRounding errors +- [**breaking**] Make exact f64 conversions strict [`89f3720`](https://github.com/acgetchell/la-stack/commit/89f3720ecde9f12d7a0f42e79394836615e8fd97) + - Make Matrix and Vector the finite-by-construction public types for exact arithmetic. + - Add rounded exact-to-f64 APIs for determinant and solve callers that want explicit lossy conversion. + - Return typed Unrepresentable reasons when strict exact-to-f64 conversion would round or become non-finite. + - Specialize D4 exact determinants and keep determinant/error-bound zero coefficients from evaluating overflowing absent terms. + - Update exact benchmark comparison reporting to compare strict and rounded APIs against legacy v0.4.2 rows. ### Changed @@ -72,6 +78,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 Use a literal regex pattern for the malformed Criterion JSON diagnostic so Windows paths with backslashes do not break pytest's match expression. 
- Align ty with Python 3.13 [`b9e0ba0`](https://github.com/acgetchell/la-stack/commit/b9e0ba08e54a15d8eddd5c5c53edc37bbc03939a) +- Preserve coordinates for overflowed accumulators [`1d976b3`](https://github.com/acgetchell/la-stack/commit/1d976b346172ad4eca37c68a3ec31817eeca8529) + + - Return matrix-cell metadata when inf-norm row sums or symmetry tolerance scaling overflow. + - Avoid reparsing finite-by-construction RHS vectors in LU and LDLT solves. ## [0.4.2] - 2026-06-04 diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 51dfa87..611d9fa 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,6 +22,20 @@ to an explicit allowlist, and kept with readable version comments for review. CI runs `just ci` on Ubuntu, macOS, and Windows to keep platform coverage aligned with the local comprehensive validation path. +## Performance checks + +Performance-sensitive changes should compare the current tree against the +latest published release: + +```bash +just performance-local +``` + +This writes `target/bench-reports/performance.md` without changing committed +release docs. Regressions are worth treating as design feedback: if a slowdown +is intentional, document the correctness, API clarity, or composability benefit +that justifies it. + For coverage commands and report locations, see [`docs/COVERAGE.md`](docs/COVERAGE.md). For benchmark methodology, see [`docs/BENCHMARKING.md`](docs/BENCHMARKING.md). For the full set of developer commands, run `just --list`. diff --git a/docs/BENCHMARKING.md b/docs/BENCHMARKING.md index dfd27d7..db226d0 100644 --- a/docs/BENCHMARKING.md +++ b/docs/BENCHMARKING.md @@ -151,40 +151,28 @@ benchmarks on every iteration. ### Workflow ```bash -# 1. Check out the old release and save its full baseline -git checkout v0.2.0 -just bench-save-baseline v0.2.0 +# Current in-tree code vs latest published release, all measured locally +just performance-local -# 2. 
Switch to current code and run latest la-stack measurements -git checkout main # or your feature branch -just bench-latest # populates target/criterion/*/new/ - -# 3. Generate a local comparison report -just bench-compare v0.2.0 +# Stored GitHub Actions release assets, no local cargo runs +just performance-github-assets ``` -You can save multiple baselines and compare against any of them. +`performance-local` creates isolated temporary worktrees, generates the latest +published release baseline locally, then benchmarks the current in-tree code on +the same machine. It uses the current checkout's Rust toolchain for both sides +unless `RUSTUP_TOOLCHAIN` is already set. `performance-github-assets` compares +stored GitHub Actions release artifacts and does not run cargo locally. -If the release baseline is already present in `target/criterion/`, skip the -checkout step and compare directly. For example, to compare current code against -the saved `v0.4.2` release baseline: +For local scratch comparisons, you can save multiple baselines and compare +against any of them. If the release baseline is already present in +`target/criterion/`, compare directly: ```bash just bench-latest # gather latest la-stack measurements just bench-compare v0.4.2 # compare latest measurements against v0.4.2 ``` -If the release baseline is not present locally, download and restore the release -asset first: - -```bash -gh release download v0.4.2 --pattern "la-stack-v0.4.2-criterion-baseline.tar.gz" # fetch archived release baseline -mkdir -p target # ensure Criterion parent directory exists -tar -C target -xzf la-stack-v0.4.2-criterion-baseline.tar.gz # restore target/criterion baseline data -just bench-latest # gather latest la-stack measurements -just bench-compare v0.4.2 # compare latest measurements against v0.4.2 -``` - ### Output `just bench-compare` writes `target/bench-reports/performance.md` by @@ -193,6 +181,31 @@ local. 
The report includes per-dimension tables showing median times, percent change, speedup, and last-release nalgebra/faer context where a matching `vs_linalg` peer exists. +Release PRs promote one curated comparison into committed docs: + +```bash +just performance-release +``` + +This infers the current release tag from `Cargo.toml`, discovers the previous +stable published release, generates both sides locally in temporary worktrees, +copies the finished report to `docs/PERFORMANCE.md`, and archives the previous +committed report under `docs/archive/performance/`. Archive filenames are +release-pair names such as `v0.4.2-vs-v0.4.1.md`, so the directory and generated +index stay lexicographically sorted. For explicit release repair, pass both +tags: `just performance-release v0.4.3 v0.4.2`. + +To compare the latest stored GitHub Actions release assets without touching the +current checkout: + +```bash +just performance-github-assets +``` + +The recipe discovers the latest stable published GitHub release and its previous +stable release automatically. For explicit historical repair, pass both tags: +`just performance-github-assets v0.4.2 v0.4.1`. + For exact-arithmetic comparisons against v0.4.2 or older baselines, rows such as `det_exact_rounded_f64 (vs det_exact_f64)` mean the current rounded API is being compared to the historical lossy `*_exact_f64` benchmark. Rows such as @@ -234,11 +247,12 @@ See `scripts/criterion_dim_plot.py --help` for options. At release time, save a local baseline so future work can compare against it: ```bash -just bench-save-baseline $TAG +just bench-save-baseline just bench-save-last ``` When the GitHub Release is published, `.github/workflows/release-benchmarks.yml` saves a full release baseline and attaches `la-stack-$TAG-criterion-baseline.tar.gz` to the release as the durable archive. -See `docs/RELEASING.md` step 5 for where this fits in the release process. 
+See the `just performance-release` step in `docs/RELEASING.md` for where the +curated `docs/PERFORMANCE.md` comparison fits in the release process. diff --git a/docs/PERFORMANCE.md b/docs/PERFORMANCE.md new file mode 100644 index 0000000..961762b --- /dev/null +++ b/docs/PERFORMANCE.md @@ -0,0 +1,117 @@ +# Exact Arithmetic Performance + +**la-stack** v0.4.2 · `7e11f93` (HEAD) · 2026-06-08 20:39:03 UTC +**Statistic**: median + +## Benchmark Results + +Comparison against baseline **v0.4.1**: + +Negative change = faster. Speedup > 1.00x = improvement. + +### D=2 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 0.6 ns | 0.9 ns | +61.1% | 0.62x | +| det_direct | 0.7 ns | 1.0 ns | +44.7% | 0.69x | +| det_exact | 315.5 ns | 318.4 ns | +0.9% | 0.99x | +| det_exact_f64 | 555.7 ns | 555.7 ns | -0.0% | 1.00x | +| det_sign_exact | 0.7 ns | 1.5 ns | +128.2% | 0.44x | +| solve_exact | 7.05 µs | 7.06 µs | +0.2% | 1.00x | +| solve_exact_f64 | 7.50 µs | 7.67 µs | +2.3% | 0.98x | + +### D=3 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 1.3 ns | 1.8 ns | +36.3% | 0.73x | +| det_direct | 4.7 ns | 2.2 ns | **-51.9%** | 2.08x | +| det_exact | 936.9 ns | 924.3 ns | **-1.3%** | 1.01x | +| det_exact_f64 | 1.18 µs | 1.19 µs | +1.1% | 0.99x | +| det_sign_exact | 2.4 ns | 4.2 ns | +78.1% | 0.56x | +| solve_exact | 27.02 µs | 27.41 µs | +1.5% | 0.99x | +| solve_exact_f64 | 28.06 µs | 27.98 µs | -0.3% | 1.00x | + +### D=4 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 2.4 ns | 3.3 ns | +36.8% | 0.73x | +| det_direct | 2.4 ns | 4.1 ns | +70.2% | 0.59x | +| det_exact | 2.33 µs | 2.33 µs | -0.0% | 1.00x | +| det_exact_f64 | 2.59 µs | 2.58 µs | -0.7% | 1.01x | +| det_sign_exact | 5.3 ns | 6.9 ns | +30.5% | 0.77x | +| solve_exact | 67.14 µs | 67.99 µs | +1.3% | 0.99x | +| solve_exact_f64 | 
67.86 µs | 68.51 µs | +1.0% | 0.99x | + +### D=5 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 21.6 ns | 24.5 ns | +13.7% | 0.88x | +| det_direct | 2.3 ns | 4.7 ns | +104.8% | 0.49x | +| det_exact | 5.04 µs | 4.99 µs | -1.0% | 1.01x | +| det_exact_f64 | 5.32 µs | 5.31 µs | -0.1% | 1.00x | +| det_sign_exact | 4.97 µs | 4.99 µs | +0.3% | 1.00x | +| solve_exact | 134.99 µs | 136.04 µs | +0.8% | 0.99x | +| solve_exact_f64 | 137.11 µs | 138.97 µs | +1.4% | 0.99x | + +### Near-singular 3x3 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det_sign_exact | 871.8 ns | 877.6 ns | +0.7% | 0.99x | +| det_exact | 907.3 ns | 904.4 ns | -0.3% | 1.00x | +| solve_exact | 4.31 µs | 4.25 µs | **-1.5%** | 1.02x | +| solve_exact_f64 | 4.29 µs | 4.32 µs | +0.7% | 0.99x | + +### Large entries 3x3 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det_sign_exact | 3.14 µs | 3.09 µs | **-1.3%** | 1.01x | +| det_exact | 3.19 µs | 3.11 µs | **-2.3%** | 1.02x | +| solve_exact | 84.77 µs | 83.89 µs | **-1.0%** | 1.01x | +| solve_exact_f64 | 84.62 µs | 83.92 µs | -0.8% | 1.01x | + +### Hilbert 4x4 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det_sign_exact | 5.3 ns | 6.9 ns | +30.4% | 0.77x | +| det_exact | 2.39 µs | 2.31 µs | **-3.2%** | 1.03x | +| solve_exact | 51.69 µs | 52.27 µs | +1.1% | 0.99x | +| solve_exact_f64 | 52.90 µs | 53.26 µs | +0.7% | 0.99x | + +### Hilbert 5x5 + +| Benchmark | v0.4.1 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det_sign_exact | 5.03 µs | 4.88 µs | **-2.9%** | 1.03x | +| det_exact | 5.07 µs | 4.96 µs | **-2.1%** | 1.02x | +| solve_exact | 105.35 µs | 102.72 µs | **-2.5%** | 1.03x | +| solve_exact_f64 | 104.99 µs | 103.94 µs | -1.0% | 1.01x | + +## How to Update + 
+Local performance reports are generated in isolated temporary worktrees: + +```bash +# Local development: compare the current tree with the latest release +just performance-local + +# Release PR: update docs/PERFORMANCE.md and archive the previous report +just performance-release + +# GitHub Actions release assets +just performance-github-assets + +# Explicit repair +just performance-release <current_tag> <baseline_tag> +``` + +`just performance-local` writes `target/bench-reports/performance.md`. +`just performance-github-assets` writes `target/bench-reports/github-assets-performance.md`. + +See `docs/BENCHMARKING.md` for the full comparison workflow. diff --git a/docs/RELEASING.md b/docs/RELEASING.md index 07f41a1..7a6aff0 100644 --- a/docs/RELEASING.md +++ b/docs/RELEASING.md @@ -20,6 +20,7 @@ Set these variables to avoid repeating the version string: # tag has the leading v, version does not TAG=vX.Y.Z VERSION=${TAG#v} +PREVIOUS_TAG=vA.B.C ``` Verify your git remotes: @@ -100,7 +101,24 @@ just plot-vs-linalg-readme Review the updated table in `README.md` and the plot in `docs/assets/` for accuracy. -5. Save benchmark baselines for this release +5. Update the release performance comparison + +```bash +# Infers TAG from Cargo.toml, compares it against the previous stable published +# release, writes docs/PERFORMANCE.md, and archives the previous docs/PERFORMANCE.md +# under docs/archive/performance/. +just performance-release +``` + +Review `docs/PERFORMANCE.md` for the latest release-to-release comparison. Older +committed comparisons are archived under `docs/archive/performance/` with +lexicographically sorted filenames such as `v0.4.2-vs-v0.4.1.md`. Iterative +local reports still live under `target/bench-reports/`. For an explicit release +repair, run `just performance-release $TAG $PREVIOUS_TAG`. To compare +the stored GitHub Actions release assets instead of running cargo locally, use +`just performance-github-assets`. + +6. 
Save benchmark baselines for this release ```bash # Save a named full baseline for this release @@ -125,7 +143,7 @@ uploads a short-lived Actions artifact for debugging the run. See `docs/BENCHMARKING.md` for the full comparison workflow. -6. Validate the release branch +7. Validate the release branch ```bash just ci @@ -133,7 +151,7 @@ just citation-check cargo publish --locked --dry-run ``` -7. Stage and commit release artifacts +8. Stage and commit release artifacts ```bash git add Cargo.toml Cargo.lock CITATION.cff pyproject.toml CHANGELOG.md README.md docs/ @@ -143,11 +161,11 @@ git commit -m "chore(release): release $TAG - Bump version to $TAG - Update citation and utility package metadata - Update changelog with latest changes -- Update benchmark comparison table +- Update benchmark comparison table and release performance report - Update documentation for release" ``` -8. Push the branch and open a PR +9. Push the branch and open a PR ```bash git push -u origin "release/$TAG" diff --git a/docs/archive/performance/README.md b/docs/archive/performance/README.md new file mode 100644 index 0000000..dd5fc68 --- /dev/null +++ b/docs/archive/performance/README.md @@ -0,0 +1,6 @@ +# Archived Performance Reports + +Older release-to-release benchmark comparisons are archived here. +`docs/PERFORMANCE.md` contains the latest curated comparison. + +- [v0.4.1-vs-v0.4.0](v0.4.1-vs-v0.4.0.md) diff --git a/docs/archive/performance/v0.4.1-vs-v0.4.0.md b/docs/archive/performance/v0.4.1-vs-v0.4.0.md new file mode 100644 index 0000000..fce355b --- /dev/null +++ b/docs/archive/performance/v0.4.1-vs-v0.4.0.md @@ -0,0 +1,88 @@ +# Exact Arithmetic Performance + +**la-stack** v0.4.1 · `c6e04fd` (main) · 2026-04-21 22:30:49 UTC +**Statistic**: median + +## Benchmark Results + +Comparison against baseline **v0.4.0**: + +Negative change = faster. Speedup > 1.00x = improvement. 
+ +### D=2 + +| Benchmark | v0.4.0 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 0.6 ns | 0.6 ns | +0.9% | 0.99x | +| det_direct | 0.7 ns | 0.7 ns | +0.6% | 0.99x | +| det_exact | 250.3 ns | 250.0 ns | -0.1% | 1.00x | +| det_exact_f64 | 434.2 ns | 421.7 ns | **-2.9%** | 1.03x | +| det_sign_exact | 1.1 ns | 0.7 ns | **-39.6%** | 1.66x | +| solve_exact | 15.74 µs | 6.51 µs | **-58.7%** | 2.42x | +| solve_exact_f64 | 16.76 µs | 7.06 µs | **-57.9%** | 2.38x | + +### D=3 + +| Benchmark | v0.4.0 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 1.4 ns | 1.3 ns | **-3.0%** | 1.03x | +| det_direct | 4.7 ns | 4.6 ns | **-2.3%** | 1.02x | +| det_exact | 719.2 ns | 741.2 ns | +3.1% | 0.97x | +| det_exact_f64 | 942.0 ns | 933.1 ns | -0.9% | 1.01x | +| det_sign_exact | 4.1 ns | 2.3 ns | **-43.3%** | 1.76x | +| solve_exact | 51.03 µs | 25.19 µs | **-50.6%** | 2.03x | +| solve_exact_f64 | 53.31 µs | 26.64 µs | **-50.0%** | 2.00x | + +### D=4 + +| Benchmark | v0.4.0 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 2.4 ns | 2.4 ns | **-2.6%** | 1.03x | +| det_direct | 2.4 ns | 2.4 ns | -0.6% | 1.01x | +| det_exact | 1.89 µs | 1.88 µs | -0.3% | 1.00x | +| det_exact_f64 | 2.09 µs | 2.09 µs | +0.2% | 1.00x | +| det_sign_exact | 6.4 ns | 5.4 ns | **-15.7%** | 1.19x | +| solve_exact | 147.18 µs | 64.62 µs | **-56.1%** | 2.28x | +| solve_exact_f64 | 147.75 µs | 64.03 µs | **-56.7%** | 2.31x | + +### D=5 + +| Benchmark | v0.4.0 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det | 26.3 ns | 24.4 ns | **-7.5%** | 1.08x | +| det_direct | 2.3 ns | 2.3 ns | **-1.7%** | 1.02x | +| det_exact | 4.24 µs | 4.13 µs | **-2.6%** | 1.03x | +| det_exact_f64 | 4.40 µs | 4.48 µs | +1.9% | 0.98x | +| det_sign_exact | 4.19 µs | 4.08 µs | **-2.7%** | 1.03x | +| solve_exact | 339.25 µs | 132.55 µs | **-60.9%** | 2.56x | +| solve_exact_f64 
| 340.14 µs | 135.75 µs | **-60.1%** | 2.51x | + +### Near-singular 3x3 + +| Benchmark | v0.4.0 | Current | Change | Speedup | +|-----------|-------:|--------:|-------:|--------:| +| det_sign_exact | 909.2 ns | 705.9 ns | **-22.4%** | 1.29x | +| det_exact | 929.6 ns | 729.8 ns | **-21.5%** | 1.27x | + +## How to Update + +Local performance reports are generated in isolated temporary worktrees: + +```bash +# Local development: compare the current tree with the latest release +just performance-local + +# Release PR: update docs/PERFORMANCE.md and archive the previous report +just performance-release + +# GitHub Actions release assets +just performance-github-assets + +# Explicit repair +just performance-release <current_tag> <baseline_tag> +``` + +`just performance-local` writes `target/bench-reports/performance.md`. +`just performance-github-assets` writes `target/bench-reports/github-assets-performance.md`. + +See `docs/BENCHMARKING.md` for the full comparison workflow. diff --git a/justfile b/justfile index 811f19b..7bbb61b 100644 --- a/justfile +++ b/justfile @@ -186,6 +186,46 @@ bench-compare baseline="last" suite="all" scope="release-signal": python-sync baseline="{{baseline}}" uv run bench-compare "$baseline" --suite "{{suite}}" --scope "{{scope}}" +# Backward-compatible alias for the GitHub Actions release-asset comparison. +performance-archive-published current_tag="" baseline_tag="": + just performance-github-assets "{{current_tag}}" "{{baseline_tag}}" + +# Compare stored GitHub Actions release benchmark assets without local cargo runs. 
+performance-github-assets current_tag="" baseline_tag="": python-sync + #!/usr/bin/env bash + set -euo pipefail + current_tag="{{current_tag}}" + baseline_tag="{{baseline_tag}}" + if [[ -n "$current_tag" || -n "$baseline_tag" ]]; then + if [[ -z "$current_tag" || -z "$baseline_tag" ]]; then + echo "current_tag and baseline_tag must be provided together" >&2 + exit 2 + fi + uv run archive-performance "$current_tag" "$baseline_tag" --github-assets --generate-in-temp-worktree --worktree-ref "$current_tag" --output-only --output target/bench-reports/github-assets-performance.md + else + uv run archive-performance --published-latest --github-assets --generate-in-temp-worktree --output-only --output target/bench-reports/github-assets-performance.md + fi + +# Compare the current tree against the latest published release locally. +performance-local: python-sync + uv run archive-performance --current-vs-latest --generate-in-temp-worktree --output-only --output target/bench-reports/performance.md + +# Generate local release-signal measurements in a temp worktree, then promote/archive docs. +performance-release current_tag="" baseline_tag="": python-sync + #!/usr/bin/env bash + set -euo pipefail + current_tag="{{current_tag}}" + baseline_tag="{{baseline_tag}}" + if [[ -n "$current_tag" || -n "$baseline_tag" ]]; then + if [[ -z "$current_tag" || -z "$baseline_tag" ]]; then + echo "current_tag and baseline_tag must be provided together" >&2 + exit 2 + fi + uv run archive-performance "$current_tag" "$baseline_tag" --generate-in-temp-worktree --worktree-ref HEAD + else + uv run archive-performance --infer-release --generate-in-temp-worktree --worktree-ref HEAD + fi + # Run the exact-arithmetic benchmark suite. 
bench-exact: cargo bench --features bench,exact --bench exact @@ -390,6 +430,9 @@ help-workflows: @echo " just bench-compile # Compile benches with warnings-as-errors" @echo " just bench-latest # Run cheap latest measurements" @echo " just bench-latest-vs-last # Run latest and compare against last" + @echo " just performance-github-assets # Compare stored GitHub Actions release assets" + @echo " just performance-local # Compare current tree against latest release locally" + @echo " just performance-release # Promote local release performance docs" @echo " just bench-save-last # Save full baseline as 'last'" @echo " just bench-vs-linalg # Run vs_linalg bench (optional filter)" @echo " just bench-vs-linalg-la-stack # Run la-stack rows from vs_linalg" diff --git a/pyproject.toml b/pyproject.toml index 6f9ff45..9136cf4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,6 +36,7 @@ dependencies = [ ] [project.scripts] archive-changelog = "archive_changelog:main" +archive-performance = "archive_performance:main" bench-compare = "bench_compare:main" criterion-dim-plot = "criterion_dim_plot:main" postprocess-changelog = "postprocess_changelog:main" @@ -45,7 +46,7 @@ check-docs-version-sync = "check_docs_version_sync:main" # Configure setuptools to find modules in scripts/ directory. 
[tool.setuptools] package-dir = { "" = "scripts" } -py-modules = [ "archive_changelog", "bench_compare", "check_docs_version_sync", "check_semgrep_fixtures", "criterion_dim_plot", "postprocess_changelog", "subprocess_utils", "tag_release" ] +py-modules = [ "archive_changelog", "archive_performance", "bench_compare", "check_docs_version_sync", "check_semgrep_fixtures", "criterion_dim_plot", "postprocess_changelog", "subprocess_utils", "tag_release" ] [tool.ruff] line-length = 160 @@ -88,6 +89,7 @@ ignore = [ [tool.ruff.lint.isort] known-first-party = [ "archive_changelog", + "archive_performance", "bench_compare", "check_semgrep_fixtures", "criterion_dim_plot", diff --git a/scripts/README.md b/scripts/README.md index 4b0e5e6..c19e1ea 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -33,6 +33,31 @@ just bench-compare Use `uv run bench-compare --snapshot` for a no-baseline snapshot, or `uv run bench-compare <baseline>` to compare against a named saved baseline. +For release PRs, promote one curated release-to-release comparison into +committed docs and archive the previous committed report. Benchmark generation +runs locally in temporary worktrees: + +```bash +just performance-release +``` + +For local development regression checks, compare the current in-tree code +against the latest published release: + +```bash +just performance-local +``` + +To compare stored GitHub Actions release benchmark assets without local cargo +runs: + +```bash +just performance-github-assets +``` + +For explicit release repair, pass both tags: +`just performance-release v0.4.3 v0.4.2`. + ### Plotting Criterion benchmarks (la-stack vs nalgebra/faer) The plotter reads Criterion output under: @@ -169,6 +194,8 @@ tag-annotation size limit. 
| Script | Purpose | |---|---| +| `archive_performance.py` | Promote release performance docs and archive older comparisons | +| `bench_compare.py` | Compare Criterion benchmark baselines and render Markdown reports | | `criterion_dim_plot.py` | Plot Criterion benchmark results (CSV + SVG + README table) | | `tag_release.py` | Create annotated git tags from CHANGELOG.md sections | | `postprocess_changelog.py` | Strip trailing blank lines from git-cliff output | diff --git a/scripts/archive_performance.py b/scripts/archive_performance.py new file mode 100644 index 0000000..ac4dcf4 --- /dev/null +++ b/scripts/archive_performance.py @@ -0,0 +1,980 @@ +#!/usr/bin/env -S uv run +"""Promote a benchmark report into docs/PERFORMANCE.md and archive the old one. + +Release performance docs have two different lifetimes: + + - ``target/bench-reports/performance.md`` is local scratch output for the + current machine and branch. + - ``docs/PERFORMANCE.md`` is the latest curated release-to-release comparison. + - ``docs/archive/performance/*.md`` stores older curated comparisons. + +This script copies a freshly generated local report into ``docs/PERFORMANCE.md`` +and archives the previous committed report under a filename derived from the +report metadata, such as ``v0.4.2-vs-v0.4.1.md``. 
+""" + +from __future__ import annotations + +import argparse +import json +import os +import re +import shutil +import subprocess +import sys +import tarfile +import tempfile +import tomllib +from collections.abc import Mapping +from dataclasses import dataclass +from pathlib import Path +from typing import Any, Literal, cast + +from subprocess_utils import run_git_command, run_git_command_with_input, run_safe_command + +_VERSION_RE = re.compile(r"^\*\*la-stack\*\* v(?P<version>[^\s`]+)", re.MULTILINE) +_BASELINE_RE = re.compile(r"^Comparison against baseline \*\*(?P<baseline>[^*]+)\*\*:", re.MULTILINE) +_SEMVER_IDENTIFIER_RE = r"(?:0|[1-9][0-9]*|[0-9A-Za-z-]*[A-Za-z-][0-9A-Za-z-]*)" +_TAG_RE = re.compile( + rf"^v?(?:0|[1-9][0-9]*)\.(?:0|[1-9][0-9]*)\.(?:0|[1-9][0-9]*)" + rf"(?:-{_SEMVER_IDENTIFIER_RE}(?:\.{_SEMVER_IDENTIFIER_RE})*)?" + r"(?:\+[0-9A-Za-z-]+(?:\.[0-9A-Za-z-]+)*)?$" +) +_SEMVER_PARTS_RE = re.compile(r"^v?(?P<major>0|[1-9][0-9]*)\.(?P<minor>0|[1-9][0-9]*)\.(?P<patch>0|[1-9][0-9]*)$") + +_DEFAULT_SOURCE = "target/bench-reports/performance.md" +_DEFAULT_CURRENT = "docs/PERFORMANCE.md" +_DEFAULT_ARCHIVE_DIR = "docs/archive/performance" +_DEFAULT_SUITE = "all" +_DEFAULT_SCOPE = "release-signal" +_BENCH_TIMEOUT_SECONDS = 7200 +_COMMAND_TIMEOUT_SECONDS = 600 +_HOW_TO_UPDATE_RE = re.compile(r"(?ms)^## How to Update\n.*\Z") +type BaselineSource = Literal["local", "github-assets"] + + +@dataclass(frozen=True) +class ReportId: + """Release-pair identity parsed from a benchmark report.""" + + current_tag: str + baseline_tag: str + + @property + def archive_name(self) -> str: + """Return the canonical archive filename for this report.""" + return f"{self.current_tag}-vs-{self.baseline_tag}.md" + + +@dataclass(frozen=True) +class GenerationConfig: + """Configuration for benchmark report generation in a temp worktree.""" + + repo_root: Path + current_tag: str + baseline_tag: str + worktree_ref: str + suite: str = _DEFAULT_SUITE + scope: str = _DEFAULT_SCOPE + apply_current_diff: bool = True + 
baseline_source: BaselineSource = "local" + + +@dataclass(frozen=True) +class ResolvedArchiveRequest: + """Release pair and worktree ref resolved from CLI arguments.""" + + current_tag: str + baseline_tag: str + worktree_ref: str + tags_to_fetch: tuple[str, ...] = () + + +@dataclass(frozen=True) +class ArchiveRequestOptions: + """CLI options used to resolve release tags.""" + + current_tag: str | None + baseline_tag: str | None + published_latest: bool + infer_release: bool + current_vs_latest: bool + worktree_ref: str + repo_root: Path + + +@dataclass(frozen=True) +class ArchivePaths: + """Filesystem paths used by the archive CLI.""" + + source: Path + current: Path + output: Path + archive_dir: Path + + +@dataclass(frozen=True) +class ArchiveResult: + """Result and destination metadata for a completed archive operation.""" + + report_id: ReportId + action: Literal["output", "promote-generated", "promote-source"] + + +@dataclass(frozen=True) +class PublishedRelease: + """Stable GitHub release metadata used to infer release pairs.""" + + tag: str + published_at: str + + +def normalize_tag(tag: str) -> str: + """Return *tag* with a leading ``v`` and no surrounding whitespace.""" + normalized = tag.strip() + if not normalized: + msg = "tag must not be empty" + raise ValueError(msg) + if not normalized.startswith("v"): + normalized = f"v{normalized}" + if not _TAG_RE.fullmatch(normalized): + msg = f"expected a semver tag like v0.4.2, got {tag!r}" + raise ValueError(msg) + return normalized + + +def parse_report_id(text: str) -> ReportId: + """Parse the current version and baseline tag from a benchmark report.""" + version_match = _VERSION_RE.search(text) + if version_match is None: + msg = "could not find la-stack version line in benchmark report" + raise ValueError(msg) + + baseline_match = _BASELINE_RE.search(text) + if baseline_match is None: + msg = "could not find comparison baseline line in benchmark report" + raise ValueError(msg) + + return ReportId( + 
current_tag=normalize_tag(version_match.group("version")), + baseline_tag=normalize_tag(baseline_match.group("baseline")), + ) + + +def _semver_sort_key(tag: str) -> tuple[int, int, int]: + match = _SEMVER_PARTS_RE.fullmatch(normalize_tag(tag)) + if match is None: + msg = f"expected a stable semver tag like v0.4.2, got {tag!r}" + raise ValueError(msg) + return (int(match.group("major")), int(match.group("minor")), int(match.group("patch"))) + + +def _stable_published_releases(releases: object) -> list[PublishedRelease]: + if not isinstance(releases, list): + msg = "expected GitHub release list to be a JSON array" + raise TypeError(msg) + + stable_releases: dict[str, PublishedRelease] = {} + for release in releases: + if not isinstance(release, Mapping): + continue + release = cast("Mapping[str, Any]", release) + if release.get("isDraft") or release.get("isPrerelease"): + continue + tag_name = release.get("tagName") + published_at = release.get("publishedAt") + if not isinstance(tag_name, str) or not isinstance(published_at, str) or not published_at: + continue + try: + normalized = normalize_tag(tag_name) + _semver_sort_key(normalized) + except ValueError: + continue + stable_releases[normalized] = PublishedRelease(tag=normalized, published_at=published_at) + + return list(stable_releases.values()) + + +def _github_release_list(repo_root: Path) -> object: + command = [ + "release", + "list", + "--json", + "tagName,isDraft,isPrerelease,publishedAt", + "--limit", + "100", + ] + try: + result = run_safe_command( + "gh", + command, + cwd=repo_root, + timeout=_COMMAND_TIMEOUT_SECONDS, + ) + except subprocess.CalledProcessError as exc: + raise RuntimeError(_format_command_failure(["gh", *command], exc)) from exc + try: + return json.loads(result.stdout) + except json.JSONDecodeError as exc: + msg = "could not parse GitHub release list JSON" + raise RuntimeError(msg) from exc + + +def _published_stable_releases(repo_root: Path) -> list[PublishedRelease]: + return 
_stable_published_releases(_github_release_list(repo_root)) + + +def _latest_published_release(repo_root: Path) -> PublishedRelease: + stable_releases = _published_stable_releases(repo_root) + if not stable_releases: + msg = "expected at least one published stable semver release" + raise RuntimeError(msg) + return max(stable_releases, key=lambda release: release.published_at) + + +def _previous_release_from_list(stable_releases: list[PublishedRelease], current_tag: str) -> PublishedRelease: + current_key = _semver_sort_key(current_tag) + previous_releases = sorted( + (release for release in stable_releases if _semver_sort_key(release.tag) < current_key), + key=lambda release: _semver_sort_key(release.tag), + ) + if not previous_releases: + msg = f"could not find a previous stable semver release before {current_tag}" + raise RuntimeError(msg) + return previous_releases[-1] + + +def _previous_published_release(repo_root: Path, current_tag: str) -> PublishedRelease: + return _previous_release_from_list(_published_stable_releases(repo_root), current_tag) + + +def _normalize_worktree_ref_for_tag(worktree_ref: str, current_tag: str) -> str: + try: + normalized_ref = normalize_tag(worktree_ref) + except ValueError: + return worktree_ref + return current_tag if normalized_ref == current_tag else worktree_ref + + +def _current_package_tag(repo_root: Path) -> str: + cargo_toml = repo_root / "Cargo.toml" + data = tomllib.loads(_read_text(cargo_toml)) + package = data.get("package") + if not isinstance(package, dict): + msg = f"could not find [package] in {cargo_toml}" + raise TypeError(msg) + version = package.get("version") + if not isinstance(version, str): + msg = f"could not find package.version in {cargo_toml}" + raise TypeError(msg) + return normalize_tag(version) + + +def _published_release_pair(repo_root: Path) -> ReportId: + stable_releases = _published_stable_releases(repo_root) + if len(stable_releases) < 2: + msg = "expected at least two published stable semver 
def _how_to_update_section() -> str:
    """Return the canonical "How to Update" markdown section.

    The section starts with the ``## How to Update`` heading and ends with a
    single trailing newline. The explicit-repair command shows placeholder
    arguments so it is distinguishable from the plain release command and the
    line carries no trailing whitespace.
    """
    lines = [
        "## How to Update",
        "",
        "Local performance reports are generated in isolated temporary worktrees:",
        "",
        "```bash",
        "# Local development: compare the current tree with the latest release",
        "just performance-local",
        "",
        "# Release PR: update docs/PERFORMANCE.md and archive the previous report",
        "just performance-release",
        "",
        "# GitHub Actions release assets",
        "just performance-github-assets",
        "",
        "# Explicit repair",
        # Placeholders mirror the CLI's positional argument names.
        "just performance-release <current_tag> <baseline_tag>",
        "```",
        "",
        "`just performance-local` writes `target/bench-reports/performance.md`.",
        "`just performance-github-assets` writes `target/bench-reports/github-assets-performance.md`.",
        "",
        "See `docs/BENCHMARKING.md` for the full comparison workflow.",
        "",
    ]
    return "\n".join(lines)
def _format_command_failure(command: list[str], exc: subprocess.CalledProcessError) -> str:
    """Build a multi-line description of a failed subprocess.

    The first line reports the exit status and the space-joined *command*.
    Captured stdout and stderr follow under their own labels, but only when
    they contain something other than whitespace. Byte output is decoded as
    UTF-8 (undecodable bytes replaced) so it is not rendered as a ``b'...'``
    repr.
    """

    def _stream_text(stream: object) -> str:
        # Normalize None / bytes / str captures to stripped text.
        if stream is None:
            return ""
        if isinstance(stream, (bytes, bytearray)):
            return bytes(stream).decode("utf-8", errors="replace").strip()
        return str(stream).strip()

    parts = [f"command failed ({exc.returncode}): {' '.join(command)}"]
    for label, stream in (("stdout", exc.stdout), ("stderr", exc.stderr)):
        text = _stream_text(stream)
        if text:
            parts.append(f"{label}:\n{text}")
    return "\n".join(parts)
tomllib.loads(_read_text(rust_toolchain)) + toolchain = data.get("toolchain") + if not isinstance(toolchain, dict): + return None + channel = toolchain.get("channel") + return channel if isinstance(channel, str) else None + + +def _benchmark_env(checkout: Path) -> dict[str, str] | None: + if "RUSTUP_TOOLCHAIN" in os.environ: + return None + toolchain = _current_rust_toolchain(checkout) + if toolchain is None: + return None + env = os.environ.copy() + env["RUSTUP_TOOLCHAIN"] = toolchain + return env + + +def _safe_extract_tar(archive: Path, target_dir: Path) -> None: + target_dir.mkdir(parents=True, exist_ok=True) + target_root = target_dir.resolve() + with tarfile.open(archive, "r:gz") as tar: + for member in tar.getmembers(): + member_path = (target_dir / member.name).resolve() + if not member_path.is_relative_to(target_root): + msg = f"refusing to extract unsafe archive member {member.name!r}" + raise ValueError(msg) + tar.extractall(target_dir, filter="data") + + +def _download_release_baseline(*, baseline_tag: str, download_dir: Path, repo_root: Path) -> Path: + artifact = download_dir / f"la-stack-{baseline_tag}-criterion-baseline.tar.gz" + _run_tool( + "gh", + [ + "release", + "download", + baseline_tag, + "--pattern", + artifact.name, + "--dir", + str(download_dir), + ], + cwd=repo_root, + ) + if not artifact.exists(): + msg = f"release baseline asset was not downloaded: {artifact}" + raise FileNotFoundError(msg) + return artifact + + +def _copy_criterion_sample(*, criterion_dir: Path, source_sample: str, target_sample: str) -> None: + copied = 0 + for source in list(criterion_dir.rglob(source_sample)): + if not source.is_dir() or not (source / "estimates.json").exists(): + continue + target = source.parent / target_sample + if target.exists(): + shutil.rmtree(target) + shutil.copytree(source, target) + copied += 1 + if copied == 0: + msg = f"could not find Criterion sample {source_sample!r} under {criterion_dir}" + raise FileNotFoundError(msg) + + +def 
def _prepare_github_release_assets(*, current_tag: str, baseline_tag: str, repo_root: Path, target_worktree: Path, tmp_dir: Path) -> None:
    """Populate the worktree's Criterion data from published release assets.

    Downloads the baseline archive for *baseline_tag* and then for
    *current_tag* into *tmp_dir*, extracts both (baseline first) under
    ``<target_worktree>/target``, and finally copies the Criterion samples
    named after *current_tag* to sibling samples named ``new``.
    """
    # Both downloads complete before anything is unpacked.
    archives = [
        _download_release_baseline(
            baseline_tag=tag,
            download_dir=tmp_dir,
            repo_root=repo_root,
        )
        for tag in (baseline_tag, current_tag)
    ]

    extraction_root = target_worktree / "target"
    for archive in archives:
        _safe_extract_tar(archive, extraction_root)

    _copy_criterion_sample(
        criterion_dir=extraction_root / "criterion",
        source_sample=current_tag,
        target_sample="new",
    )
def _render_report(*, worktree: Path, report: Path, config: GenerationConfig) -> None:
    """Run ``uv run bench-compare`` inside *worktree*, writing markdown to *report*.

    ``--suite`` and ``--scope`` are passed only when
    ``_has_current_release_signal_tooling`` reports that the checked-out
    tooling supports them; otherwise only the baseline tag and ``--output``
    are given.
    """
    selector_flags = (
        ["--suite", config.suite, "--scope", config.scope]
        if _has_current_release_signal_tooling(worktree)
        else []
    )
    _run_tool(
        "uv",
        ["run", "bench-compare", config.baseline_tag, *selector_flags, "--output", str(report)],
        cwd=worktree,
        timeout=_COMMAND_TIMEOUT_SECONDS,
    )
with tempfile.TemporaryDirectory(prefix="la-stack-performance-") as tmp: + tmp_dir = Path(tmp) + worktree = tmp_dir / "worktree" + report = tmp_dir / f"{config.current_tag}-vs-{config.baseline_tag}.md" + + _run_git(["worktree", "add", "--detach", str(worktree), config.worktree_ref], cwd=config.repo_root) + try: + if config.apply_current_diff: + _apply_current_diff_to_worktree(repo_root=config.repo_root, worktree=worktree) + if config.baseline_source == "github-assets": + _prepare_github_release_assets( + current_tag=config.current_tag, + baseline_tag=config.baseline_tag, + repo_root=config.repo_root, + target_worktree=worktree, + tmp_dir=tmp_dir, + ) + _render_report(worktree=worktree, report=report, config=config) + else: + _prepare_local_release_baseline( + baseline_tag=config.baseline_tag, + repo_root=config.repo_root, + target_worktree=worktree, + tmp_dir=tmp_dir, + ) + _run_benchmarks_and_render_report(worktree=worktree, report=report, config=config) + return _read_text(report) + finally: + try: + _run_git(["worktree", "remove", "--force", str(worktree)], cwd=config.repo_root) + except RuntimeError as exc: + print(f"archive-performance: failed to remove temporary worktree: {exc}", file=sys.stderr) + + +def promote_report( + *, + source: Path, + current: Path, + archive_dir: Path, + expected_current_tag: str, + expected_baseline_tag: str, +) -> ReportId: + """Archive the old committed report and promote *source* as the current one.""" + source_text = _normalize_how_to_update(_read_text(source)) + source_id = parse_report_id(source_text) + expected_source_id = ReportId( + current_tag=normalize_tag(expected_current_tag), + baseline_tag=normalize_tag(expected_baseline_tag), + ) + if source_id != expected_source_id: + msg = ( + "benchmark report does not match requested release pair: " + f"found {source_id.current_tag} vs {source_id.baseline_tag}, " + f"expected {expected_source_id.current_tag} vs {expected_source_id.baseline_tag}" + ) + raise ValueError(msg) + + 
def generate_and_promote_worktree_report(
    *,
    current: Path,
    archive_dir: Path,
    config: GenerationConfig,
) -> ReportId:
    """Generate a comparison in a temp worktree, then promote it.

    The tags in *config* are normalized before generation. The generated
    report is staged in a temporary ``.md`` file and handed to
    ``promote_report``, which validates it against the normalized tags. The
    staging file is removed on every exit path, including a failed write.

    Returns:
        The ``ReportId`` returned by ``promote_report``.
    """
    current_tag = normalize_tag(config.current_tag)
    baseline_tag = normalize_tag(config.baseline_tag)
    config = GenerationConfig(
        repo_root=config.repo_root,
        current_tag=current_tag,
        baseline_tag=baseline_tag,
        worktree_ref=config.worktree_ref,
        suite=config.suite,
        scope=config.scope,
        apply_current_diff=config.apply_current_diff,
        baseline_source=config.baseline_source,
    )
    report_text = _generate_report_in_temp_worktree(config=config)

    source: Path | None = None
    try:
        # delete=False: the file must outlive the handle for promote_report.
        with tempfile.NamedTemporaryFile("w", encoding="utf-8", suffix=".md", delete=False) as tmp:
            source = Path(tmp.name)
            tmp.write(report_text)
        return promote_report(
            source=source,
            current=current,
            archive_dir=archive_dir,
            expected_current_tag=current_tag,
            expected_baseline_tag=baseline_tag,
        )
    finally:
        if source is not None:
            source.unlink(missing_ok=True)
def resolve_archive_request(options: ArchiveRequestOptions) -> ResolvedArchiveRequest:
    """Resolve explicit, package-inferred, or latest-published release arguments.

    At most one of the three inference modes may be set, and none of them
    accepts explicit tags:

    * ``published_latest``: both tags come from ``_published_release_pair``;
      a worktree ref of ``"HEAD"`` is replaced by the current tag and both
      tags are scheduled for fetching.
    * ``infer_release``: current tag from ``_current_package_tag``, baseline
      from ``_previous_published_release``.
    * ``current_vs_latest``: current tag from ``_current_package_tag``,
      baseline from ``_latest_published_release``.

    Without a mode, both explicit tags are required and are normalized.

    Raises:
        ValueError: On conflicting modes, explicit tags combined with a mode,
            or missing explicit tags.
    """
    repo_root = options.repo_root
    worktree_ref = options.worktree_ref
    explicit_current = options.current_tag
    explicit_baseline = options.baseline_tag

    mode_flags = (options.published_latest, options.infer_release, options.current_vs_latest)
    if sum(mode_flags) > 1:
        msg = "choose only one of --published-latest, --infer-release, or --current-vs-latest"
        raise ValueError(msg)

    def reject_explicit_tags(flag: str) -> None:
        # Inference modes compute both tags themselves.
        if not (explicit_current is None and explicit_baseline is None):
            msg = f"do not pass current_tag or baseline_tag with {flag}"
            raise ValueError(msg)

    if options.published_latest:
        reject_explicit_tags("--published-latest")
        pair = _published_release_pair(repo_root)
        return ResolvedArchiveRequest(
            current_tag=pair.current_tag,
            baseline_tag=pair.baseline_tag,
            worktree_ref=pair.current_tag if worktree_ref == "HEAD" else worktree_ref,
            tags_to_fetch=(pair.current_tag, pair.baseline_tag),
        )

    if options.infer_release or options.current_vs_latest:
        reject_explicit_tags("--infer-release" if options.infer_release else "--current-vs-latest")
        package_tag = _current_package_tag(repo_root)
        if options.infer_release:
            baseline = _previous_published_release(repo_root, package_tag).tag
        else:
            baseline = _latest_published_release(repo_root).tag
        return ResolvedArchiveRequest(
            current_tag=package_tag,
            baseline_tag=baseline,
            worktree_ref=worktree_ref,
            tags_to_fetch=(baseline,),
        )

    if explicit_current is None or explicit_baseline is None:
        msg = "current_tag and baseline_tag are required unless an inference mode is used"
        raise ValueError(msg)
    normalized_current = normalize_tag(explicit_current)
    normalized_baseline = normalize_tag(explicit_baseline)
    return ResolvedArchiveRequest(
        current_tag=normalized_current,
        baseline_tag=normalized_baseline,
        worktree_ref=_normalize_worktree_ref_for_tag(worktree_ref, normalized_current),
        tags_to_fetch=(normalized_baseline,),
    )
v0.4.2") + parser.add_argument( + "--source", + default=_DEFAULT_SOURCE, + help=f"Generated benchmark report to promote (default: {_DEFAULT_SOURCE})", + ) + parser.add_argument( + "--current", + default=_DEFAULT_CURRENT, + help=f"Committed performance report path (default: {_DEFAULT_CURRENT})", + ) + parser.add_argument( + "--output", + default=_DEFAULT_SOURCE, + help=f"Generated report path for --output-only (default: {_DEFAULT_SOURCE})", + ) + parser.add_argument( + "--archive-dir", + default=_DEFAULT_ARCHIVE_DIR, + help=f"Archive directory for older reports (default: {_DEFAULT_ARCHIVE_DIR})", + ) + parser.add_argument( + "--generate-in-temp-worktree", + action="store_true", + help="Generate the comparison in a temporary detached worktree before promoting it.", + ) + parser.add_argument( + "--published-latest", + action="store_true", + help="Infer the latest stable published GitHub release and its previous stable release.", + ) + parser.add_argument( + "--infer-release", + action="store_true", + help="Infer current_tag from Cargo.toml and baseline_tag from the previous stable published release.", + ) + parser.add_argument( + "--current-vs-latest", + action="store_true", + help="Infer current_tag from Cargo.toml and baseline_tag from the latest stable published release.", + ) + parser.add_argument( + "--github-assets", + action="store_true", + help="Compare stored GitHub Release benchmark assets instead of generating the baseline locally.", + ) + parser.add_argument( + "--output-only", + action="store_true", + help="Write the generated report to --output without promoting docs/PERFORMANCE.md.", + ) + parser.add_argument( + "--worktree-ref", + default="HEAD", + help="Git ref to check out in the temporary worktree (default: HEAD).", + ) + parser.add_argument( + "--no-apply-current-diff", + action="store_true", + help="Do not apply the current checkout's tracked diff to the temporary worktree.", + ) + parser.add_argument( + "--suite", + default=_DEFAULT_SUITE, + 
def _resolve_cli_paths(root: Path, args: argparse.Namespace) -> ArchivePaths:
    """Turn the path-valued CLI options into paths anchored at *root*.

    ``--source``, ``--current``, ``--output`` and ``--archive-dir`` are kept
    unchanged when absolute and joined onto *root* otherwise.
    """

    def anchored(raw: str) -> Path:
        candidate = Path(raw)
        return candidate if candidate.is_absolute() else root / candidate

    return ArchivePaths(
        source=anchored(args.source),
        current=anchored(args.current),
        output=anchored(args.output),
        archive_dir=anchored(args.archive_dir),
    )
def main(argv: list[str] | None = None) -> int:
    """CLI entry point.

    Parses *argv*, resolves all paths against the current working directory,
    resolves the requested release pair, and runs the archive request.

    Returns:
        ``0`` on success. ``1`` after printing an ``archive-performance:``
        message to stderr for an expected failure: invalid arguments or
        report contents, a failed command, or a missing file.
        Any other exception propagates unchanged.
    """
    args = build_parser().parse_args(argv)
    root = Path.cwd()
    paths = _resolve_cli_paths(root, args)

    try:
        request = resolve_archive_request(
            ArchiveRequestOptions(
                current_tag=args.current_tag,
                baseline_tag=args.baseline_tag,
                published_latest=args.published_latest,
                infer_release=args.infer_release,
                current_vs_latest=args.current_vs_latest,
                worktree_ref=args.worktree_ref,
                repo_root=root,
            )
        )
        result = _run_archive_request(args=args, paths=paths, request=request, repo_root=root)
    # SubprocessError covers CalledProcessError and TimeoutExpired, so a
    # command that exceeds its timeout is reported like any other failure.
    # NOTE(review): assumes the command helpers let TimeoutExpired propagate; confirm.
    except (ValueError, RuntimeError, FileNotFoundError, subprocess.SubprocessError) as exc:
        print(f"archive-performance: {exc}", file=sys.stderr)
        return 1

    if result.action == "output":
        print(f"Generated benchmark report in a temporary worktree and wrote it to {paths.output}")
    elif result.action == "promote-generated":
        print(f"Generated benchmark report in a temporary worktree and promoted it to {paths.current}")
    else:
        print(f"Promoted {paths.source} to {paths.current}")
    print(f"Current performance report: {result.report_id.current_tag} vs {result.report_id.baseline_tag}")
    print(f"Archive directory: {paths.archive_dir}")
    return 0
b/scripts/tests/test_archive_performance.py @@ -0,0 +1,1032 @@ +"""Tests for archive_performance.py.""" + +from __future__ import annotations + +import io +import subprocess +import tarfile +from pathlib import Path +from types import SimpleNamespace +from typing import TYPE_CHECKING, Any + +import pytest + +import archive_performance +from archive_performance import GenerationConfig, generate_and_promote_worktree_report, main, normalize_tag, parse_report_id, promote_report + +if TYPE_CHECKING: + from collections.abc import Sequence + +type RunnerCall = tuple[str, tuple[str, ...], Path | None] + + +def _result(stdout: str = "") -> SimpleNamespace: + return SimpleNamespace(stdout=stdout) + + +def _report(version: str, baseline: str) -> str: + return ( + "# Benchmark Performance\n\n" + f"**la-stack** v{version} · `abc1234` (release/test) · 2026-06-08 12:00:00 UTC\n" + "**Statistic**: median\n" + "**Suite**: all\n" + "**Scope**: release-signal\n\n" + "## Benchmark Results\n\n" + f"Comparison against baseline **{baseline}**:\n\n" + "Negative change = faster. 
def _write_unsafe_baseline_archive(path: Path) -> None:
    """Write a gzip tar at *path* whose only member is named ``../escape.txt``.

    The member name points outside any extraction directory, making the
    archive a fixture for path-traversal rejection.
    """
    escape_bytes = b"escape\n"
    member = tarfile.TarInfo("../escape.txt")
    member.size = len(escape_bytes)
    with tarfile.open(path, "w:gz") as archive:
        archive.addfile(member, io.BytesIO(escape_bytes))
Path) -> None: + (worktree / "scripts").mkdir(parents=True, exist_ok=True) + (worktree / "justfile").write_text("bench-exact:\n", encoding="utf-8") + (worktree / "scripts" / "bench_compare.py").write_text('parser.add_argument("--output")\n', encoding="utf-8") + + +def test_normalize_tag_adds_leading_v() -> None: + assert normalize_tag("0.4.2") == "v0.4.2" + assert normalize_tag("v0.4.2") == "v0.4.2" + assert normalize_tag("v1.2.3-rc.1+build.7") == "v1.2.3-rc.1+build.7" + + +def test_normalize_tag_rejects_non_semver_baseline_names() -> None: + with pytest.raises(ValueError, match="semver tag"): + normalize_tag("last") + + +def test_parse_report_id_reads_current_and_baseline_tags() -> None: + report_id = parse_report_id(_report("0.4.2", "v0.4.1")) + + assert report_id.current_tag == "v0.4.2" + assert report_id.baseline_tag == "v0.4.1" + assert report_id.archive_name == "v0.4.2-vs-v0.4.1.md" + + +def test_published_release_pair_discovers_latest_stable_semver_pair(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + assert command == "gh" + assert args == [ + "release", + "list", + "--json", + "tagName,isDraft,isPrerelease,publishedAt", + "--limit", + "100", + ] + assert cwd == tmp_path + return _result( + "[" + '{"tagName":"v0.4.2","isDraft":false,"isPrerelease":false,"publishedAt":"2026-01-01T00:00:00Z"},' + '{"tagName":"v0.4.10","isDraft":false,"isPrerelease":false,"publishedAt":"2026-04-01T00:00:00Z"},' + '{"tagName":"v0.4.11-rc.1","isDraft":false,"isPrerelease":true,"publishedAt":"2026-06-01T00:00:00Z"},' + '{"tagName":"v0.4.11","isDraft":true,"isPrerelease":false,"publishedAt":"2026-06-02T00:00:00Z"},' + '{"tagName":"not-semver","isDraft":false,"isPrerelease":false,"publishedAt":"2026-06-03T00:00:00Z"},' + '{"tagName":"v0.4.3","isDraft":false,"isPrerelease":false,"publishedAt":"2026-03-01T00:00:00Z"}' + "]" + ) + + 
monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + report_id = archive_performance._published_release_pair(tmp_path) + + assert report_id.current_tag == "v0.4.10" + assert report_id.baseline_tag == "v0.4.3" + + +def test_published_release_pair_uses_latest_published_release_not_highest_semver(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + assert command == "gh" + assert cwd == tmp_path + return _result( + "[" + '{"tagName":"v0.5.0","isDraft":false,"isPrerelease":false,"publishedAt":"2026-01-01T00:00:00Z"},' + '{"tagName":"v0.4.9","isDraft":false,"isPrerelease":false,"publishedAt":"2026-02-01T00:00:00Z"},' + '{"tagName":"v0.4.8","isDraft":false,"isPrerelease":false,"publishedAt":"2025-12-01T00:00:00Z"}' + "]" + ) + + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + report_id = archive_performance._published_release_pair(tmp_path) + + assert report_id.current_tag == "v0.4.9" + assert report_id.baseline_tag == "v0.4.8" + + +def test_resolve_archive_request_infer_release_uses_package_version_and_previous_release(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + (tmp_path / "Cargo.toml").write_text('[package]\nversion = "0.4.3"\n', encoding="utf-8") + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + assert command == "gh" + assert args[:2] == ["release", "list"] + assert cwd == tmp_path + return _result( + "[" + '{"tagName":"v0.4.1","isDraft":false,"isPrerelease":false,"publishedAt":"2026-01-01T00:00:00Z"},' + '{"tagName":"v0.4.2","isDraft":false,"isPrerelease":false,"publishedAt":"2026-02-01T00:00:00Z"}' + "]" + ) + + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + request = archive_performance.resolve_archive_request( + archive_performance.ArchiveRequestOptions( + 
current_tag=None, + baseline_tag=None, + published_latest=False, + infer_release=True, + current_vs_latest=False, + worktree_ref="HEAD", + repo_root=tmp_path, + ) + ) + + assert request.current_tag == "v0.4.3" + assert request.baseline_tag == "v0.4.2" + assert request.worktree_ref == "HEAD" + assert request.tags_to_fetch == ("v0.4.2",) + + +def test_resolve_archive_request_current_vs_latest_uses_package_version_and_latest_release(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + (tmp_path / "Cargo.toml").write_text('[package]\nversion = "0.4.3"\n', encoding="utf-8") + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + assert command == "gh" + assert args[:2] == ["release", "list"] + assert cwd == tmp_path + return _result( + "[" + '{"tagName":"v0.4.1","isDraft":false,"isPrerelease":false,"publishedAt":"2026-01-01T00:00:00Z"},' + '{"tagName":"v0.4.2","isDraft":false,"isPrerelease":false,"publishedAt":"2026-02-01T00:00:00Z"}' + "]" + ) + + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + request = archive_performance.resolve_archive_request( + archive_performance.ArchiveRequestOptions( + current_tag=None, + baseline_tag=None, + published_latest=False, + infer_release=False, + current_vs_latest=True, + worktree_ref="HEAD", + repo_root=tmp_path, + ) + ) + + assert request.current_tag == "v0.4.3" + assert request.baseline_tag == "v0.4.2" + assert request.worktree_ref == "HEAD" + assert request.tags_to_fetch == ("v0.4.2",) + + +def test_benchmark_env_uses_current_repo_toolchain(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("RUSTUP_TOOLCHAIN", raising=False) + (tmp_path / "rust-toolchain.toml").write_text('[toolchain]\nchannel = "1.96.0"\n', encoding="utf-8") + + env = archive_performance._benchmark_env(tmp_path) + + assert env is not None + assert env["RUSTUP_TOOLCHAIN"] == "1.96.0" + + +def 
test_benchmark_env_respects_existing_toolchain_override(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv("RUSTUP_TOOLCHAIN", "nightly") + (tmp_path / "rust-toolchain.toml").write_text('[toolchain]\nchannel = "1.96.0"\n', encoding="utf-8") + + assert archive_performance._benchmark_env(tmp_path) is None + + +def test_promote_report_archives_previous_and_updates_sorted_index(tmp_path: Path) -> None: + source = tmp_path / "target" / "bench-reports" / "performance.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + + source.parent.mkdir(parents=True) + current.parent.mkdir(parents=True) + archive_dir.mkdir(parents=True) + source.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + current.write_text(_report("0.4.1", "v0.4.0"), encoding="utf-8") + (archive_dir / "v0.3.1-vs-v0.3.0.md").write_text(_report("0.3.1", "v0.3.0"), encoding="utf-8") + + promoted = promote_report( + source=source, + current=current, + archive_dir=archive_dir, + expected_current_tag="v0.4.2", + expected_baseline_tag="v0.4.1", + ) + + assert promoted.archive_name == "v0.4.2-vs-v0.4.1.md" + assert current.read_text(encoding="utf-8") == _normalized_report("0.4.2", "v0.4.1") + assert (archive_dir / "v0.4.1-vs-v0.4.0.md").read_text(encoding="utf-8") == _normalized_report("0.4.1", "v0.4.0") + assert (archive_dir / "README.md").read_text(encoding="utf-8") == ( + "# Archived Performance Reports\n\n" + "Older release-to-release benchmark comparisons are archived here.\n" + "`docs/PERFORMANCE.md` contains the latest curated comparison.\n\n" + "- [v0.3.1-vs-v0.3.0](v0.3.1-vs-v0.3.0.md)\n" + "- [v0.4.1-vs-v0.4.0](v0.4.1-vs-v0.4.0.md)\n" + ) + + +def test_promote_report_is_idempotent_for_same_release_pair(tmp_path: Path) -> None: + source = tmp_path / "performance-new.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + + 
source.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + current.parent.mkdir(parents=True) + current.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + + promote_report( + source=source, + current=current, + archive_dir=archive_dir, + expected_current_tag="v0.4.2", + expected_baseline_tag="v0.4.1", + ) + + assert not (archive_dir / "v0.4.2-vs-v0.4.1.md").exists() + assert "- No archived performance reports yet." in (archive_dir / "README.md").read_text(encoding="utf-8") + + +def test_promote_report_does_not_overwrite_existing_archive(tmp_path: Path) -> None: + source = tmp_path / "performance-new.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + archived = archive_dir / "v0.4.1-vs-v0.4.0.md" + + source.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + current.parent.mkdir(parents=True) + current.write_text(_report("0.4.1", "v0.4.0"), encoding="utf-8") + archive_dir.mkdir(parents=True) + archived.write_text("already archived\n", encoding="utf-8") + + promote_report( + source=source, + current=current, + archive_dir=archive_dir, + expected_current_tag="v0.4.2", + expected_baseline_tag="v0.4.1", + ) + + assert archived.read_text(encoding="utf-8") == "already archived\n" + + +def test_promote_report_rejects_unexpected_release_pair(tmp_path: Path) -> None: + source = tmp_path / "performance-new.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + source.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + + with pytest.raises(ValueError, match="does not match requested release pair"): + promote_report( + source=source, + current=current, + archive_dir=archive_dir, + expected_current_tag="v0.4.3", + expected_baseline_tag="v0.4.2", + ) + + +def test_promote_report_rewrites_legacy_update_instructions(tmp_path: Path) -> None: + source = tmp_path / "performance-new.md" + current = tmp_path / "docs" / 
"PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + source.write_text(_legacy_report("0.4.3", "v0.4.2"), encoding="utf-8") + current.parent.mkdir(parents=True) + current.write_text(_legacy_report("0.4.2", "v0.4.1"), encoding="utf-8") + + promote_report( + source=source, + current=current, + archive_dir=archive_dir, + expected_current_tag="v0.4.3", + expected_baseline_tag="v0.4.2", + ) + + current_text = current.read_text(encoding="utf-8") + archived_text = (archive_dir / "v0.4.2-vs-v0.4.1.md").read_text(encoding="utf-8") + assert "just performance-local" in current_text + assert "just performance-release" in current_text + assert "just performance-github-assets" in current_text + assert "just performance-release " in current_text + assert "git checkout" not in current_text + assert "just performance-local" in archived_text + assert "just performance-github-assets" in archived_text + assert "git checkout" not in archived_text + + +def test_main_promotes_generated_report_to_docs_performance(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: + source = tmp_path / "target" / "bench-reports" / "performance.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + generated = _report("0.4.3", "v0.4.2") + + source.parent.mkdir(parents=True) + source.write_text(generated, encoding="utf-8") + current.parent.mkdir(parents=True) + current.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + + rc = main( + [ + "v0.4.3", + "v0.4.2", + "--source", + str(source), + "--current", + str(current), + "--archive-dir", + str(archive_dir), + ] + ) + + assert rc == 0 + assert current.read_text(encoding="utf-8") == archive_performance._normalize_how_to_update(generated) + assert (archive_dir / "v0.4.2-vs-v0.4.1.md").exists() + assert "Current performance report: v0.4.3 vs v0.4.2" in capsys.readouterr().out + + +def test_main_reports_release_pair_mismatch_to_stderr(tmp_path: Path, capsys: 
pytest.CaptureFixture[str]) -> None: + source = tmp_path / "target" / "bench-reports" / "performance.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + source.parent.mkdir(parents=True) + source.write_text(_report("0.4.3", "v0.4.2"), encoding="utf-8") + + rc = main( + [ + "v0.4.4", + "v0.4.3", + "--source", + str(source), + "--current", + str(current), + "--archive-dir", + str(archive_dir), + ] + ) + + captured = capsys.readouterr() + assert rc == 1 + assert "does not match requested release pair" in captured.err + assert not current.exists() + + +def test_main_reraises_unexpected_errors(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + def fail_unexpected(*, args: object, paths: object, request: object, repo_root: Path) -> archive_performance.ArchiveResult: + msg = "unexpected test failure" + raise AssertionError(msg) + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "_run_archive_request", fail_unexpected) + + with pytest.raises(AssertionError, match="unexpected test failure"): + main(["v0.4.3", "v0.4.2"]) + + +def test_main_generates_report_in_temp_worktree(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | 
None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "just" and args == ["bench-save-baseline", "v0.4.2"]: + assert cwd is not None + criterion_dir = cwd / "target" / "criterion" + criterion_dir.mkdir(parents=True) + (criterion_dir / "baseline.txt").write_text("baseline\n", encoding="utf-8") + if command == "uv": + output = Path(args[args.index("--output") + 1]) + output.write_text(_report("0.4.3", "v0.4.2"), encoding="utf-8") + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "v0.4.3", + "v0.4.2", + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--generate-in-temp-worktree", + "--worktree-ref", + "v0.4.3", + "--no-apply-current-diff", + "--suite", + "exact", + "--scope", + "release-signal", + ] + ) + + captured = capsys.readouterr() + assert rc == 0 + assert current.read_text(encoding="utf-8") == _normalized_report("0.4.3", "v0.4.2") + assert "Generated benchmark report in a temporary worktree" in captured.out + assert "target/bench-reports/performance.md" not in captured.out + assert any(kind == "git" and args[:3] == ("worktree", "add", "--detach") and args[4] == "v0.4.3" for kind, args, _ in calls) + assert any(kind == "uv" and "--suite" in args and args[args.index("--suite") + 1] == "exact" for kind, args, _ in calls) + assert not any(kind == "git" and args == ("diff", "--binary", "HEAD") for kind, args, _ in calls) + assert not any(kind == "git-stdin" for kind, _, _ in calls) + + +def test_temp_worktree_is_removed_when_benchmark_command_fails(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / 
"docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:2] == ["fetch", "origin"]: + return _result() + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "just" and args == ["bench-save-baseline", "v0.4.2"]: + assert cwd is not None + criterion_dir = cwd / "target" / "criterion" + criterion_dir.mkdir(parents=True) + (criterion_dir / "baseline.txt").write_text("baseline\n", encoding="utf-8") + if command == "just" and args == ["bench-latest"]: + raise subprocess.CalledProcessError(42, [command, *args], output="bench stdout", stderr="bench stderr") + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "v0.4.3", + "v0.4.2", + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--generate-in-temp-worktree", + "--worktree-ref", + "HEAD", + "--no-apply-current-diff", + ] + ) + + captured = capsys.readouterr() + assert rc == 1 + assert "command failed (42): just bench-latest" in captured.err + assert "bench stderr" in captured.err + assert not current.exists() + assert any(kind == "git" and args[:3] == ("worktree", "remove", "--force") for 
kind, args, _ in calls) + + +def test_generate_report_rejects_unsafe_baseline_archive(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "gh": + download_dir = Path(args[args.index("--dir") + 1]) + tag = args[2] + _write_unsafe_baseline_archive(download_dir / f"la-stack-{tag}-criterion-baseline.tar.gz") + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "v0.4.3", + "v0.4.2", + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--generate-in-temp-worktree", + "--worktree-ref", + "HEAD", + "--no-apply-current-diff", + "--github-assets", + ] + ) + + captured = capsys.readouterr() + assert rc == 1 + assert "refusing to extract unsafe archive member '../escape.txt'" in captured.err + assert not (tmp_path / "escape.txt").exists() + assert not current.exists() + assert not any(kind in {"just", "uv"} for kind, _, _ in calls) + 
assert any(kind == "git" and args[:3] == ("worktree", "remove", "--force") for kind, args, _ in calls) + + +def test_generate_report_generates_release_baseline_locally(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + monkeypatch.delenv("RUSTUP_TOOLCHAIN", raising=False) + (tmp_path / "rust-toolchain.toml").write_text('[toolchain]\nchannel = "1.96.0"\n', encoding="utf-8") + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + if worktree.name == "baseline-worktree": + _write_legacy_benchmark_tooling(worktree) + else: + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "just" and args == ["bench-save-baseline", "v0.4.2"]: + assert kwargs["env"]["RUSTUP_TOOLCHAIN"] == "1.96.0" + assert cwd is not None + criterion_dir = cwd / "target" / "criterion" + criterion_dir.mkdir(parents=True) + (criterion_dir / "baseline.txt").write_text("baseline\n", encoding="utf-8") + if command == "just" and args == ["bench-latest"]: + assert kwargs["env"]["RUSTUP_TOOLCHAIN"] == "1.96.0" + if command == "uv": + output = Path(args[args.index("--output") + 1]) + output.write_text(_report("0.4.3", "v0.4.2"), encoding="utf-8") + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, 
"run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "v0.4.3", + "v0.4.2", + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--generate-in-temp-worktree", + "--worktree-ref", + "HEAD", + "--no-apply-current-diff", + ] + ) + + captured = capsys.readouterr() + assert rc == 0 + assert captured.err == "" + assert current.read_text(encoding="utf-8") == _normalized_report("0.4.3", "v0.4.2") + assert not any(kind == "gh" for kind, _, _ in calls) + assert any(kind == "just" and args == ("bench-save-baseline", "v0.4.2") for kind, args, _ in calls) + assert any(kind == "just" and args == ("bench-latest",) for kind, args, _ in calls) + assert any(kind == "uv" and "--suite" in args for kind, args, _ in calls) + assert sum(1 for kind, args, _ in calls if kind == "git" and args[:3] == ("worktree", "remove", "--force")) == 2 + + +def test_main_generates_latest_published_report_from_github_releases(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "gh" and 
args[:2] == ["release", "list"]: + return _result( + "[" + '{"tagName":"v0.4.2","isDraft":false,"isPrerelease":false,"publishedAt":"2026-01-01T00:00:00Z"},' + '{"tagName":"v0.4.3","isDraft":false,"isPrerelease":false,"publishedAt":"2026-02-01T00:00:00Z"}' + "]" + ) + if command == "gh": + download_dir = Path(args[args.index("--dir") + 1]) + tag = args[2] + _write_baseline_archive(download_dir / f"la-stack-{tag}-criterion-baseline.tar.gz") + if command == "uv": + output = Path(args[args.index("--output") + 1]) + output.write_text(_report("0.4.3", "v0.4.2"), encoding="utf-8") + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--published-latest", + "--github-assets", + "--generate-in-temp-worktree", + "--no-apply-current-diff", + ] + ) + + assert rc == 0 + assert current.read_text(encoding="utf-8") == _normalized_report("0.4.3", "v0.4.2") + assert any( + kind == "git" + and args + == ( + "fetch", + "origin", + "refs/tags/v0.4.3:refs/tags/v0.4.3", + "refs/tags/v0.4.2:refs/tags/v0.4.2", + ) + for kind, args, _ in calls + ) + fetch_index = next(index for index, (kind, args, _) in enumerate(calls) if kind == "git" and args[:2] == ("fetch", "origin")) + worktree_index = next(index for index, (kind, args, _) in enumerate(calls) if kind == "git" and args[:3] == ("worktree", "add", "--detach")) + assert fetch_index < worktree_index + assert any(kind == "git" and args[:3] == ("worktree", "add", "--detach") and args[4] == "v0.4.3" for kind, args, _ in calls) + + +def test_main_normalizes_explicit_bare_tags_before_fetching_and_checkout(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + output = tmp_path / "target" / "bench-reports" 
/ "github-assets-performance.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "gh": + download_dir = Path(args[args.index("--dir") + 1]) + tag = args[2] + _write_baseline_archive(download_dir / f"la-stack-{tag}-criterion-baseline.tar.gz") + if command == "uv": + report = Path(args[args.index("--output") + 1]) + report.write_text(_report("0.4.3", "v0.4.2"), encoding="utf-8") + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "0.4.3", + "0.4.2", + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--github-assets", + "--generate-in-temp-worktree", + "--worktree-ref", + "0.4.3", + "--output-only", + "--output", + str(output), + ] + ) + + assert rc == 0 + assert output.read_text(encoding="utf-8") == _normalized_report("0.4.3", "v0.4.2") + assert not current.exists() + assert any( + kind == "git" + and args + == ( + "fetch", + "origin", + "refs/tags/v0.4.2:refs/tags/v0.4.2", + 
"refs/tags/v0.4.3:refs/tags/v0.4.3", + ) + for kind, args, _ in calls + ) + fetch_index = next(index for index, (kind, args, _) in enumerate(calls) if kind == "git" and args[:2] == ("fetch", "origin")) + worktree_index = next(index for index, (kind, args, _) in enumerate(calls) if kind == "git" and args[:3] == ("worktree", "add", "--detach")) + assert fetch_index < worktree_index + assert any(kind == "git" and args[:3] == ("worktree", "add", "--detach") and args[4] == "v0.4.3" for kind, args, _ in calls) + assert any(kind == "gh" and args[:3] == ("release", "download", "v0.4.2") for kind, args, _ in calls) + assert any(kind == "gh" and args[:3] == ("release", "download", "v0.4.3") for kind, args, _ in calls) + + +def test_main_published_latest_fetch_failure_stops_before_worktree(tmp_path: Path, monkeypatch: pytest.MonkeyPatch, capsys: pytest.CaptureFixture[str]) -> None: + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:2] == ["fetch", "origin"]: + raise subprocess.CalledProcessError(128, ["git", *args], output="fetch stdout", stderr="missing tag") + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "gh" and args[:2] == ["release", "list"]: + return _result( + "[" + 
'{"tagName":"v0.4.2","isDraft":false,"isPrerelease":false,"publishedAt":"2026-01-01T00:00:00Z"},' + '{"tagName":"v0.4.3","isDraft":false,"isPrerelease":false,"publishedAt":"2026-02-01T00:00:00Z"}' + "]" + ) + return _result() + + monkeypatch.chdir(tmp_path) + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + rc = main( + [ + "--current", + str(current), + "--archive-dir", + str(archive_dir), + "--published-latest", + "--generate-in-temp-worktree", + "--no-apply-current-diff", + ] + ) + + captured = capsys.readouterr() + assert rc == 1 + assert "command failed (128): git fetch origin" in captured.err + assert "missing tag" in captured.err + assert not current.exists() + assert not any(kind == "git" and args[:3] == ("worktree", "add", "--detach") for kind, args, _ in calls) + assert not any(kind in {"just", "uv"} for kind, _, _ in calls) + assert not any(kind == "git-stdin" for kind, _, _ in calls) + + +def test_failed_atomic_replace_preserves_existing_report(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + source = tmp_path / "performance-new.md" + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + original = _report("0.4.2", "v0.4.1") + + source.write_text(_report("0.4.3", "v0.4.2"), encoding="utf-8") + current.parent.mkdir(parents=True) + current.write_text(original, encoding="utf-8") + + def fail_replace(src: Path, dst: Path) -> None: + msg = f"simulated replace failure for {dst}" + raise OSError(msg) + + monkeypatch.setattr(archive_performance, "_replace_file", fail_replace) + + with pytest.raises(OSError, match="simulated replace failure"): + promote_report( + source=source, + current=current, + archive_dir=archive_dir, + expected_current_tag="v0.4.3", + expected_baseline_tag="v0.4.2", + ) + 
+ assert current.read_text(encoding="utf-8") == original + assert not list(current.parent.glob(".PERFORMANCE.md.*.tmp")) + + +def test_generate_and_promote_uses_temp_worktree_and_current_diff(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.delenv("RUSTUP_TOOLCHAIN", raising=False) + (tmp_path / "rust-toolchain.toml").write_text('[toolchain]\nchannel = "1.96.0"\n', encoding="utf-8") + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + current.parent.mkdir(parents=True) + current.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_current_benchmark_tooling(worktree) + if args == ["diff", "--binary", "HEAD"]: + return _result("diff --git a/README.md b/README.md\n") + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + assert "diff --git" in input_data + return _result() + + def fake_run_safe(command: str, args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "just" and args == ["bench-save-baseline", "v0.4.2"]: + assert kwargs["env"]["RUSTUP_TOOLCHAIN"] == "1.96.0" + assert cwd is not None + criterion_dir = cwd / "target" / "criterion" + criterion_dir.mkdir(parents=True) + (criterion_dir / "baseline.txt").write_text("baseline\n", encoding="utf-8") + if command == "just" and args == ["bench-latest"]: + assert kwargs["env"]["RUSTUP_TOOLCHAIN"] == "1.96.0" + if command == "uv": + output = Path(args[args.index("--output") + 1]) + output.write_text(_report("0.4.3", 
"v0.4.2"), encoding="utf-8") + return _result() + + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + report_id = generate_and_promote_worktree_report( + current=current, + archive_dir=archive_dir, + config=GenerationConfig( + repo_root=tmp_path, + current_tag="v0.4.3", + baseline_tag="v0.4.2", + worktree_ref="HEAD", + apply_current_diff=True, + ), + ) + + assert report_id.archive_name == "v0.4.3-vs-v0.4.2.md" + assert current.read_text(encoding="utf-8") == _normalized_report("0.4.3", "v0.4.2") + assert (archive_dir / "v0.4.2-vs-v0.4.1.md").exists() + assert any(kind == "git" and args[:3] == ("worktree", "add", "--detach") and args[4] == "HEAD" for kind, args, _ in calls) + assert any(kind == "git-stdin" and args == ("apply", "--binary") for kind, args, _ in calls) + assert any(kind == "just" and args == ("bench-latest",) for kind, args, _ in calls) + assert any(kind == "git" and args[:3] == ("worktree", "remove", "--force") for kind, args, _ in calls) + + +def test_generate_and_promote_legacy_published_tag_uses_legacy_commands(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> None: + current = tmp_path / "docs" / "PERFORMANCE.md" + archive_dir = tmp_path / "docs" / "archive" / "performance" + calls: list[RunnerCall] = [] + + def fake_run_git(args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git", tuple(args), cwd)) + if args[:3] == ["worktree", "add", "--detach"]: + worktree = Path(args[3]) + worktree.mkdir(parents=True) + _write_legacy_benchmark_tooling(worktree) + return _result() + + def fake_run_git_with_input(args: Sequence[str], input_data: str, cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append(("git-stdin", tuple(args), cwd)) + return _result() + + def fake_run_safe(command: str, 
args: Sequence[str], cwd: Path | None = None, **kwargs: Any) -> SimpleNamespace: + calls.append((command, tuple(args), cwd)) + if command == "just" and args == ["bench-save-baseline", "v0.4.1"]: + assert cwd is not None + criterion_dir = cwd / "target" / "criterion" + criterion_dir.mkdir(parents=True) + (criterion_dir / "baseline.txt").write_text("baseline\n", encoding="utf-8") + if command == "uv": + output = Path(args[args.index("--output") + 1]) + output.write_text(_report("0.4.2", "v0.4.1"), encoding="utf-8") + return _result() + + monkeypatch.setattr(archive_performance, "run_git_command", fake_run_git) + monkeypatch.setattr(archive_performance, "run_git_command_with_input", fake_run_git_with_input) + monkeypatch.setattr(archive_performance, "run_safe_command", fake_run_safe) + + report_id = generate_and_promote_worktree_report( + current=current, + archive_dir=archive_dir, + config=GenerationConfig( + repo_root=tmp_path, + current_tag="v0.4.2", + baseline_tag="v0.4.1", + worktree_ref="v0.4.2", + apply_current_diff=False, + ), + ) + + assert report_id.archive_name == "v0.4.2-vs-v0.4.1.md" + assert current.read_text(encoding="utf-8") == _normalized_report("0.4.2", "v0.4.1") + assert any(kind == "git" and args[:3] == ("worktree", "add", "--detach") and args[4] == "v0.4.2" for kind, args, _ in calls) + assert any(kind == "just" and args == ("bench-exact",) for kind, args, _ in calls) + assert not any(kind == "just" and args == ("bench-latest",) for kind, args, _ in calls) + assert not any(kind == "uv" and "--suite" in args for kind, args, _ in calls) + assert not any(kind == "uv" and "--scope" in args for kind, args, _ in calls) + assert not any(kind == "git" and args == ("diff", "--binary", "HEAD") for kind, args, _ in calls) + assert not any(kind == "git-stdin" for kind, _, _ in calls) diff --git a/scripts/tests/test_bench_compare.py b/scripts/tests/test_bench_compare.py index e0744a2..04c59bc 100644 --- a/scripts/tests/test_bench_compare.py +++ 
b/scripts/tests/test_bench_compare.py @@ -375,7 +375,11 @@ def test_main_snapshot_writes_output(tmp_path: Path) -> None: assert "### D=2" in text assert "### Random percentile D=3" in text assert "### Near-singular 3x3" in text - assert "just bench-compare" in text + assert "just performance-local" in text + assert "just performance-release" in text + assert "just performance-github-assets" in text + assert "just performance-release " in text + assert "git checkout" not in text def test_main_no_criterion_dir(tmp_path: Path, capsys: pytest.CaptureFixture[str]) -> None: