From e6bd4de81026dbe2e24ff677c372e16fe30abe17 Mon Sep 17 00:00:00 2001 From: igerber Date: Sun, 31 May 2026 11:48:17 -0400 Subject: [PATCH] ci: only re-run gated matrices on ready-for-ci label transitions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The rust-test, notebooks, and docs-tests workflows are gated on the ready-for-ci label and list labeled/unlabeled in their pull_request trigger types. Their job guards only checked that ready-for-ci was PRESENT, so churning any unrelated label (e.g. adding `bug`) on a PR that already carried ready-for-ci re-fired the entire matrix — including the multi-hour pure-Python fallback. Refine the 7 job guards across the three matrices so a labeled/unlabeled event only (re)runs when the triggering label is ready-for-ci; code pushes and the initial ready-for-ci add are unaffected. ci-gate.yml (the cheap one-step required check) is intentionally left to re-evaluate on every label event so the gate still flips red/green on add/remove. Lock the behavior with TestCiWorkflowLabelEventGuard in tests/test_openai_review.py and drop the resolved TODO row. Co-Authored-By: Claude Opus 4.8 (1M context) --- .github/workflows/docs-tests.yml | 19 ++- .github/workflows/notebooks.yml | 5 +- .github/workflows/rust-test.yml | 29 +++- TODO.md | 1 - tests/test_openai_review.py | 281 +++++++++++++++++++++++++++++++ 5 files changed, 325 insertions(+), 10 deletions(-) diff --git a/.github/workflows/docs-tests.yml b/.github/workflows/docs-tests.yml index 12921670..66b87c5e 100644 --- a/.github/workflows/docs-tests.yml +++ b/.github/workflows/docs-tests.yml @@ -41,9 +41,12 @@ permissions: jobs: doc-snippets: name: Validate RST code snippets + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ubuntu-latest steps: @@ -63,15 +66,18 @@ jobs: - name: Run doc snippet tests in pure Python mode # PYTHONPATH=. lets the test import diff_diff directly from - # source without invoking the maturin/Rust build (mirrors Pure - # Python Fallback at rust-test.yml:189-193). + # source without invoking the maturin/Rust build (mirrors the + # `python-fallback` job in rust-test.yml). run: PYTHONPATH=. DIFF_DIFF_BACKEND=python pytest tests/test_doc_snippets.py -v sphinx-build: name: Sphinx HTML build (-W warnings as errors) + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ubuntu-latest steps: @@ -104,9 +110,12 @@ jobs: docs-deps-py39-smoke: name: Validate docs deps install on Python 3.9 (floor compat) + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ubuntu-latest steps: diff --git a/.github/workflows/notebooks.yml b/.github/workflows/notebooks.yml index 1628cc0c..282c4df5 100644 --- a/.github/workflows/notebooks.yml +++ b/.github/workflows/notebooks.yml @@ -26,9 +26,12 @@ permissions: jobs: execute-notebooks: name: Execute tutorial notebooks + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ubuntu-latest steps: diff --git a/.github/workflows/rust-test.yml b/.github/workflows/rust-test.yml index 0abdd3f7..ced1d43f 100644 --- a/.github/workflows/rust-test.yml +++ b/.github/workflows/rust-test.yml @@ -13,6 +13,13 @@ on: - 'tools/**' - 'pyproject.toml' - '.github/workflows/rust-test.yml' + # docs-tests.yml / notebooks.yml / ci-gate.yml carry ready-for-ci label + # contracts locked by TestCiWorkflowLabelEventGuard in + # tests/test_openai_review.py; include them so a regression there triggers + # this suite (enforced by TestRustTestWorkflowPathFilter). + - '.github/workflows/docs-tests.yml' + - '.github/workflows/notebooks.yml' + - '.github/workflows/ci-gate.yml' # The AI-review surfaces below are tested by # tests/test_openai_review.py (TestWorkflowPromptHardening, # TestAdaptReviewCriteria, etc.). Without these path filters, a @@ -33,6 +40,13 @@ on: - 'tools/**' - 'pyproject.toml' - '.github/workflows/rust-test.yml' + # docs-tests.yml / notebooks.yml / ci-gate.yml carry ready-for-ci label + # contracts locked by TestCiWorkflowLabelEventGuard in + # tests/test_openai_review.py; include them so a regression there triggers + # this suite (enforced by TestRustTestWorkflowPathFilter). + - '.github/workflows/docs-tests.yml' + - '.github/workflows/notebooks.yml' + - '.github/workflows/ci-gate.yml' - '.github/workflows/ai_pr_review.yml' - '.github/codex/prompts/pr_review.md' - '.claude/scripts/openai_review.py' @@ -47,9 +61,12 @@ jobs: # Run Rust unit tests on all platforms rust-tests: name: Rust Unit Tests (${{ matrix.os }}) + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ${{ matrix.os }} env: PYO3_USE_ABI3_FORWARD_COMPATIBILITY: 1 @@ -83,9 +100,12 @@ jobs: # Build and test with Python on multiple platforms python-tests: name: Python Tests (${{ matrix.os }}, py${{ matrix.python-version }}) + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ${{ matrix.os }} strategy: fail-fast: false @@ -190,9 +210,12 @@ jobs: # Test pure Python fallback (without Rust extension) python-fallback: name: Pure Python Fallback + # Skip unrelated label churn: a non-ready-for-ci label add/remove won't run this job. if: >- github.event_name != 'pull_request' - || contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + || (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') + && (github.event.action != 'labeled' && github.event.action != 'unlabeled' + || github.event.label.name == 'ready-for-ci')) runs-on: ubuntu-latest steps: - uses: actions/checkout@v6 diff --git a/TODO.md b/TODO.md index f7866f37..9c05e4de 100644 --- a/TODO.md +++ b/TODO.md @@ -159,7 +159,6 @@ Deferred items from PR reviews that were not addressed before merge. |-------|----------|----|----------| | ImputationDiD event-study SEs recompute full conservative variance per horizon (should cache A0/A1 factorization) | `imputation.py` | #141 | Low | | Rust faer SVD ndarray-to-faer conversion overhead (minimal vs SVD cost) | `rust/src/linalg.rs:67` | #115 | Low | -| Unrelated label events (e.g., adding `bug` label) re-trigger CI workflows when `ready-for-ci` is already present; filter `labeled`/`unlabeled` events to only `ready-for-ci` transitions | `.github/workflows/rust-test.yml`, `notebooks.yml`, `docs-tests.yml` | #269 | Low | | `bread_inv` as a performance kwarg on `compute_robust_vcov` to avoid re-inverting `(X'WX)` when the caller already has it. Deferred from Phase 1a for scope. HC2 and HC2+BM both need the bread inverse, so a shared hint would save one `np.linalg.solve` per sandwich. | `linalg.py::compute_robust_vcov` | Phase 1a | Low | | MPD cluster+hc2_bm path computes CR2 precomputes twice — once via `solve_ols` → `_compute_cr2_bm` for vcov + per-coefficient DOF, then again via `_compute_cr2_bm_contrast_dof` from `MultiPeriodDiD.fit()` for the post-period-average contrast DOF. Both rebuild `H = X bread_inv X'`, the residual-maker `M`, and the per-cluster `A_g = (I - H_gg)^{-1/2}` matrices. O(n²k) redundant work; acceptable for typical cluster-robust DiD panel sizes (n ≤ a few thousand). Fix would plumb the contrast DOF through the existing CR2 vcov path (intrusive API change) or share the precomputes via a cached helper. | `linalg.py::_compute_cr2_bm_contrast_dof`, `estimators.py::MultiPeriodDiD.fit` | follow-up | Low | | Rust-backend HC2 implementation. Current Rust path only supports HC1; HC2 and CR2 Bell-McCaffrey fall through to the NumPy backend. For large-n fits this is noticeable. | `rust/src/linalg.rs` | Phase 1a | Low | diff --git a/tests/test_openai_review.py b/tests/test_openai_review.py index 8e6cc5ae..e6c510d6 100644 --- a/tests/test_openai_review.py +++ b/tests/test_openai_review.py @@ -2074,6 +2074,287 @@ def test_rust_test_yml_pr_filter_covers_ai_review_surfaces(self, workflow_paths) f"or prompt would skip the hardening tests entirely." ) + # docs-tests.yml, notebooks.yml, and ci-gate.yml are not AI-review surfaces, + # but their ready-for-ci label contracts are asserted by + # TestCiWorkflowLabelEventGuard in this file. They must be in rust-test.yml's + # path filters too, or a workflow-only edit to them would skip the suite that + # locks their contracts. + GUARD_COVERED_WORKFLOWS = ( + ".github/workflows/docs-tests.yml", + ".github/workflows/notebooks.yml", + ".github/workflows/ci-gate.yml", + ) + + def test_rust_test_yml_push_filter_covers_guarded_workflows(self, workflow_paths): + push_section, _ = workflow_paths + for path in self.GUARD_COVERED_WORKFLOWS: + assert path in push_section, ( + f"rust-test.yml `push.paths:` must include {path!r} so an edit " + f"to that workflow triggers the suite whose " + f"TestCiWorkflowLabelEventGuard locks its ready-for-ci guard." + ) + + def test_rust_test_yml_pr_filter_covers_guarded_workflows(self, workflow_paths): + _, pr_section = workflow_paths + for path in self.GUARD_COVERED_WORKFLOWS: + assert path in pr_section, ( + f"rust-test.yml `pull_request.paths:` must include {path!r} " + f"(same rationale as push.paths): a PR that only edits that " + f"workflow must still run TestCiWorkflowLabelEventGuard." + ) + + +class TestCiWorkflowLabelEventGuard: + """The expensive CI matrices — rust-test.yml, notebooks.yml, + docs-tests.yml — are gated on the `ready-for-ci` label and list + `labeled`/`unlabeled` in their `pull_request` trigger `types`. Without a + label-NAME refinement, churning ANY label (e.g. adding `bug`) on a PR that + already carries `ready-for-ci` re-fires the whole matrix — the 4h+ + pure-Python fallback included — because the guard only checks the label is + PRESENT, not that the triggering label IS `ready-for-ci`. + + Locks the fix (PR #269): each guarded job's `if:` must additionally require + that, when the triggering action is a label add/remove, the label is + `ready-for-ci`. `ci-gate.yml` is exempt — it is the one-step required check + and must re-evaluate on every label event to flip the gate red/green when + `ready-for-ci` is added/removed.""" + + # Each guarded workflow maps to the jobs whose `if:` guard must carry the + # refinement. Asserting per NAMED job (not just whole-file presence) means + # dropping the refinement from ANY single job — or renaming/removing one — + # fails the test, even in multi-job files. + EXPECTED_JOBS = { + "rust-test.yml": ("rust-tests", "python-tests", "python-fallback"), + "notebooks.yml": ("execute-notebooks",), + "docs-tests.yml": ("doc-snippets", "sphinx-build", "docs-deps-py39-smoke"), + } + + # The exact guard every gated job must carry (folded to one line). Asserting + # the WHOLE normalized expression — not just fragment presence — pins the + # boolean structure, so a malformed predicate that merely mentions the right + # tokens (e.g. `==` flipped to `!=`, or mis-parenthesized) still fails the + # test. The folded form was cross-checked against GitHub's `if:` semantics + # with a YAML parser at authoring; it evaluates as: run on push or on a + # ready-for-ci PR, but skip when an UNRELATED label is added/removed. + EXPECTED_GUARD = ( + "github.event_name != 'pull_request' " + "|| (contains(github.event.pull_request.labels.*.name, 'ready-for-ci') " + "&& (github.event.action != 'labeled' && github.event.action != 'unlabeled' " + "|| github.event.label.name == 'ready-for-ci'))" + ) + + @staticmethod + def _norm(expr): + """Collapse all whitespace so guard comparison is robust to line breaks + and indentation (YAML `>-` folding) while still pinning every token.""" + return "".join(expr.split()) + + @staticmethod + def _job_if_guards(text): + """Map each top-level job name -> its `if:` expression folded to one + line. Pure-text parse (PyYAML is not a test dependency), relying on the + workflows' 2-space job-header / 4-space step-key indentation.""" + import re + + lines = text.splitlines() + try: + start = next(i for i, ln in enumerate(lines) if ln.rstrip() == "jobs:") + except StopIteration: + return {} + guards = {} + job = None + folding = False # inside a folded/literal `if:` block scalar + for ln in lines[start + 1 :]: + header = re.match(r"^ ([A-Za-z0-9_-]+):\s*$", ln) + if header: + job, folding = header.group(1), False + continue + if job is None: + continue + key = re.match(r"^ if:\s*(.*)$", ln) + if key: + rest = key.group(1).strip() + folding = rest in (">-", ">", ">+", "|", "|-", "|+") + guards[job] = "" if folding else rest + continue + if folding: + if ln.strip() and (len(ln) - len(ln.lstrip())) > 4: + guards[job] = (guards[job] + " " + ln.strip()).strip() + elif ln.strip(): + folding = False # dedented to a sibling key — block ended + return guards + + @staticmethod + def _job_names(text): + """Set of all top-level job names (the ` name:` headers under `jobs:`), + including jobs with no `if:` — so a newly added unguarded job is still + visible to the allowlist check.""" + import re + + lines = text.splitlines() + try: + start = next(i for i, ln in enumerate(lines) if ln.rstrip() == "jobs:") + except StopIteration: + return set() + names = set() + for ln in lines[start + 1 :]: + if re.match(r"^\S", ln): # back to column 0: out of jobs: + break + header = re.match(r"^ ([A-Za-z0-9_-]+):\s*$", ln) + if header: + names.add(header.group(1)) + return names + + @staticmethod + def _pull_request_types(text): + """Parse `on:` -> `pull_request:` -> `types:` (SCOPED to that block, so an + unrelated `types:` key elsewhere cannot bind). Handles both inline + (`types: [a, b]`) and block (`types:` then `- a`) YAML list syntax. + Returns the set of trigger event-type strings; empty set if absent.""" + import re + + in_on = in_pr = collecting = False + block = set() + for ln in text.splitlines(): + if re.match(r"^on:\s*$", ln): + in_on, in_pr, collecting = True, False, False + continue + if collecting: # accumulating a block-style `- item` list under types: + item = re.match(r"^ -\s*['\"]?([A-Za-z_]+)['\"]?\s*$", ln) + if item: + block.add(item.group(1)) + continue + return block # dedented out of the types: list + if in_on and re.match(r"^\S", ln): # a col-0 key: out of the on: block + in_on = in_pr = False + if in_on and re.match(r"^ pull_request:\s*$", ln): + in_pr = True + continue + if in_pr and re.match(r"^ \S", ln): # 2-space sibling: out of pull_request + in_pr = False + if in_pr: + inline = re.match(r"^ types:\s*\[([^\]]*)\]", ln) + if inline: + return {t.strip() for t in inline.group(1).split(",") if t.strip()} + if re.match(r"^ types:\s*$", ln): + collecting, block = True, set() + return block + + @staticmethod + def _named_step_if(text, step_name): + """Fold the `if:` of the step whose `- name:` equals step_name (6-space + step item, 8-space `if:`). Anchored to the named step so a later + conditional step elsewhere can't shift what this locks. '' if not found.""" + import re + + in_step = False + step_if = "" + folding = False + for ln in text.splitlines(): + name = re.match(r"^ - name:\s*(.*\S)\s*$", ln) + if name: + in_step = name.group(1).strip() == step_name + folding = False + continue + if not in_step: + continue + key = re.match(r"^ if:\s*(.*)$", ln) + if key: + rest = key.group(1).strip() + folding = rest in (">-", ">", ">+", "|", "|-", "|+") + step_if = "" if folding else rest + continue + if folding: + if ln.strip() and (len(ln) - len(ln.lstrip())) > 8: + step_if = (step_if + " " + ln.strip()).strip() + elif ln.strip(): + folding = False + return step_if + + @pytest.fixture(scope="class") + def workflows_dir(self): + if _SCRIPT_PATH is None: + pytest.skip("Could not resolve script path") + assert _SCRIPT_PATH is not None + return _SCRIPT_PATH.parent.parent.parent / ".github" / "workflows" + + def test_guards_filter_unrelated_label_churn(self, workflows_dir): + for wf_name, expected_jobs in self.EXPECTED_JOBS.items(): + wf = workflows_dir / wf_name + assert wf.exists(), ( + f"{wf_name} is missing (guarded workflow deleted/renamed?) — the " + f"lock test must fail loudly rather than skip." + ) + text = wf.read_text() + # Every top-level job must be in EXPECTED_JOBS, so a NEW job added to + # a guarded workflow fails here instead of slipping in without a guard. + assert self._job_names(text) == set(expected_jobs), ( + f"{wf_name}: top-level jobs {sorted(self._job_names(text))} != " + f"EXPECTED_JOBS {sorted(expected_jobs)}. Add the new job to " + f"EXPECTED_JOBS and give it the ready-for-ci guard." + ) + guards = self._job_if_guards(text) + for job in expected_jobs: + assert job in guards, ( + f"{wf_name}: gated job {job!r} not found (renamed or " + f"removed?). Update EXPECTED_JOBS or restore the job guard." + ) + assert self._norm(guards[job]) == self._norm(self.EXPECTED_GUARD), ( + f"{wf_name} job {job!r} `if:` guard must be EXACTLY the " + f"ready-for-ci-transition guard (whitespace-insensitive), so a " + f"malformed predicate cannot pass by merely mentioning the right " + f"tokens.\n expected: {self.EXPECTED_GUARD}\n " + f"parsed: {guards[job]}\nSee PR #269." + ) + + def test_ci_gate_locks_presence_based_contract(self, workflows_dir): + """ci-gate.yml is the cheap one-step REQUIRED check: it must keep + re-evaluating on every label event (so the gate flips red/green when + ready-for-ci is added/removed) and stay presence-based. It must NOT + adopt the matrices' label-name refinement, or removing an unrelated + label could leave the required check stale.""" + wf = workflows_dir / "ci-gate.yml" + assert wf.exists(), ( + "ci-gate.yml is missing (required-check workflow deleted/renamed?) — " + "the lock test must fail loudly rather than skip." + ) + text = wf.read_text() + types = self._pull_request_types(text) + step_if = self._named_step_if(text, "Require ready-for-ci label on PRs") + assert {"labeled", "unlabeled"} <= types, ( + f"ci-gate.yml `pull_request.types` must include labeled+unlabeled so " + f"the required check re-fires when ready-for-ci is added/removed; " + f"parsed types = {sorted(types)}." + ) + # Exact-match the step guard: presence-based AND label-name-agnostic (the + # matrices' `github.event.label.name` refinement must NOT leak in here). + assert self._norm(step_if) == self._norm( + "github.event_name == 'pull_request' " + "&& !contains(github.event.pull_request.labels.*.name, 'ready-for-ci')" + ), ( + f"ci-gate.yml step `if:` must stay the presence-based gate " + f"(label-name-agnostic).\n parsed: {step_if!r}" + ) + + def test_guarded_workflows_keep_label_event_types(self, workflows_dir): + """The matrices are gated by ADDING `ready-for-ci`, which only triggers + them if they listen for `labeled`/`unlabeled`. The exact-guard test above + pins the `if:` predicate but not the trigger `types:`; without these + events a ready-for-ci add would never start CI, yet the guard would still + match. Lock the trigger surface too.""" + for wf_name in self.EXPECTED_JOBS: + wf = workflows_dir / wf_name + assert wf.exists(), ( + f"{wf_name} is missing (guarded workflow deleted/renamed?) — the " + f"lock test must fail loudly rather than skip." + ) + types = self._pull_request_types(wf.read_text()) + assert {"labeled", "unlabeled"} <= types, ( + f"{wf_name} `pull_request.types` must include labeled+unlabeled so " + f"adding `ready-for-ci` actually triggers the matrix; parsed " + f"types = {sorted(types)}." + ) + class TestWorkflowCommentPosting: """The workflow has TWO rerun-detection gates that must agree: