diff --git a/.github/workflows/rust-ci.yml b/.github/workflows/rust-ci.yml index d8e65689..f97e23ad 100644 --- a/.github/workflows/rust-ci.yml +++ b/.github/workflows/rust-ci.yml @@ -45,14 +45,25 @@ jobs: - name: Check formatting run: cargo fmt --all -- --check + # echidna#85: `flint`/`spark`/`chapel` are opt-in, system-library- + # dependent features (libflint LGPL-3 / GNAT+libechidna_spark / Zig + # FFI). build.rs gates their `-l...` link directives correctly, but + # the bare runner has none of those libraries. Key distinction: + # `clippy`/`doc`/`check` type-check and lint the cfg-gated code WITHOUT + # invoking the linker, so `--all-features` is safe there and keeps full + # lint/compile coverage of flint/spark/chapel. Only `cargo test` builds + # a linked binary — so the two test steps drop to `--features verisim` + # (the only pure-Rust optional feature; `live-provers ⊇ verisim`). + # flint/spark/chapel test execution stays in their dedicated CI + # (chapel-ci.yml, SPARK Theatre Gate, live-provers.yml). - name: Clippy lints run: cargo clippy --all-targets --all-features -- -D warnings - name: Run unit tests - run: cargo test --lib --all-features + run: cargo test --lib --features verisim - name: Run integration tests - run: cargo test --test integration_tests --all-features + run: cargo test --test integration_tests --features verisim - name: Build release run: cargo build --release @@ -133,7 +144,7 @@ jobs: uses: Swatinem/rust-cache@ad397744b0d591a723ab90405b7247fac0e6b8db # v2 - name: Build documentation - run: cargo doc --no-deps --all-features + run: cargo doc --no-deps --all-features # echidna#85: doc doesn't link, --all-features safe env: RUSTDOCFLAGS: -D warnings @@ -202,4 +213,4 @@ jobs: uses: Swatinem/rust-cache@ad397744b0d591a723ab90405b7247fac0e6b8db # v2 - name: Check MSRV - run: cargo check --all-features + run: cargo check --all-features # echidna#85: check doesn't link, --all-features safe diff --git a/.machine_readable/6a2/STATE.a2ml b/.machine_readable/6a2/STATE.a2ml index 0563dc06..4987c026 100644 --- a/.machine_readable/6a2/STATE.a2ml +++ b/.machine_readable/6a2/STATE.a2ml @@ -5,7 +5,7 @@ [metadata] project = "echidna" version = "2.1.1" -last-updated = "2026-04-26" +last-updated = "2026-05-19" status = "active" [project-context] @@ -1265,3 +1265,25 @@ not-done = [ "Static-correctness only: full podman builds of Containerfile.wave3 targets NOT run in this env (heavy). Stage graph + COPY --from=rust-builder wiring verified by construction from the 9 verbatim originals.", "container-ci.yml edit needs `workflow` token scope to push.", ] + +[session-2026-05-19-ci-baseline-triage] +summary = "Landed PR #73 (Wave-3 consolidation, MERGED d3db97d) + triaged its red CI into 3 classes (real PR defect / baseline-rot / #77 infra jam) + cleared the two baseline blockers as dedicated PRs. Human ADR: docs/decisions/2026-05-19-ci-baseline-triage.md." +required-merge-gate = ["Analyze (rust)", "Cargo check + clippy + fmt", "Dependency audit", "Dogfood Gate", "Hypatia Neurosymbolic Analysis", "OpenSSF Scorecard"] +findings = [ + "Triage rule: a red echidna PR check is one of (1) real in-scope PR defect, (2) baseline-rot — fails identically on origin/main, fix in dedicated PR not on the feature branch, (3) #77 infra jam — runner unassigned or ~15-30s fast-fail with EMPTY --log-failed. Verify class via `git show origin/main:` + check main's run before patching.", + "Only the 6 required-merge-gate contexts block auto-merge; MVP Smoke / Julia Integration / MSRV / PR (address) / Validate A2ML+K9 / governance/* / T1-T4 are NON-gating.", + "build.rs ALREADY correctly cfg-gates -lflint/-lspark/-lchapel link dirs; the -lflint CI failure was rust-ci.yml forcing --all-features, NOT a build.rs defect.", + "Linker distinction: cargo test/build LINK; cargo clippy/doc/check do NOT — so clippy/doc/check tolerate --all-features on a bare runner (full lint coverage), only the test steps must drop features.", + "MSRV check fails identically on origin/main (24s, zero step logs) = #77-class baseline, not a regression, not required.", +] +changes = [ + "PR #73 (MERGED d3db97d 17:49Z): added `COPY crates ./crates` + `COPY benches ./benches` to .containerization/Containerfile + Containerfile.full (were stale vs the crates/* workspace; wave3 + mcp were already correct).", + "Issue #85 -> PR #86 (fix/rust-ci-drop-all-features): rust-ci.yml clippy/doc/check KEEP --all-features (no link, full coverage); the 2 cargo-test steps -> --features verisim (sole pure-Rust optional feat; live-provers superset). Avoids blinding ~510 LoC flint + spark-FFI to CI. Validated: Test Suite X(main)->ok(#86) 3m18s; local cargo test --lib --features verisim = 1092 pass/0 fail.", + "PR #87 (fix/clusterfuzzlite-dockerfile-symlink): tracked symlink .clusterfuzzlite/Dockerfile -> Containerfile (git mode 120000); clusterfuzzlite action hardcodes Dockerfile, keeps house Containerfile name, zero blast radius. Non-gating hygiene.", + "docs/decisions/2026-05-19-ci-baseline-triage.md — human-readable ADR of the above (companion to this block).", +] +not-done = [ + "#86 + #87 BLOCKED solely on #77 (required jobs awaiting runner assignment); auto-merge-squash armed, will self-fire when #77 clears. Do NOT admin-override.", + "#77 CI-infra jam is estate-owned (active estate-wide remediation), not collapsible from this repo.", + "MSRV + Julia Integration remain red on main as separate non-required baseline rot — not in scope of #85; file dedicated issues only if they later become gating.", +] diff --git a/docs/decisions/2026-05-19-ci-baseline-triage.md b/docs/decisions/2026-05-19-ci-baseline-triage.md new file mode 100644 index 00000000..1ee903f3 --- /dev/null +++ b/docs/decisions/2026-05-19-ci-baseline-triage.md @@ -0,0 +1,100 @@ + + +# 2026-05-19 — CI baseline triage: real defects vs baseline-rot vs infra jam + +ADR-style record of the CI triage done while landing PR #73 and the +two baseline-blocker follow-ups (#86, #87). Written so future humans +**and** agents do not re-litigate these red checks. Companion +machine-readable record: +`.machine_readable/6a2/STATE.a2ml § [session-2026-05-19-ci-baseline-triage]`. + +## TL;DR for anyone debugging echidna PR CI + +A red check on an echidna PR is one of **three** distinct things. Do +not treat them the same: + +1. **Real, in-scope PR defect** — caused by the PR's own diff. Fix it + on the PR. +2. **Baseline rot** — fails identically on `origin/main`, independent + of any PR diff. **Do not block the PR on it.** Fix in a *dedicated* + PR off `main`; blocked PRs clear on their next `main` merge. +3. **#77 CI-infra jam** — runner never assigned, or job fast-fails in + ~15–30 s with **no step logs**. Not a code defect. Estate-infra, + tracked in #77; an estate-wide remediation owns it. + +**Merge is gated only by these 6 required status checks** (GitHub +branch protection on `main`, verified 2026-05-19): + +- `Analyze (rust)` +- `Cargo check + clippy + fmt` +- `Dependency audit` +- `Dogfood Gate` +- `Hypatia Neurosymbolic Analysis` +- `OpenSSF Scorecard` + +Everything else (`MVP Smoke`, `Julia Integration`, `Minimum Supported +Rust Version`, `PR (address)`, `Validate A2ML/K9 manifests`, +`governance / *`, the `T1–T4` prover matrices) is **non-gating**. A +red non-required check does not stop auto-merge. + +## What was triaged (PR #73 = Wave-3 consolidation, MERGED `d3db97d`) + +| Check | Class | Resolution | +|---|---|---| +| `Build & verify container image` | **Real PR defect** | The minimal `.containerization/Containerfile` + `Containerfile.full` copied only `src/rust`+`src/interfaces`; the workspace `Cargo.toml` has path-deps `crates/echidna-core` + `crates/typed_wasm` and `crates/*` members. `Containerfile.wave3` fixed it; the other two were left stale (`Containerfile.mcp` was already correct). Carried `COPY crates ./crates` + `COPY benches ./benches` to both — merged in #73. | +| `Test Suite`, `Code Coverage`, `MVP Smoke`, `Julia Integration` | **Baseline rot** | `rust-ci.yml` ran with `--all-features` → forced on `flint`/`spark`/`chapel` (system-lib features) → `cargo test` link-failed `unable to find library -lflint` on **every** PR. → issue #85 → **PR #86**. | +| `governance / *` (3 jobs, ~15 s, no logs), most pending required jobs | **#77 infra jam** | Runner-assignment / fast-fail-no-logs. Parked on #77. | +| `PR (address)` (clusterfuzzlite) | Pre-existing, non-gating | Action hardcodes `.clusterfuzzlite/Dockerfile`; repo ships `Containerfile`. → **PR #87** (tracked symlink). | +| `Minimum Supported Rust Version` | Baseline / #77-class | Fails **identically on `main`** (24 s, zero step logs). Not introduced by any PR; not required. | + +## Decision 1 — `rust-ci.yml` feature flags (#85 → #86) + +**Root cause is the workflow, not the code.** `build.rs` *correctly* +gates every `-l...` link directive behind `#[cfg(feature = "...")]` +for `flint`/`spark`/`chapel`. The defect was `rust-ci.yml` forcing +those features on via `--all-features` on a runner with no `libflint` +/ GNAT / Zig FFI lib. + +**Key distinction that shaped the fix:** only `cargo test` (and +`cargo build`) invoke the **linker**. `cargo clippy`, `cargo doc`, +`cargo check` type-check and lint the `cfg`-gated code **without +linking**, so they tolerate `--all-features` on a bare runner. + +Therefore: + +- `clippy`, `doc`, `check` → **keep `--all-features`** (full + lint/compile coverage of flint/spark/chapel, zero infra cost). +- the two `cargo test` steps → **`--features verisim`** (the only + pure-Rust optional feature; `live-provers ⊇ verisim`). + +A blanket `--features verisim` everywhere was **rejected** because it +would stop CI ever compiling/linting ~510 LoC of `flint` +(`src/rust/coprocessor/flint.rs`) and the root-crate `spark`-gated FFI +(`src/rust/ffi/spark_axiom.rs`, `axiom_tracker.rs`) — a silent +coverage hole. `flint`/`spark`/`chapel` *test execution* stays covered +by their dedicated workflows (`chapel-ci.yml`, the SPARK Theatre Gate, +`live-provers.yml`). + +Validated in CI: `Test Suite` goes ✗ on `main` → **✓ 3m18s** on #86; +local `cargo test --lib --features verisim` = 1092 passed, 0 failed. + +## Decision 2 — clusterfuzzlite filename (#87) + +The `google/clusterfuzzlite` `build_fuzzers` action hardcodes the path +`.clusterfuzzlite/Dockerfile` and exposes **no** filename input. House +style is `Containerfile` (Podman-not-Docker estate rule). Resolution: +a **tracked symlink** `.clusterfuzzlite/Dockerfile → Containerfile` +(git mode `120000`) — clusterfuzzlite resolves through it, the +canonical SPDX-headed file keeps the house name, zero blast radius +(nothing references `.clusterfuzzlite/Containerfile` by name). This +check is **non-gating**; the fix is hygiene so the fuzz job runs. + +## Standing guidance (do not re-litigate) + +- Before "fixing" a red echidna check: `git show origin/main:` + and check whether the same check is red on `main`. If yes → it is + baseline rot or #77; **do not** patch it on the feature PR. +- Auto-merge (squash) is the norm here. Arm it; do **not** + admin-override CI-gated PRs. +- `#77` is estate-infra. Symptoms: queued-forever jobs, ~15–30 s + fast-fail with empty `--log-failed`. Not collapsible from a repo PR.