diff --git a/.github/workflows/governance-reusable.yml b/.github/workflows/governance-reusable.yml
index 95b09196..66467a33 100644
--- a/.github/workflows/governance-reusable.yml
+++ b/.github/workflows/governance-reusable.yml
@@ -241,6 +241,39 @@ jobs:
             exit 1
           fi
           echo "✅ Security policy check passed"
+      - name: Tooling version integrity
+        # Estate Tooling Version Integrity policy (root cause: burble#39).
+        # Inline + dependency-free so it runs in any caller repo.
+        # R0 just>=1.19.0 floor (blocking when just present) and R1
+        # unversioned family-tool install (blocking) are hard; R4
+        # unexplained continue-on-error is advisory-first per the
+        # documented "advisory now, --strict later" gating doctrine.
+        run: |
+          set -uo pipefail
+          FAMILY='just|must|trust|adjust|bust|dust|intend'
+          if command -v just >/dev/null 2>&1; then
+            # Actions runs `run:` under `bash -e`; with pipefail a failing
+            # `just --version` would abort the step here without a message.
+            # Carry on so the empty-version branch below reports [R0] instead.
+            jv=$(just --version 2>/dev/null | cut -d' ' -f2) || true
+            maj=${jv%%.*}; rest=${jv#*.}; min=${rest%%.*}
+            if [ -z "$jv" ] || ! { [ "${maj:-0}" -gt 1 ] || { [ "${maj:-0}" -eq 1 ] && [ "${min:-0}" -ge 19 ]; }; }; then
+              echo "❌ [R0] just ${jv:-?} < 1.19.0 — import? unsupported"; exit 1
+            fi
+            echo "✅ [R0] just $jv >= 1.19.0"
+          else
+            echo "ℹ️ [R0] just not on PATH — skipped"
+          fi
+          R1=0
+          if [ -d .github/workflows ]; then
+            while IFS= read -r hit; do
+              [ -n "$hit" ] || continue
+              echo "❌ [R1] unversioned family-tool install: $hit"
+              R1=$((R1+1))
+            done < <(grep -rnE "^[[:space:]]*tool:[[:space:]]*(${FAMILY})[[:space:]]*$" .github/workflows 2>/dev/null || true)
+          fi
+          [ "$R1" -gt 0 ] && { echo "❌ [R1] $R1 unversioned family-tool install(s) — pin tool: <name>@<version>"; exit 1; }
+          echo "✅ Tooling version integrity passed (R1 clean; R4 advisory via standards/tasks/tooling-integrity-lint.sh)"
 
   quality:
     name: Code quality + docs
@@ -258,6 +291,10 @@ jobs:
           path: ./
           base: ${{ github.event.pull_request.base.sha || github.event.before }}
           head: ${{ github.sha }}
+        # by-design: trufflehog is a best-effort advisory scan; a scanner
+        # diff/range hiccup must not fail the whole governance gate. The
+        # blocking secret check is the inline grep in the security job.
+        # (Tooling Version Integrity Rule 4 — documented soft-gate.)
         continue-on-error: true
       - name: Check TODO/FIXME
         run: |
diff --git a/Justfile b/Justfile
index f7845390..04878146 100644
--- a/Justfile
+++ b/Justfile
@@ -1,6 +1,11 @@
 # SPDX-License-Identifier: PMPL-1.0-or-later
 # justfile - Just recipes for this project
 # See: https://github.com/hyperpolymath/mustfile
+#
+# requires: just >= 1.19.0 (import? optional-import support)
+# Enforced by the `tooling-version-integrity` must-check, not self-
+# enforcing: import? fails at parse time before any recipe can guard it.
+# See TOOLING-VERSION-INTEGRITY-POLICY.adoc (root cause: burble#39).
 
 # Default recipe
 import? 
"contractile.just" diff --git a/Mustfile b/Mustfile index 4f5e24e3..5d409a5b 100644 --- a/Mustfile +++ b/Mustfile @@ -11,3 +11,5 @@ checks: run: just test - name: format run: just fmt + - name: tooling-version-integrity + run: bash tasks/tooling-integrity-lint.sh diff --git a/TOOLING-VERSION-INTEGRITY-POLICY.adoc b/TOOLING-VERSION-INTEGRITY-POLICY.adoc new file mode 100644 index 00000000..b839c8e7 --- /dev/null +++ b/TOOLING-VERSION-INTEGRITY-POLICY.adoc @@ -0,0 +1,144 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later += Hyperpolymath Tooling Version Integrity Policy +Jonathan D.A. Jewell +:toc: +:toc-placement: preamble + +Canonical policy for how the rhyming command-runner family +(`just`/Justfile, `must`/Mustfile, `trust`/Trustfile, `adjust`/Adjustfile, +`bust`/Bustfile, `dust`/Dustfile, `intend`/Intentfile) and their +`contractile`-generated fragments are versioned, installed, and gated +across all hyperpolymath repositories. All contributors and AI agents must +follow this document. + +== Why this policy exists (the burble#39 post-mortem) + +For months a burble Elixir test gate was bolted open +(`continue-on-error: true`) on the belief that "the suite is red for a +pre-existing reason". It was not. CI installed `just` *unversioned* via an +install-action's bundled manifest, which shipped `just` 1.14.0. The +Justfile used `import?` (optional import, requires `just >= 1.19.0`), so +`just` died at *parse time* with `error: Unknown start of token` — +**before any recipe ran**. `mix test --no-start` never executed. A +runner-layer crash was misdiagnosed for months as an inner-layer test +failure, and a suppressed signal froze the wrong diagnosis in place. + +Root causes, generalised: + +. *Silent transitive version skew* — nobody chose 1.14.0; a bundled + manifest did, invisibly. +. *Feature/version coupling with no declared floor* — `import?` needs + `just >= 1.19.0`; nothing declared or enforced that minimum. +. 
*Outer-layer failure misattributed to an inner layer* — `just` crashed + before the tests; the redness was labelled "failing tests". +. *A suppressed signal froze a wrong diagnosis* — `continue-on-error` + added on an unverified root cause, hiding the real state indefinitely. + +The transferable invariant: **a green/red bit cannot distinguish "passed" +from "never ran".** Every gate is blind to this unless it proves the work +actually happened. + +== Rule 1 — Never install a family tool unversioned + +Any workflow step that installs a rhyming-family tool MUST pin an explicit +version that satisfies every feature the consumed files use. + +[source,yaml] +---- +# WRONG — resolves via the action's bundled manifest (may be ancient) +- uses: taiki-e/install-action@<sha> # vX + with: + tool: just + +# RIGHT — explicit, satisfies `import?` (>= 1.19.0) +- uses: taiki-e/install-action@<sha> # vX + with: + tool: just@1.34.0 +---- + +`just@1.34.0` is the current estate-canonical pin. Bump it centrally via +the canonical CI templates (`rsr-template-repo` → `v3-templater` / +`reposystem`), never per-repo. + +== Rule 2 — Declare the minimum tool version in the file + +Every `Justfile`/`Mustfile`/`Trustfile`/etc. MUST carry a machine-greppable +floor annotation near the top, immediately documenting the coupling: + +---- +# requires: just >= 1.19.0 (import? optional-import support) +---- + +This annotation is *documentation and a lint target*. It is **not +self-enforcing**: `import?` fails at parse time, before any recipe or +guard inside the file can run. The executable assertion lives in Rule 3. + +== Rule 3 — Gates must prove execution, not exit 0 + +A "passed" check MUST assert that the underlying work actually ran, not +merely that nothing returned non-zero. Concretely: + +* The canonical `must` contract carries a `tooling-version-integrity` + check (see `contractiles/must/Mustfile`) that asserts the installed + `just` satisfies the declared floor; this repo's live `Mustfile` runs the same assertion as part of the workflow lint. 
This + check runs after `just` is on `PATH`, so it catches the parse-time + class that an in-file guard structurally cannot. +* Test steps SHOULD emit a positive execution sentinel (e.g. a non-zero + test count or an explicit marker line); CI SHOULD fail on the + *absence* of that sentinel, which is what catches "the runner died + before the work". + +== Rule 4 — Every soft-gate must be explained + +`continue-on-error: true` (or any soft-gate) MUST be explained, in a +comment within the 12 lines above it, in exactly one of two ways: + +. *Suppressed gate* — a known-failing gate temporarily bolted open. The + comment MUST contain `GATE DEACTIVATED <YYYY-MM-DD>`, the *verified* root + cause (not a guess — burble#39's guess was wrong), and the explicit + single-line re-arm trigger. This form is debt and must carry a path + back to armed. +. *By-design advisory* — a step that is best-effort by nature (e.g. + resilience to an upstream outage, optional enrichment). The comment + MUST carry a `by-design:` or `advisory:` rationale. This form is not + debt; it documents intent so reviewers do not mistake it for a + suppressed gate. + +A bare `continue-on-error` with neither is a policy violation, flagged by +`tasks/tooling-integrity-lint.sh` (rule R4). + +=== Rollout: advisory first, tighten later + +R4 ships *advisory* (reported, non-blocking) by default; R0 (the `just` version floor) and R1 (unversioned +installs) are blocking immediately. This deliberately follows the same +"advisory first, tighten later" gating doctrine the estate adopted for +Hypatia SARIF (burble#35 item 3) — a new policy gate enforced as a +hard estate-wide failure on day one would itself be bolted open, exactly +the failure mode this policy exists to end. Promote R4 to blocking +(`--strict`) per repo as its existing soft-gates are explained. 
+ +== Rule 5 — Resolve at source, never per-repo + +Fixes to any of the above are made in the canonical source +(`contractile` source under `contractiles/`, the canonical CI/Justfile +templates in `rsr-template-repo` → `v3-templater` / `reposystem`) and +propagated by re-adoption. Per-repo patches recreate the drift this +policy exists to prevent. + +NOTE: The `contractile` generator itself is currently out-of-band (the +generator repository is not part of the working estate). Until it is +in-tree, changes to generated `contractile.just` content are made in the +`contractiles/` *source* fragments and regenerated by whoever holds the +generator; consumers must re-run `contractile gen-just --dir contractiles` +to pick them up. This gap is tracked estate-wide (see the Tooling Version +Integrity sweep tracking issue). + +== Enforcement + +* `tasks/tooling-integrity-lint.sh` — checks the installed `just` against the 1.19.0 floor (R0), then scans `.github/workflows/` for + unversioned family-tool installs (Rule 1) and bare `continue-on-error` + (Rule 4). Exits non-zero on any blocking violation; Rule 4 findings are advisory and block only with `--strict`. +* `contractiles/must/Mustfile` (canonical template) and this repo's live + `Mustfile` carry a `tooling-version-integrity` mandatory check: the live `Mustfile` + runs the lint, the canonical template runs the dependency-free `just` floor assertion — so every repo adopting the canonical `must` contract + inherits the gate (Rule 3, Rule 5). diff --git a/contractiles/must/Mustfile b/contractiles/must/Mustfile index 9d8e952b..a9667f9f 100644 --- a/contractiles/must/Mustfile +++ b/contractiles/must/Mustfile @@ -35,3 +35,7 @@ checks: - name: no-template-residue description: "No placeholder text from the contractiles template must remain." run: "bash -uc '! rg -rn \"REPLACE-WITH|PLMP-1.0-or-later\" --type-not binary . | rg .'" + + - name: tooling-version-integrity + description: "Installed just must satisfy the import? floor (>= 1.19.0). Dependency-free; proves the running just is new enough — the burble#39 invariant an in-file guard cannot enforce. See standards TOOLING-VERSION-INTEGRITY-POLICY.adoc." 
+    run: "bash -uc 'command -v just >/dev/null 2>&1 || exit 0; jv=$(just --version 2>/dev/null | cut -d\" \" -f2); test -n \"$jv\" || { echo \"just present, version unreadable\"; exit 1; }; maj=${jv%%.*}; rest=${jv#*.}; min=${rest%%.*}; { [ \"$maj\" -gt 1 ] || { [ \"$maj\" -eq 1 ] && [ \"$min\" -ge 19 ]; }; } || { echo \"just $jv < 1.19.0 import? unsupported\"; exit 1; }'"
diff --git a/tasks/tooling-integrity-lint.sh b/tasks/tooling-integrity-lint.sh
new file mode 100644
index 00000000..32f7321e
--- /dev/null
+++ b/tasks/tooling-integrity-lint.sh
@@ -0,0 +1,102 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: PMPL-1.0-or-later
+#
+# tooling-integrity-lint.sh — enforces the Hyperpolymath Tooling Version
+# Integrity Policy (see ../TOOLING-VERSION-INTEGRITY-POLICY.adoc).
+#
+# Usage: bash tasks/tooling-integrity-lint.sh [--strict]
+#
+# Checks the toolchain first:
+#   R0  the `just` on PATH, if any, must be >= 1.19.0 (the `import?` floor).
+#       BLOCKING when `just` is present; skipped with a note when absent.
+#
+# Then scans .github/workflows/*.{yml,yaml} for:
+#   R1  an unversioned rhyming-family tool install
+#       (`tool: just` instead of `tool: just@<version>`). ALWAYS BLOCKING:
+#       few instances, unambiguously wrong, the burble#39 root cause.
+#   R4  an *unexplained* `continue-on-error: true`. A soft-gate is
+#       explained if, within 12 lines above it, there is EITHER
+#         (a) `GATE DEACTIVATED <YYYY-MM-DD>` — a dated, root-caused,
+#             temporary suppression of a known-failing gate, OR
+#         (b) `by-design:` / `advisory:` — a documented, intentional
+#             best-effort step (e.g. upstream-outage resilience).
+#       Bare, with neither, is a violation. ADVISORY by default
+#       (reported, non-blocking) per the estate "advisory first,
+#       tighten later" gating doctrine; pass --strict to make R4 block.
+#
+# Exit 0 = clean (or only advisory R4 findings in default mode).
+# Exit 1 = blocking violations.
+# Pure bash + grep/awk; no external deps. Run from a repo root.
+
+set -uo pipefail
+
+STRICT=0
+[ "${1:-}" = "--strict" ] && STRICT=1
+
+FAMILY='just|must|trust|adjust|bust|dust|intend'
+WF_DIR=".github/workflows"
+r1=0
+r4=0
+
+# --- R0: installed `just` must satisfy the import? floor (>= 1.19.0) --------
+# BLOCKING when `just` is present (this is the burble#39 invariant, and the
+# execution-proof check an in-file guard structurally cannot do). Skipped,
+# with a note, when `just` is absent (repo may not use it).
+if command -v just >/dev/null 2>&1; then
+  jv=$(just --version 2>/dev/null | cut -d' ' -f2)
+  maj=${jv%%.*}; rest=${jv#*.}; min=${rest%%.*}
+  # stderr is silenced for the whole comparison: a non-numeric component makes
+  # `[` complain, and that case is already reported as an R0 failure below.
+  if [ -z "$jv" ] || ! { [ "${maj:-0}" -gt 1 ] || { [ "${maj:-0}" -eq 1 ] && [ "${min:-0}" -ge 19 ]; }; } 2>/dev/null; then
+    echo "::error::[R0] just ${jv:-?} < 1.19.0 — import? unsupported (Tooling Version Integrity Rule 1/3)"
+    echo "tooling-integrity-lint: FAIL — just below the import? floor"
+    exit 1
+  fi
+  echo "tooling-integrity-lint: R0 OK — just $jv satisfies >= 1.19.0"
+else
+  echo "tooling-integrity-lint: R0 skipped — just not on PATH"
+fi
+
+[ -d "$WF_DIR" ] || { echo "tooling-integrity-lint: no $WF_DIR — nothing more to check"; exit 0; }
+
+while IFS= read -r -d '' wf; do
+  # --- R1: unversioned family-tool install (BLOCKING) ----------------------
+  while IFS=: read -r lineno _; do
+    [ -n "${lineno:-}" ] || continue
+    echo "::error file=$wf,line=$lineno::[R1] unversioned family-tool install — pin 'tool: <name>@<version>' (Tooling Version Integrity Rule 1)"
+    r1=$((r1 + 1))
+  done < <(grep -nE "^[[:space:]]*tool:[[:space:]]*(${FAMILY})[[:space:]]*$" "$wf" 2>/dev/null)
+
+  # --- R4: unexplained continue-on-error (ADVISORY unless --strict) ---------
+  # The date is spelled out digit by digit rather than with {n} intervals:
+  # some awk implementations (older mawk builds) do not support interval
+  # expressions, which would make a valid dated block look unexplained.
+  while IFS=: read -r lineno _; do
+    [ -n "${lineno:-}" ] || continue
+    echo "::warning file=$wf,line=$lineno::[R4] unexplained continue-on-error — add a 'GATE DEACTIVATED <YYYY-MM-DD>' suppression block OR a 'by-design:'/'advisory:' rationale (Tooling Version Integrity Rule 4)"
+    r4=$((r4 + 1))
+  done < <(awk '
+    { line[NR] = $0 }
+    /^[[:space:]]*continue-on-error:[[:space:]]*true[[:space:]]*$/ {
+      ok = 0
+      for (i = NR-1; i >= NR-12 && i >= 1; i--) {
+        if (line[i] ~ /GATE DEACTIVATED[[:space:]]+[0-9][0-9][0-9][0-9]-[0-9][0-9]-[0-9][0-9]/) { ok = 1; break }
+        if (line[i] ~ /(by-design|advisory):/) { ok = 1; break }
+      }
+      if (!ok) print NR ":"
+    }' "$wf" 2>/dev/null)
+done < <(find "$WF_DIR" -maxdepth 1 -type f \( -name '*.yml' -o -name '*.yaml' \) -print0 2>/dev/null)
+
+echo "tooling-integrity-lint: R1(blocking)=$r1 R4(soft-gate)=$r4 strict=$STRICT"
+
+if [ "$r1" -gt 0 ]; then
+  echo "tooling-integrity-lint: FAIL — $r1 unversioned family-tool install(s). See TOOLING-VERSION-INTEGRITY-POLICY.adoc Rule 1"
+  exit 1
+fi
+if [ "$STRICT" -eq 1 ] && [ "$r4" -gt 0 ]; then
+  echo "tooling-integrity-lint: FAIL (--strict) — $r4 unexplained continue-on-error. See Rule 4"
+  exit 1
+fi
+[ "$r4" -gt 0 ] && echo "tooling-integrity-lint: PASS with $r4 advisory R4 finding(s) (non-blocking; --strict to enforce)"
+[ "$r4" -eq 0 ] && echo "tooling-integrity-lint: OK — no violations"
+exit 0