Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 6 additions & 19 deletions .github/workflows/build-gossamer-gui.yml
Original file line number Diff line number Diff line change
Expand Up @@ -73,10 +73,11 @@ jobs:
exit 1
fi

- name: Install jsonschema
- name: Build ci-tools (panll-harness validator)
run: |
set -euo pipefail
python3 -m pip install --user --quiet jsonschema
cargo build --release --manifest-path scripts/ci-tools/Cargo.toml \
--bin validate-panll-harness

- name: Fetch panll-harness v2 schema
run: |
Expand All @@ -90,23 +91,9 @@ jobs:
- name: Validate panll.harness.toml against panll-harness/v2
run: |
set -euo pipefail
python3 - <<'PY'
import json, sys, tomllib
from jsonschema import Draft202012Validator, validate

with open("src/ui/gossamer/panll.harness.toml", "rb") as fh:
data = tomllib.load(fh)
with open("/tmp/schemas/panll-harness-v2.schema.json") as fh:
schema = json.load(fh)

Draft202012Validator.check_schema(schema)
try:
validate(instance=data, schema=schema)
except Exception as exc:
print(f"::error::panll.harness.toml fails panll-harness/v2 validation: {exc}")
sys.exit(1)
print("OK panll.harness.toml validates against panll-harness/v2")
PY
./scripts/ci-tools/target/release/validate-panll-harness \
src/ui/gossamer/panll.harness.toml \
/tmp/schemas/panll-harness-v2.schema.json

loader-smoke:
name: Gossamer loader smoke harness
Expand Down
24 changes: 3 additions & 21 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -225,27 +225,9 @@ jobs:
- name: Parse k9iser.toml + verify declared paths exist
run: |
set -euo pipefail
python3 - <<'PY'
import tomllib, sys, pathlib

with open("k9iser.toml", "rb") as fh:
data = tomllib.load(fh)

missing = []
for src in data.get("source", []):
p = pathlib.Path(src["path"])
if not p.exists():
missing.append(src["path"])

if missing:
for m in missing:
print(f"::error::k9iser.toml declares missing source {m}")
sys.exit(1)

n = len(data.get("source", []))
c = len(data.get("constraint", []))
print(f"OK k9iser.toml parses — {n} source(s), {c} constraint(s)")
PY
cargo build --release --manifest-path scripts/ci-tools/Cargo.toml \
--bin check-k9iser-paths
./scripts/ci-tools/target/release/check-k9iser-paths k9iser.toml

- name: Run k9iser build (if CLI available)
run: |
Expand Down
6 changes: 4 additions & 2 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -647,7 +647,8 @@ jobs:
if: ${{ github.event.inputs.mode != 'regenerate-baseline' }}
run: |
set -euo pipefail
python3 scripts/check-bench-regression.py \
cargo build --release --manifest-path scripts/bench-tools/Cargo.toml
./scripts/bench-tools/target/release/check-bench-regression \
bench-output.txt \
.machine_readable/benchmarks/baselines.json \
| tee -a "$GITHUB_STEP_SUMMARY"
Expand All @@ -656,7 +657,8 @@ jobs:
if: ${{ github.event.inputs.mode == 'regenerate-baseline' }}
run: |
set -euo pipefail
python3 scripts/update-bench-baselines.py \
cargo build --release --manifest-path scripts/bench-tools/Cargo.toml
./scripts/bench-tools/target/release/update-bench-baselines \
bench-output.txt \
.machine_readable/benchmarks/baselines.json
echo "## Regenerated baseline" >> "$GITHUB_STEP_SUMMARY"
Expand Down
14 changes: 0 additions & 14 deletions .hypatia-baseline.json
Original file line number Diff line number Diff line change
@@ -1,18 +1,4 @@
[
{
"severity": "critical",
"rule_module": "cicd_rules",
"type": "banned_language_file",
"file": "scripts/check-bench-regression.py",
"action": "flag"
},
{
"severity": "critical",
"rule_module": "cicd_rules",
"type": "banned_language_file",
"file": "scripts/update-bench-baselines.py",
"action": "flag"
},
{
"severity": "critical",
"rule_module": "code_safety",
Expand Down
2 changes: 0 additions & 2 deletions .hypatia-exemptions.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,6 @@ already placed at each file's site.

| File | Rule | Inline marker | Rationale | Revisit when |
|---|---|---|---|---|
| `scripts/update-bench-baselines.py` | `cicd_rules/banned_language_file` | `# hypatia:ignore cicd_rules/banned_language_file` (line 3) | Parses criterion's bencher-format output; criterion's tooling assumes Python downstream. | A maintained Rust/shell parser exists for criterion bencher format. |
| `scripts/check-bench-regression.py` | `cicd_rules/banned_language_file` | `# hypatia:ignore cicd_rules/banned_language_file` (line 3) | Pair of the above. | Same. |
| `src/abi/RuleEngine.idr` | `code_safety/believe_me`, `structural_drift/SD008` | `-- hypatia:ignore code_safety/believe_me structural_drift/SD008` (line 19) | The scanner is counting the literal token `believe_me` inside an Idris2 comment that asserts there are *no* such primitives. There is no actual `believe_me` call site in the module. | The scanner learns to skip comment lines (token vs syntactic match). |

## Audit-training and remediation-script corpora
Expand Down
15 changes: 5 additions & 10 deletions .hypatia-ignore
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,8 @@
#
# This file is for exemptions that span a whole file or directory.

# ─── Python bench helpers ───────────────────────────────────────────────
#
# Scoped exemption — RSR org policy bans Python except SaltStack. These two
# scripts are bench-data helpers used only by .github/workflows/bench.yml;
# they parse criterion output and update baseline JSON. Rust/Julia port is
# tracked but not blocking. Until the port lands, suppress the
# banned_language_file finding on these two specific paths so the gate
# treats them as a known, documented carve-out rather than baseline noise.
cicd_rules/banned_language_file:scripts/check-bench-regression.py
cicd_rules/banned_language_file:scripts/update-bench-baselines.py
# ─── Python bench helpers — REVOKED ─────────────────────────────────────
#
# The two scripts/*.py bench helpers were removed and re-implemented in
# Rust (scripts/bench-tools/). The org Python ban is now total with no
# exceptions, so this carve-out is deleted rather than carried.
3 changes: 2 additions & 1 deletion .machine_readable/benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ cargo bench --bench hypatia_bench -- \
| tee /tmp/bench.txt

# Parse the output and update baselines.json:
python3 scripts/update-bench-baselines.py /tmp/bench.txt \
cargo build --release --manifest-path scripts/bench-tools/Cargo.toml
./scripts/bench-tools/target/release/update-bench-baselines /tmp/bench.txt \
.machine_readable/benchmarks/baselines.json
```

Expand Down
7 changes: 7 additions & 0 deletions scripts/bench-tools/Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 30 additions & 0 deletions scripts/bench-tools/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: PMPL-1.0-or-later
#
# Standalone bench-data tooling — deliberately NOT a workspace member so it
# never perturbs the main build / proof gates, and zero-dependency so CI
# needs no crates.io fetch. Replaces the former scripts/*.py (org policy
# bans Python outside SaltStack; see standards Explicit-Escape Principle).
# The empty [workspace] table below makes this crate its own workspace root,
# which is what keeps it out of the repo's main Cargo workspace.
[workspace]

[package]
name = "bench-tools"
version = "0.1.0"
edition = "2021"
license = "PMPL-1.0-or-later"
publish = false

[lib]
path = "src/lib.rs"

[[bin]]
name = "check-bench-regression"
path = "src/bin/check-bench-regression.rs"

[[bin]]
name = "update-bench-baselines"
path = "src/bin/update-bench-baselines.rs"

[profile.release]
opt-level = 1
157 changes: 157 additions & 0 deletions scripts/bench-tools/src/bin/check-bench-regression.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,157 @@
// SPDX-License-Identifier: PMPL-1.0-or-later
//
// check-bench-regression — compare a criterion bencher run against
// .machine_readable/benchmarks/baselines.json and fail if any benchmark
// regressed by more than the configured threshold. A faithful Rust port of
// the former scripts/check-bench-regression.py (org policy bans Python
// outside SaltStack). Pairs with update-bench-baselines.
//
// Usage:
// check-bench-regression <bencher-output> <baselines.json>
//
// Exit status: 0 = no regressions over threshold (also when no baselines
//              exist yet, or no bench lines could be parsed),
//              1 = at least one regression, 2 = usage / file error.
//
// Markdown summary and `::warning::` annotations -> stdout (for
// $GITHUB_STEP_SUMMARY); `::error::` annotations -> stderr.

use bench_tools::{fmt_ns, parse_bencher_output, parse_json, Json};
use std::process::exit;

/// Compares a criterion bencher-format run against the stored baselines and
/// prints a Markdown report on stdout.
///
/// Arguments (positional): `<bencher-output> <baselines.json>`.
///
/// Exit status:
/// * `0` — no benchmark regressed past the threshold; also used when no bench
///   lines could be parsed or when there are no baselines yet (advisory mode).
/// * `1` — at least one benchmark regressed past the threshold.
/// * `2` — wrong argument count, or the bencher output could not be read.
///
/// The threshold is read from `_regression_threshold_pct` in the baselines
/// document and defaults to 50 (percent) when absent or non-numeric.
fn main() {
    let argv: Vec<String> = std::env::args().collect();
    if argv.len() != 3 {
        eprintln!("usage: check-bench-regression <bencher-output> <baselines.json>");
        exit(2);
    }
    let current_path = &argv[1];
    let baselines_path = &argv[2];

    // Only a genuinely absent file is reported as "missing"; any other I/O
    // failure (permissions, invalid UTF-8, ...) surfaces the real cause so the
    // CI log is not misleading.
    let current_text = match std::fs::read_to_string(current_path) {
        Ok(text) => text,
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => {
            eprintln!("error: {current_path} missing");
            exit(2);
        }
        Err(err) => {
            eprintln!("error: cannot read {current_path}: {err}");
            exit(2);
        }
    };

    let mut current = parse_bencher_output(&current_text);
    current.sort_by(|a, b| a.0.cmp(&b.0)); // Python iterates `sorted(current.items())`

    if current.is_empty() {
        println!(
            "::warning::no bench lines parsed from current run \u{2014} \
             did criterion use --output-format bencher?"
        );
        exit(0);
    }

    let baseline_doc: Json = match std::fs::read_to_string(baselines_path) {
        Ok(text) => match parse_json(&text) {
            Ok(doc) => doc,
            Err(_) => {
                println!(
                    "::warning::{baselines_path} is not valid JSON; \
                     treating as empty baseline"
                );
                Json::Obj(vec![])
            }
        },
        // An absent baselines file is the expected first-run state: stay quiet
        // and fall through to advisory mode.
        Err(err) if err.kind() == std::io::ErrorKind::NotFound => Json::Obj(vec![]),
        // A file that exists but cannot be read would otherwise silently
        // disable the regression gate; keep the run best-effort but say so.
        Err(err) => {
            println!(
                "::warning::{baselines_path} could not be read ({err}); \
                 treating as empty baseline"
            );
            Json::Obj(vec![])
        }
    };

    // Keep only the numeric entries of the "baselines" object.
    let baselines: Vec<(String, f64)> = match baseline_doc.get("baselines") {
        Some(Json::Obj(pairs)) => pairs
            .iter()
            .filter_map(|(key, value)| value.as_f64().map(|n| (key.clone(), n)))
            .collect(),
        _ => vec![],
    };
    // First matching entry wins, mirroring the order of the parsed object.
    let lookup = |name: &str| baselines.iter().find(|(k, _)| k == name).map(|(_, v)| *v);

    let threshold_pct = baseline_doc
        .get("_regression_threshold_pct")
        .and_then(|v| v.as_f64())
        .unwrap_or(50.0);

    if baselines.is_empty() {
        println!("## Benchmark run (advisory mode \u{2014} no baselines yet)");
        println!();
        println!("| Benchmark | Current |");
        println!("|-----------|---------|");
        for (name, ns) in &current {
            println!("| `{name}` | {} |", fmt_ns(*ns));
        }
        println!();
        println!(
            "_No entries in `baselines.json` yet \u{2014} see \
             `.machine_readable/benchmarks/README.md` for how to seed them._"
        );
        exit(0);
    }

    // (name, baseline ns, current ns, percent change) for each failing benchmark.
    let mut regressions: Vec<(String, i64, i64, f64)> = vec![];
    // One report-table row per benchmark: name, current, baseline, delta, verdict.
    let mut rows: Vec<(String, String, String, String, String)> = vec![];

    for (name, ns_now) in &current {
        let ns_now = *ns_now;
        match lookup(name) {
            // Benchmark has no baseline entry: report it as new, never as a failure.
            None => rows.push((
                name.clone(),
                fmt_ns(ns_now),
                "\u{2014}".into(),
                "new".into(),
                "\u{2728}".into(),
            )),
            Some(ns_base) => {
                // A zero baseline would divide by zero; report it as "no change".
                let pct = if ns_base != 0.0 {
                    (ns_now as f64 - ns_base) / ns_base * 100.0
                } else {
                    0.0
                };
                let regressed = pct > threshold_pct;
                if regressed {
                    regressions.push((name.clone(), ns_base as i64, ns_now, pct));
                }
                let verdict = if regressed {
                    "\u{274c}" // over the threshold: fails CI
                } else if pct > threshold_pct / 2.0 {
                    "\u{26a0}\u{fe0f}" // more than half the threshold: warn only
                } else if pct < -10.0 {
                    "\u{1f680}" // more than 10% faster than baseline
                } else {
                    "\u{2705}"
                };
                rows.push((
                    name.clone(),
                    fmt_ns(ns_now),
                    fmt_ns(ns_base as i64),
                    format!("{pct:+.1}%"),
                    verdict.into(),
                ));
            }
        }
    }

    println!("## Benchmark comparison");
    println!();
    println!("Threshold: regression > **{threshold_pct:.0}%** fails CI.");
    println!();
    println!("| Benchmark | Current | Baseline | \u{0394} | |");
    println!("|-----------|---------|----------|---|---|");
    for (name, now, base, delta, verdict) in &rows {
        println!("| `{name}` | {now} | {base} | {delta} | {verdict} |");
    }
    println!();

    if !regressions.is_empty() {
        println!("### Regressions exceeding threshold");
        println!();
        for (name, ns_base, ns_now, pct) in &regressions {
            let msg = format!(
                "{name}: {} \u{2192} {} ({pct:+.1}%, threshold {threshold_pct:.0}%)",
                fmt_ns(*ns_base),
                fmt_ns(*ns_now),
            );
            // Listed in the Markdown summary and raised as a workflow error annotation.
            println!("- {msg}");
            eprintln!("::error::benchmark regression: {msg}");
        }
        exit(1);
    }
}
Loading