From b13cd37950d63081f3c3ea45d1daaebc60991918 Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 27 May 2026 18:57:42 +0100 Subject: [PATCH] fix(sarif): suppress code_scanning_alerts (CSA00x) findings from SARIF output CSA001/CSA002/CSA003/CSA004 are lens rules that query GitHub for open code-scanning alerts. Uploading their findings as SARIF turns each one into a *new* code-scanning alert that the next scan re-observes -- the boj-server post-mortem in `code_scanning_alerts.ex` documents alerts 357-386 accumulated this way. A 2026-05-27 estate audit found this self-echo pattern is widespread: 7,724 self-referential alerts across 310 of 325 code-scan-enabled repos (39.6% of all 19,509 open alerts on the estate). The fetch-time filter in `code_scanning_alerts.ex:340` stops the loop within any single scanner instance, but does not help when the SARIF is uploaded by one runner and re-observed by the next. Filtering at SARIF render time closes the loop for good. Findings still flow through the Elixir pipeline (PatternAnalyzer, TriangleRouter, FleetDispatcher) -- only the public GitHub code-scanning surface is suppressed. Existing self-referential alerts on already-uploaded SARIFs will need to be bulk-dismissed as won't-fix in a follow-up sweep. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/hypatia/sarif.ex | 26 ++++++++++++++++++- test/sarif_test.exs | 62 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+), 1 deletion(-) diff --git a/lib/hypatia/sarif.ex b/lib/hypatia/sarif.ex index d7cd72b6..b4354db9 100644 --- a/lib/hypatia/sarif.ex +++ b/lib/hypatia/sarif.ex @@ -28,13 +28,32 @@ defmodule Hypatia.SARIF do @schema_uri "https://json.schemastore.org/sarif-2.1.0.json" @version "2.1.0" + # Rule modules whose findings are lenses over GitHub's own alert + # surfaces. Uploading them as SARIF turns each finding into a new + # code-scanning alert that the next scan re-observes (self-echo) -- + # the boj-server post-mortem in `code_scanning_alerts.ex` describes + # alerts 357-386 accumulated this way, and the 2026-05-27 estate + # audit found 7,724 such alerts across 310 repos. The fetch-time + # filter in `code_scanning_alerts.ex` stops the loop going forward + # for any single scanner instance, but does not help when the SARIF + # is uploaded by one runner and re-observed by the next. Filtering + # at SARIF render time closes the loop for good. + # + # Findings still flow through the Elixir pipeline (PatternAnalyzer, + # TriangleRouter, FleetDispatcher) -- only the public GitHub surface + # is suppressed. + @meta_rule_modules ~w(code_scanning_alerts) + @doc """ Build a complete SARIF document from a finding list. Pass `repo_root` if findings carry absolute paths so they can be relativised; defaults to the CWD which is appropriate when called from a scan rooted there. """ def from_findings(findings, repo_root \\ File.cwd!()) do - {results, rules} = build_results_and_rules(findings, repo_root) + {results, rules} = + findings + |> Enum.reject(&meta_rule_finding?/1) + |> build_results_and_rules(repo_root) %{ "$schema" => @schema_uri, @@ -66,6 +85,11 @@ defmodule Hypatia.SARIF do # ─── Internals ───────────────────────────────────────────────────────── + defp meta_rule_finding?(finding) do + mod = Map.get(finding, :rule_module) || Map.get(finding, "rule_module") + stringify(mod) in @meta_rule_modules + end + defp build_results_and_rules(findings, repo_root) do Enum.reduce(findings, {[], %{}}, fn finding, {results, rules} -> mod = stringify(Map.get(finding, :rule_module) || Map.get(finding, "rule_module") || "hypatia") diff --git a/test/sarif_test.exs b/test/sarif_test.exs index 3f265f1b..79debca0 100644 --- a/test/sarif_test.exs +++ b/test/sarif_test.exs @@ -141,6 +141,68 @@ defmodule Hypatia.SARIFTest do end end + describe "meta-rule suppression" do + test "code_scanning_alerts findings are excluded from SARIF output" do + findings = [ + %{ + severity: "high", + rule_module: "code_scanning_alerts", + type: "CSA001", + file: ".github/workflows/governance.yml", + reason: "Code scanning (Hypatia): hypatia/workflow_audit/missing_workflow ..." + }, + %{ + severity: "high", + rule_module: "workflow_audit", + type: "missing_workflow", + file: ".github/workflows/scorecard.yml", + reason: "scorecard workflow missing" + } + ] + + [run] = SARIF.from_findings(findings, "/tmp") |> Map.fetch!("runs") + results = run["results"] + rules = run["tool"]["driver"]["rules"] + + assert length(results) == 1 + assert hd(results)["ruleId"] == "hypatia/workflow_audit/missing_workflow" + + refute Enum.any?(rules, fn r -> + String.starts_with?(r["id"], "hypatia/code_scanning_alerts/") + end) + end + + test "string-keyed rule_module is also recognised as meta" do + findings = [ + %{ + "severity" => "medium", + "rule_module" => "code_scanning_alerts", + "type" => "CSA003", + "file" => "x.yml", + "reason" => "stale" + } + ] + + [run] = SARIF.from_findings(findings, "/tmp") |> Map.fetch!("runs") + assert run["results"] == [] + end + + test "atom rule_module is also recognised as meta" do + findings = [ + %{ + severity: "medium", + rule_module: :code_scanning_alerts, + type: "CSA002", + file: "repo", + reason: "summary" + } + ] + + [run] = SARIF.from_findings(findings, "/tmp") |> Map.fetch!("runs") + assert run["results"] == [] + end + end + describe "fingerprints" do test "same {ruleId, uri, type, reason} produces same fingerprint" do f = %{severity: "high", rule_module: "x", type: "y", file: "a.ex", reason: "r"}