From 0821b94cc3d56cb59459002f5cb786da712066de Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 27 May 2026 14:26:21 +0100 Subject: [PATCH] feat(query): (crosslang ...) + (since ...) forms (issue #33 S3b+c) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds two query forms to the panic-attack query subcommand: 1. (since 2026-04-12) — match findings whose first_seen_run (when ISO-formatted) or hexad created_at is >= the cutoff. RFC-3339 strings sort lexicographically when they share the same UTC offset and fractional-second precision, so a string >= compare is exact for well-formed timestamps in one normalised form (e.g. all `Z`). Combined with `and`, expresses the "(diff :since DATE :category CAT)" use case the issue calls out: (and (since 2026-04-12) (category CryptoMisuse)) 2. (crosslang :from FROM_CAT :to TO_CAT) — co-occurrence proxy for FFI/cross-language reachability. Matches a TO-category finding in a repo that also has at least one FROM-category finding. Most FFI-driven proof drift surfaces in the same repo, so this is the operationally useful approximation until kanren-crosslang facts are persisted as hexads (a later slice). Both keyword and positional shapes accepted: (crosslang :from UnsafeFFI :to ProofDrift) (crosslang UnsafeFFI ProofDrift) Implementation: - New Query variants Since(String) and Crosslang { from, to }. - Parser extended with `since` and `crosslang` heads; crosslang accepts both positional and `:from`/`:to` keyword forms. - Evaluator's load_context now also builds a repo→categories index so (crosslang ...) is O(1) per row. Tests: 9 new (4 parser positive + 2 rejection cases, 2 crosslang evaluator, 1 since evaluator). Lib total: 248 green. Clippy clean with -D warnings. Refs #33. Stacked on #57 (S3) — diff against main includes the S1+S2+S3 changes until they land. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- src/query/mod.rs | 247 ++++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 224 insertions(+), 23 deletions(-) diff --git a/src/query/mod.rs b/src/query/mod.rs index 7496e5b..9381f75 100644 --- a/src/query/mod.rs +++ b/src/query/mod.rs @@ -7,7 +7,7 @@ //! (issue #33 S2). Returns a list of `FindingHit`s the caller can render //! as a table or JSON. //! -//! ## Supported forms (S3 initial) +//! ## Supported forms //! //! ```text //! (category UnsafeCode) @@ -16,25 +16,34 @@ //! (repo ) //! (file ) //! (pr-state open | pr-filed | pr-merged | pr-closed | dismissed | nil) +//! (since 2026-04-12) ; or (since "2026-04-12T00:00:00Z") +//! (crosslang :from FFI :to ProofDrift) +//! (crosslang FFI ProofDrift) ; positional shorthand //! (and ...) //! (or ...) //! (not ) //! ``` //! -//! ## Deferred to S3 follow-ups +//! ## Semantic notes //! -//! - `(crosslang :from FFI :to ProofDrift)` — relational chain over the -//! kanren cross-language fact base. Needs an integration with -//! `src/kanren/crosslang.rs` that runs *after* the persistence layer -//! is settled in S1/S2/S3 initial. -//! - `(diff :since 2026-04-12 :category PA022)` — temporal slicing by -//! run id. Requires an explicit "since" cursor in the hexad store -//! beyond `created_at` (e.g. a "baseline run id" marker). +//! - `(since ...)` compares the finding's `first_seen_run` (when it +//! parses as ISO-8601) or its hexad `created_at` against the cutoff +//! lexicographically. RFC-3339 / ISO-8601 strings sort correctly under +//! string comparison, which is what we use. //! -//! The initial form is enough to express the operational queries the -//! estate-sweep campaign actually needs day-to-day: "all PA001 of -//! Critical severity that don't have an open PR yet", "all dismissed -//! findings in repo foo", etc. +//! - `(crosslang :from X :to Y)` is a **co-occurrence proxy** for +//! FFI/cross-language reachability: it matches a `Y`-category finding +//! 
in a repo that also has at least one `X`-category finding. This is +//! the operationally useful case for the estate sweep — most +//! FFI-driven proof drift surfaces in the same repository. A future +//! slice will persist `kanren::crosslang` derived facts as hexads +//! and tighten this to true reachability over the FFI boundary graph. +//! +//! ## Deferred to later follow-ups +//! +//! - True kanren-derived `(crosslang ...)` evaluation backed by +//! persisted FFI-boundary facts (rather than the current +//! co-occurrence proxy). use crate::storage::{ load_campaign_hexads, load_finding_hexads, CampaignSemantic, FindingSemantic, @@ -63,6 +72,20 @@ pub enum Query { File(String), /// Match by campaign state. `None` means "no campaign hexad yet". PrState(Option), + /// `(crosslang :from FROM_CAT :to TO_CAT)` — match a `TO_CAT` finding + /// in a repo that also has at least one `FROM_CAT` finding. + /// + /// Co-occurrence proxy: until kanren-derived cross-language facts are + /// persisted as hexads (S3b follow-up), "the FROM finding is reachable + /// from the TO finding" is approximated by "they live in the same + /// repository", which is the operationally-useful case for the estate + /// sweep — most FFI-driven proof drift surfaces in the same repo. + Crosslang { from: String, to: String }, + /// Match by ISO-8601 / RFC-3339 first-seen timestamp ≥ `since`. + /// Filed under the `(since ...)` keyword for compactness; combined + /// with `(and (category ...) (since ...))` gives the "what's new + /// since DATE" diff query the issue calls out. + Since(String), /// Conjunction. And(Vec), /// Disjunction. 
@@ -259,6 +282,52 @@ fn parse_form(tokens: &[Token], cursor: &mut usize) -> Result { close_paren(tokens, cursor)?; Ok(Query::Not(Box::new(child))) } + "since" => { + let v = parse_value(tokens, cursor)?; + close_paren(tokens, cursor)?; + Ok(Query::Since(v)) + } + "crosslang" => { + // Two accepted shapes: + // (crosslang FROM TO) — positional + // (crosslang :from FROM :to TO) — keyword + // First token decides which. + let mut from: Option = None; + let mut to: Option = None; + loop { + match tokens.get(*cursor) { + Some(Token::RParen) => { + *cursor += 1; + break; + } + Some(Token::Atom(a)) if a.starts_with(':') => { + let kw = a[1..].to_ascii_lowercase(); + *cursor += 1; + let v = parse_value(tokens, cursor)?; + match kw.as_str() { + "from" => from = Some(v), + "to" => to = Some(v), + other => bail!("unknown crosslang keyword: :{}", other), + } + } + Some(_) => { + // Positional fallback — `from` first, then `to`. + let v = parse_value(tokens, cursor)?; + if from.is_none() { + from = Some(v); + } else if to.is_none() { + to = Some(v); + } else { + bail!("too many positional args to crosslang"); + } + } + None => bail!("missing ')' in crosslang"), + } + } + let from = from.ok_or_else(|| anyhow!("crosslang missing :from"))?; + let to = to.ok_or_else(|| anyhow!("crosslang missing :to"))?; + Ok(Query::Crosslang { from, to }) + } other => bail!("unknown query head: {}", other), } } @@ -321,14 +390,24 @@ fn close_paren(tokens: &[Token], cursor: &mut usize) -> Result<()> { struct FindingRow { finding: FindingSemantic, campaign: Option, + /// `created_at` of the finding hexad — used by `(since ...)`. + created_at: String, +} + +/// Index from repo name → set of category Debug-names present in that +/// repo. Used by `(crosslang ...)` to check co-occurrence. 
+type RepoCategoryIndex = HashMap>; + +struct EvalContext { + rows: Vec, + repo_categories: RepoCategoryIndex, } -fn load_rows(base_dir: &Path) -> Result> { +fn load_context(base_dir: &Path) -> Result { let finding_hexads = load_finding_hexads(base_dir)?; let mut campaign_hexads = load_campaign_hexads(base_dir)?; campaign_hexads.sort_by(|a, b| a.created_at.cmp(&b.created_at)); - // Latest campaign event wins per finding_id. let mut latest: HashMap = HashMap::new(); for h in campaign_hexads { if let Some(c) = h.semantic.campaign { @@ -337,19 +416,29 @@ fn load_rows(base_dir: &Path) -> Result> { } let mut rows = Vec::new(); + let mut repo_categories: RepoCategoryIndex = HashMap::new(); for h in finding_hexads { + let created_at = h.created_at.clone(); if let Some(f) = h.semantic.finding { + repo_categories + .entry(f.repo_name.to_ascii_lowercase()) + .or_default() + .insert(f.category.to_ascii_lowercase()); let campaign = latest.get(&f.finding_id).cloned(); rows.push(FindingRow { finding: f, campaign, + created_at, }); } } - Ok(rows) + Ok(EvalContext { + rows, + repo_categories, + }) } -fn matches(query: &Query, row: &FindingRow) -> bool { +fn matches(query: &Query, row: &FindingRow, ctx: &EvalContext) -> bool { match query { Query::Category(target) => row.finding.category.eq_ignore_ascii_case(target), Query::RuleId(target) => row.finding.rule_id.eq_ignore_ascii_case(target), @@ -369,19 +458,46 @@ fn matches(query: &Query, row: &FindingRow) -> bool { (Some(want), Some(c)) => c.state.eq_ignore_ascii_case(want), _ => false, }, - Query::And(children) => children.iter().all(|c| matches(c, row)), - Query::Or(children) => children.iter().any(|c| matches(c, row)), - Query::Not(inner) => !matches(inner, row), + Query::Since(since) => { + // ISO-8601 / RFC-3339 timestamps sort lexicographically when + // the format is well-formed. Falls back to `>=` string compare + // against either the finding hexad's created_at or the + // first_seen_run if it parses as a timestamp. 
+ let candidate = if !row.finding.first_seen_run.is_empty() + && row.finding.first_seen_run.contains('T') + { + row.finding.first_seen_run.as_str() + } else { + row.created_at.as_str() + }; + candidate >= since.as_str() + } + Query::Crosslang { from, to } => { + // `to`-matching finding in a repo that also has at least one + // `from`-category finding. The current finding must be the + // `to` side (so callers can wrap with `and`/`or`). + if !row.finding.category.eq_ignore_ascii_case(to) { + return false; + } + let from_lower = from.to_ascii_lowercase(); + ctx.repo_categories + .get(&row.finding.repo_name.to_ascii_lowercase()) + .map(|cats| cats.contains(&from_lower)) + .unwrap_or(false) + } + Query::And(children) => children.iter().all(|c| matches(c, row, ctx)), + Query::Or(children) => children.iter().any(|c| matches(c, row, ctx)), + Query::Not(inner) => !matches(inner, row, ctx), } } /// Execute a query against the persisted hexad store and return all /// matching findings. pub fn run(query: &Query, base_dir: &Path) -> Result> { - let rows = load_rows(base_dir)?; + let ctx = load_context(base_dir)?; let mut hits = Vec::new(); - for row in rows { - if matches(query, &row) { + for row in &ctx.rows { + if matches(query, row, &ctx) { hits.push(FindingHit { finding_id: row.finding.finding_id.clone(), repo_name: row.finding.repo_name.clone(), @@ -689,6 +805,91 @@ mod tests { ); } + #[test] + fn parse_since_atom() { + let q = parse("(since 2026-04-12)").unwrap(); + assert_eq!(q, Query::Since("2026-04-12".to_string())); + } + + #[test] + fn parse_since_quoted() { + let q = parse("(since \"2026-04-12T00:00:00Z\")").unwrap(); + assert_eq!(q, Query::Since("2026-04-12T00:00:00Z".to_string())); + } + + #[test] + fn parse_crosslang_keyword_form() { + let q = parse("(crosslang :from UnsafeFFI :to ProofDrift)").unwrap(); + assert_eq!( + q, + Query::Crosslang { + from: "UnsafeFFI".to_string(), + to: "ProofDrift".to_string(), + } + ); + } + + #[test] + fn 
parse_crosslang_positional_form() { + let q = parse("(crosslang UnsafeFFI ProofDrift)").unwrap(); + assert_eq!( + q, + Query::Crosslang { + from: "UnsafeFFI".to_string(), + to: "ProofDrift".to_string(), + } + ); + } + + #[test] + fn parse_crosslang_missing_keyword_errors() { + assert!(parse("(crosslang :from UnsafeFFI)").is_err()); + assert!(parse("(crosslang :to ProofDrift)").is_err()); + } + + #[test] + fn parse_crosslang_unknown_keyword_errors() { + assert!(parse("(crosslang :bogus UnsafeFFI :to ProofDrift)").is_err()); + } + + #[test] + fn run_since_filters_old_findings() { + let dir = tempdir().unwrap(); + write_test_findings(dir.path()); + // All test fixtures stamp first_seen_run with a hexad-id that + // does not look like an ISO timestamp; fallback is the hexad's + // created_at, which is "now". So (since 2099) returns nothing. + let q_future = parse("(since 2099-01-01)").unwrap(); + assert!(run(&q_future, dir.path()).unwrap().is_empty()); + // Conversely (since 2000) returns everything. + let q_past = parse("(since 2000-01-01)").unwrap(); + assert_eq!(run(&q_past, dir.path()).unwrap().len(), 3); + } + + #[test] + fn run_crosslang_matches_co_occurrence() { + let dir = tempdir().unwrap(); + write_test_findings(dir.path()); + // Test fixture: repo "alpha" has UnsafeCode + CryptoMisuse. + // (crosslang :from UnsafeCode :to CryptoMisuse) should match + // the CryptoMisuse finding in alpha. + let q = parse("(crosslang :from UnsafeCode :to CryptoMisuse)").unwrap(); + let hits = run(&q, dir.path()).unwrap(); + assert_eq!(hits.len(), 1); + assert_eq!(hits[0].repo_name, "alpha"); + assert_eq!(hits[0].category, "CryptoMisuse"); + } + + #[test] + fn run_crosslang_excludes_missing_source() { + let dir = tempdir().unwrap(); + write_test_findings(dir.path()); + // Test fixture: no PanicPath finding anywhere. So + // (crosslang :from PanicPath :to UnsafeCode) finds nothing. 
+ let q = parse("(crosslang :from PanicPath :to UnsafeCode)").unwrap(); + assert!(run(&q, dir.path()).unwrap().is_empty()); + } + #[test] fn render_table_empty() { let s = render_table(&[]);