From c7558a2dd366f796995d44ecb047f60aa76c441a Mon Sep 17 00:00:00 2001 From: hyperpolymath <6759885+hyperpolymath@users.noreply.github.com> Date: Wed, 20 May 2026 10:09:44 +0100 Subject: [PATCH] =?UTF-8?q?fix(abi-emit-manifest):=20skip=20GADT-style=20`?= =?UTF-8?q?data=20=E2=80=A6=20where`=20declarations?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `iseriser abi-emit-manifest` bombed with "data declaration has no `=`" on any Idris2 module that declared a GADT — e.g. boj-server's vordr-mcp `MonotonicDegradation` proof relation: data MonotonicDegradation : IntegrityState -> IntegrityState -> Type where StayHealthy : MonotonicDegradation Healthy Healthy ... These are proof / relation types, not exported enums, and have no place in the ABI manifest. The emitter must walk past them without choking the way it must already walk past records, interfaces, and function definitions. `collect_data_body` now detects GADT form (body starts with `:`, not `=`) and delegates to a new `skip_gadt_block` helper that consumes the type signature + indented constructor block. Empty variant lists were already silently dropped by `parse_enum_declarations` so no further plumbing is needed. Two unit tests added: a vordr-mcp shape (real ADT + sibling GADT + to-int mapping) and a degenerate signature-only GADT (`data Phantom : Type` with no `where`). Both pass; all 8 emitter tests + 9 integration tests stay green. Verified end-to-end against the real cartridge: $ iseriser abi-emit-manifest \ --idris …/vordr-mcp/abi/VordrMcp/SafeVordr.idr \ --cartridge vordr-mcp --out /tmp/v.json abi-emit-manifest: wrote /tmp/v.json (0 enums, 0 transitions) $ iseriser abi-verify --manifest /tmp/v.json --zig-ffi …/vordr_ffi.zig abi-verify: OK exit: 0 Refs hyperpolymath/standards#92 (Phase 2 allowlist expansion). Refs hyperpolymath/boj-server#111 (the cartridge that motivated the fix). Co-Authored-By: Claude Opus 4.7 (1M context) --- src/abi/idris_emitter.rs | 112 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 112 insertions(+) diff --git a/src/abi/idris_emitter.rs b/src/abi/idris_emitter.rs index ae7799a..13071cc 100644 --- a/src/abi/idris_emitter.rs +++ b/src/abi/idris_emitter.rs @@ -214,7 +214,23 @@ fn find_data_keyword(src: &str) -> Option { /// After the enum name, collect the variant list. Handles both /// `= A | B | C` (one-line) and `\n = A\n | B\n | C` (multi-line). /// Returns the variant names + the number of bytes consumed from `src`. +/// +/// Also handles GADT-style declarations (`data Foo : Type -> Type where …`) +/// by skipping them: they are proof/relation types, not exported enums, +/// and have no place in the ABI manifest. Returns an empty variant list +/// with the byte-count needed to walk past the GADT block. fn collect_data_body(src: &str) -> Result<(Vec, usize)> { + // GADT detection: the body starts with `:` (signature form), not `=` + // (variant-list form). Examples seen in the wild: + // `data MonotonicDegradation : State -> State -> Type where + // StayHealthy : MonotonicDegradation Healthy Healthy + // …` + // These declare proof relations, not data with variants. + let trimmed = src.trim_start(); + if trimmed.starts_with(':') { + return Ok((Vec::new(), skip_gadt_block(src))); + } + // Find the `=` that opens the variant list. let eq_idx = src .find('=') @@ -266,6 +282,51 @@ fn collect_data_body(src: &str) -> Result<(Vec, usize)> { Ok((variants, consumed)) } +/// Skip past a GADT-style `data Foo : … where …` declaration. The block +/// ends at the first non-indented, non-blank line (or end of input). The +/// declaration itself contributes nothing to the manifest. +fn skip_gadt_block(src: &str) -> usize { + // First: consume the type signature line(s) up to `where` (or end of + // declaration if there is no `where`, e.g. `data Empty : Type`). + let where_pos = src.find("where"); + let header_end = match where_pos { + Some(w) => { + // Consume through the rest of that line. + src[w..] + .find('\n') + .map(|i| w + i + 1) + .unwrap_or(src.len()) + } + None => { + // No `where` — single-line `data Foo : ...`. Consume through eol. + src.find('\n').map(|i| i + 1).unwrap_or(src.len()) + } + }; + + // No constructor block to walk if we already hit EOF. + if header_end >= src.len() { + return src.len(); + } + + // Now consume the indented constructor block (if any). + let mut consumed = header_end; + for line in src[header_end..].split_inclusive('\n') { + let trimmed = line.trim_end_matches('\n'); + // Blank lines are tolerated inside the block; they don't end it. + if trimmed.is_empty() { + consumed += line.len(); + continue; + } + // A line not starting with whitespace ends the GADT block. + let starts_with_ws = trimmed.starts_with(' ') || trimmed.starts_with('\t'); + if !starts_with_ws { + break; + } + consumed += line.len(); + } + consumed +} + /// Parse all ` = ` equations grouped by function name. /// Returns `fn_name → (variant_name → integer)`. Patterns of the form /// `(VariantName _)` are accepted (parameterised constructor); patterns @@ -540,4 +601,55 @@ fooToInt B = 1 let m = emit_from_idris_src(src, "demo", "Foo.idr").unwrap(); assert_eq!(m.enums[0].variants.len(), 2); } + + #[test] + fn skips_gadt_data_declaration() { + // vordr-mcp shape: a real ADT `IntegrityState` plus a GADT proof + // relation `MonotonicDegradation` over it. The GADT must be + // skipped — it has no `=`-form variant list — but the surrounding + // enum and the to-int mapping must still be picked up. + let src = " +data IntegrityState = Healthy | Drifted | Tampered | Unknown + +data MonotonicDegradation : IntegrityState -> IntegrityState -> Type where + StayHealthy : MonotonicDegradation Healthy Healthy + HealthyDrift : MonotonicDegradation Healthy Drifted + HealthyTamp : MonotonicDegradation Healthy Tampered + DriftedStay : MonotonicDegradation Drifted Drifted + DriftedTamp : MonotonicDegradation Drifted Tampered + TamperedStay : MonotonicDegradation Tampered Tampered + +stateToInt : IntegrityState -> Int +stateToInt Healthy = 0 +stateToInt Drifted = 1 +stateToInt Tampered = 2 +stateToInt Unknown = 3 +"; + let m = emit_from_idris_src(src, "vordr-mcp", "SafeVordr.idr").unwrap(); + // The GADT must have been skipped, not emitted as an enum. + assert_eq!(m.enums.len(), 1, "expected only IntegrityState in manifest"); + let e = &m.enums[0]; + assert_eq!(e.name, "IntegrityState"); + let names: Vec<&str> = e.variants.iter().map(|v| v.name.as_str()).collect(); + assert_eq!(names, vec!["Healthy", "Drifted", "Tampered", "Unknown"]); + assert_eq!(e.variants[3].value, 3); + } + + #[test] + fn skips_signature_only_gadt_data_declaration() { + // Degenerate GADT with no `where` block (single-line empty type) + // followed by a real ADT that must still be picked up. + let src = " +data Phantom : Type + +data Real = Yes | No + +realToInt : Real -> Int +realToInt Yes = 1 +realToInt No = 0 +"; + let m = emit_from_idris_src(src, "demo", "P.idr").unwrap(); + assert_eq!(m.enums.len(), 1); + assert_eq!(m.enums[0].name, "Real"); + } }