From c490fd30ed5db9d1c5a559cfdbad399208d43cbe Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Wed, 13 May 2026 15:15:57 +0100 Subject: [PATCH 1/2] treat ?? as always-infix after a call arg has been collected The post-arg break in parse_call_or_atom allowed ?? through when looks_like_prefix_binary matched, since ?? is included in the prefix-binary scanner. With a 2+ atom tail (??ident ident ...) the scanner happily treated the chain as a prefix nil-coalesce, and the first call kept absorbing it as another argument. Chained 'mget m "a" ?? mget m "b" ?? 99' then failed verification with ILO-T006 arity mismatch. Once at least one argument has been collected, ?? is unambiguously the infix nil-coalesce on the call result. The first-token case is left alone so 'g ??c 7' still passes a prefix-?? sub-expression as g's arg. --- src/parser/mod.rs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 81c9ce9f..2cb5ede8 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -1870,10 +1870,16 @@ impl Parser { let arg_idx = args.len(); let in_fn_pos = self.is_fn_ref_position(&name, arg_idx); args.push(self.parse_call_arg(in_fn_pos)?); - // After each arg, if next is infix, stop + // After each arg, if next is infix, stop. `??` is always + // infix once we've already collected at least one arg — + // `f a ?? b` means `(f a) ?? b`, never `f a (??b ...)`. + // Without this, chained `f a ?? g b ?? d` mis-parses as + // `f a (?? g b) (?? d)` because the prefix-binary scanner + // sees `?? g b` as a valid prefix nil-coalesce form. if let Some(tok) = self.peek() && Self::is_infix_or_suffix_op(tok) - && !self.looks_like_prefix_binary(self.pos) + && (matches!(tok, Token::NilCoalesce) + || !self.looks_like_prefix_binary(self.pos)) { break; } From 466a76fa6e05be9631cc9c0bcc2a778ec03bbf40 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Wed, 13 May 2026 15:16:02 +0100 Subject: [PATCH 2/2] cross-engine coverage for chained mget/at ?? defaults Extends regression_mget_default with chained 'mget m k1 ?? mget m k2 ?? d' across the first-hit, second-hit, both-miss, and no-default shapes, plus an 'at'-based pair to confirm the fix isn't mget-specific. Also drops the stale comment noting the chained form didn't parse. examples/chained-nilcoalesce.ilo demonstrates the now-correct behaviour for in-context learning and acts as a higher-level regression via tests/examples_engines.rs. --- examples/chained-nilcoalesce.ilo | 14 +++++++ tests/regression_mget_default.rs | 63 +++++++++++++++++++++++++++++--- 2 files changed, 72 insertions(+), 5 deletions(-) create mode 100644 examples/chained-nilcoalesce.ilo diff --git a/examples/chained-nilcoalesce.ilo b/examples/chained-nilcoalesce.ilo new file mode 100644 index 00000000..79a74918 --- /dev/null +++ b/examples/chained-nilcoalesce.ilo @@ -0,0 +1,14 @@ +-- Chained `??`: bare `f a ?? g b ?? d` parses as `(f a) ?? (g b) ?? d`. +-- The arity-aware call parser stops each call at its arg count, then +-- `??` falls out as left-associative infix. First non-nil wins. + +lookup k1:t k2:t>n;m=mset (mset mmap "a" 1) "b" 2;mget m k1 ?? mget m k2 ?? 99 + +-- run: lookup "a" "b" +-- out: 1 + +-- run: lookup "x" "b" +-- out: 2 + +-- run: lookup "x" "y" +-- out: 99 diff --git a/tests/regression_mget_default.rs b/tests/regression_mget_default.rs index 5450e1aa..0a425370 100644 --- a/tests/regression_mget_default.rs +++ b/tests/regression_mget_default.rs @@ -61,11 +61,29 @@ const PATHKEY_HIT: &str = r#"f>n;m=mset mmap "k" 5;ks=["k"];mget m ks.0 ?? 0"#; const CALLKEY_HIT: &str = r#"f>n;m=mset mmap "5" 7;mget m str 5 ?? 0"#; // Parenthesised key — defensive lower bound on the precedence. const PARENKEY_HIT: &str = r#"f>n;m=mset mmap "5" 7;mget m (str 5) ?? 0"#; -// Note: bare-chained `mget m "a" ?? mget m "b" ?? 99` does NOT parse -// today — the arity-aware call parser doesn't extend through a `??` -// boundary, so the second `mget` slurps `m "b" ?? 99` as three args. -// Bind each lookup into a temp first (`a=mget m "a";b=mget m "b";a??b??99`) -// or parenthesise. Logged as a separate adjacent finding. + +// --- chained `mget m k ?? mget m k2 ?? d` --- +// +// Earlier, the post-arg break in `parse_call_or_atom` let `??` through +// when followed by 2+ atoms (the prefix-binary lookahead matched), so +// `mget m "a" ?? mget m "b" ?? 99` parsed as `mget m "a" (?? mget m) (?? "b" ...)` +// and failed with ILO-T006 "expects 2 args, got 3". Now `??` is always +// infix once at least one call arg has been collected. + +// First lookup hits — short-circuits before second `mget` runs. +const CHAIN_FIRST_HIT: &str = r#"f>n;m=mset mmap "a" 1;mget m "a" ?? mget m "b" ?? 99"#; +// First miss, second hits. +const CHAIN_SECOND_HIT: &str = r#"f>n;m=mset mmap "b" 2;mget m "a" ?? mget m "b" ?? 99"#; +// Both miss — default wins. +const CHAIN_BOTH_MISS: &str = r#"f>n;m=mmap;mget m "a" ?? mget m "b" ?? 99"#; +// Two-element chain with no trailing default. +const CHAIN_NO_DEFAULT_HIT: &str = r#"f>O n;m=mset mmap "a" 1;mget m "a" ?? mget m "b""#; +const CHAIN_NO_DEFAULT_MISS: &str = r#"f>O n;m=mset mmap "b" 2;mget m "a" ?? mget m "b""#; +// `at` is another arity-2 builtin — confirm the fix isn't `mget`-specific. +// (Skip the out-of-bounds case: engines disagree on whether `at` returns nil +// or raises ILO-R004/ILO-R009, which is orthogonal to the parser fix.) +const CHAIN_AT_FIRST_HIT: &str = r#"f>n;xs=[10 20 30];at xs 0 ?? at xs 1 ?? 0"#; +const CHAIN_AT_SECOND_HIT: &str = r#"f>n;xs=[10 20 30];at xs 1 ?? at xs 2 ?? 0"#; fn check_all(engine: &str) { assert_eq!( @@ -113,6 +131,41 @@ fn check_all(engine: &str) { "7", "parenkey hit engine={engine}" ); + assert_eq!( + run_file(engine, CHAIN_FIRST_HIT, "f"), + "1", + "chain first hit engine={engine}" + ); + assert_eq!( + run_file(engine, CHAIN_SECOND_HIT, "f"), + "2", + "chain second hit engine={engine}" + ); + assert_eq!( + run_file(engine, CHAIN_BOTH_MISS, "f"), + "99", + "chain both miss engine={engine}" + ); + assert_eq!( + run_file(engine, CHAIN_NO_DEFAULT_HIT, "f"), + "1", + "chain no-default hit engine={engine}" + ); + assert_eq!( + run_file(engine, CHAIN_NO_DEFAULT_MISS, "f"), + "2", + "chain no-default miss engine={engine}" + ); + assert_eq!( + run_file(engine, CHAIN_AT_FIRST_HIT, "f"), + "10", + "chain at first hit engine={engine}" + ); + assert_eq!( + run_file(engine, CHAIN_AT_SECOND_HIT, "f"), + "20", + "chain at second hit engine={engine}" + ); } #[test]