From 58f4dc58f3c493a4eda40621f6bfcc2019939a7c Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Mon, 11 May 2026 21:14:19 +0100 Subject: [PATCH] parser: stop body loop slurping sibling fn decls tests + example covering sibling helper case. --- examples/sibling-fns.ilo | 18 +++ src/parser/mod.rs | 46 +++++++- tests/regression_sibling_helper_slurp.rs | 134 +++++++++++++++++++++++ 3 files changed, 197 insertions(+), 1 deletion(-) create mode 100644 examples/sibling-fns.ilo create mode 100644 tests/regression_sibling_helper_slurp.rs diff --git a/examples/sibling-fns.ilo b/examples/sibling-fns.ilo new file mode 100644 index 00000000..973989cd --- /dev/null +++ b/examples/sibling-fns.ilo @@ -0,0 +1,18 @@ +-- Sibling functions: a non-last function can end with a bare call. +-- The parser detects that the next tokens form a fn-decl header and +-- terminates the current body cleanly — no parentheses needed. + +-- ends in a bare call to `*` +dbl x:n>n;*x 2 + +-- adjacent fn declaration that previously got slurped as an arg to `*` +sq x:n>n;*x x + +main>n;a=dbl 3;b=sq 4;+a b + +-- run: main +-- out: 22 +-- run: dbl 21 +-- out: 42 +-- run: sq 5 +-- out: 25 diff --git a/src/parser/mod.rs b/src/parser/mod.rs index 55c7ba64..fdd765e7 100644 --- a/src/parser/mod.rs +++ b/src/parser/mod.rs @@ -211,6 +211,37 @@ impl Parser { } } + /// Stricter variant of `is_fn_decl_start` used at top-level body boundaries + /// to disambiguate fn declarations from record construction. A real fn decl + /// always has `>` followed by a return type before the body's first `;`, + /// while a record `Outer a:1 b:2` never has a `>` before its terminator. + /// Returns true only when a `>` is visible before the next `;`/`}`/`{`/EOF + /// at the same bracket depth. 
+    fn is_fn_decl_start_strict(&self, pos: usize) -> bool {
+        if !self.is_fn_decl_start(pos) {
+            return false;
+        }
+        // Confirm the candidate by scanning from the token after the name for the
+        // header's `>`. `depth` tracks nesting opened by `(`/`[` (plus any `{`
+        // inside them), and a `>` or `;` at depth > 0 is skipped. At depth 0 a
+        // `;`, a `{`, or a closer with no matching opener ends the candidate
+        // header without a `>`, so no fn decl starts at `pos`.
+        let mut i = pos + 1;
+        let mut depth: usize = 0;
+        while let Some(tok) = self.token_at(i) {
+            match tok {
+                Token::Semi | Token::LBrace if depth == 0 => return false,
+                Token::LParen | Token::LBracket | Token::LBrace => depth += 1,
+                Token::RParen | Token::RBracket | Token::RBrace if depth == 0 => return false,
+                Token::RParen | Token::RBracket | Token::RBrace => depth -= 1,
+                Token::Greater if depth == 0 => return true,
+                _ => {}
+            }
+            i += 1;
+        }
+        false
+    }
+
     /// Advance past tokens until we reach what looks like the start of the next
     /// declaration (or EOF). Returns the span of the last token consumed.
     /// Tracks brace depth so nested `{…}` blocks are skipped atomically.
@@ -509,7 +540,7 @@ impl Parser {
         if self.peek() == Some(&Token::Semi) {
             self.advance();
         }
-        let body = self.parse_body()?;
+        let body = self.parse_body_with(true)?;
         let end = self.prev_span();
         Ok(Decl::Function {
             name,
@@ -675,6 +706,16 @@ impl Parser {
 
     /// Parse a semicolon-separated body, wrapping each statement with its source span.
     fn parse_body(&mut self) -> Result>> {
+        self.parse_body_with(false)
+    }
+
+    /// Parse a semicolon-separated body. When `top_level` is true, the body
+    /// also terminates if the tokens after a `;` look like the start of the
+    /// next top-level function declaration. This closes the "sibling helper
+    /// slurp" trap where a body's final bare call would otherwise consume the
+    /// next function's name as an argument (and the trailing `>type;` would
+    /// then be parsed as a comparison, hiding the boundary).
+    fn parse_body_with(&mut self, top_level: bool) -> Result>> {
         let mut stmts = Vec::new();
         if !self.at_body_end() {
             let span_start = self.peek_span();
@@ -688,6 +729,9 @@ impl Parser {
             if self.at_body_end() {
                 break;
             }
+            if top_level && self.is_fn_decl_start_strict(self.pos) {
+                break;
+            }
             let span_start = self.peek_span();
             let stmt = self.parse_stmt()?;
             stmts.push(Spanned {
diff --git a/tests/regression_sibling_helper_slurp.rs b/tests/regression_sibling_helper_slurp.rs
new file mode 100644
index 00000000..0d11f353
--- /dev/null
+++ b/tests/regression_sibling_helper_slurp.rs
@@ -0,0 +1,134 @@
+// Regression: a function ending in a bare call followed by a sibling function
+// declaration must not slurp the next function's name as an argument.
+//
+// Previously, the doc rule was: "non-last function must not end in a bare call;
+// wrap the last expression in (...)". The fix detects, at the top-level body
+// boundary, when the next tokens form a function-declaration header and
+// terminates the current body cleanly.
+//
+// Discriminator: a real fn decl has `>` before its body's first `;`. Record
+// constructions (`Outer a:1 b:2`) and other `Ident Ident :` shapes do not,
+// so they continue to parse as statements.
+
+use std::process::Command;
+
+fn ilo() -> Command {
+    Command::new(env!("CARGO_BIN_EXE_ilo")) // path to the built `ilo` binary, set by Cargo
+}
+
+fn run(engine: &str, src: &str, entry: &str) -> (bool, String, String) {
+    let out = ilo()
+        .args([src, engine, entry]) // argv order: program text, engine flag, entry fn
+        .output()
+        .expect("failed to run ilo");
+    (
+        out.status.success(), // whether the process reported success
+        String::from_utf8_lossy(&out.stdout).trim().to_string(), // trimmed stdout
+        String::from_utf8_lossy(&out.stderr).trim().to_string(), // trimmed stderr
+    )
+}
+
+// The doc's failing shape: three functions, the first ends in a bare call.
+// Before the fix, `cntval` was slurped as a third argument to `has`.
+const DOC_REPRO: &str = "isn nm:t>b;has nm \"and \";cntval s:t>n;5;main>n;cntval \"hello\"";
+
+fn check_doc_repro(engine: &str) {
+    let (ok, stdout, stderr) = run(engine, DOC_REPRO, "main");
+    assert!(ok, "engine={engine}: expected success, stderr={stderr}");
+    assert_eq!(stdout, "5", "engine={engine}"); // `cntval` has the literal body `5`
+}
+
+#[test]
+fn doc_repro_tree() {
+    check_doc_repro("--run-tree");
+}
+
+#[test]
+fn doc_repro_vm() {
+    check_doc_repro("--run-vm");
+}
+
+#[test]
+#[cfg(feature = "cranelift")] // compiled only when the `cranelift` feature is enabled
+fn doc_repro_cranelift() {
+    check_doc_repro("--run-cranelift");
+}
+
+// A function ending in a bare call followed by another function declaration
+// must work without parenthesising the trailing expression.
+const BARE_CALL_THEN_SIBLING: &str = "dbl x:n>n;*x 2;f>n;dbl 21";
+
+fn check_bare_call_then_sibling(engine: &str) {
+    let (ok, stdout, stderr) = run(engine, BARE_CALL_THEN_SIBLING, "f");
+    assert!(ok, "engine={engine}: stderr={stderr}");
+    assert_eq!(stdout, "42", "engine={engine}"); // dbl 21 = 21 * 2
+}
+
+#[test]
+fn bare_call_then_sibling_tree() {
+    check_bare_call_then_sibling("--run-tree");
+}
+
+#[test]
+fn bare_call_then_sibling_vm() {
+    check_bare_call_then_sibling("--run-vm");
+}
+
+#[test]
+#[cfg(feature = "cranelift")] // compiled only when the `cranelift` feature is enabled
+fn bare_call_then_sibling_cranelift() {
+    check_bare_call_then_sibling("--run-cranelift");
+}
+
+// Record construction inside a statement following a `;` must still parse as
+// a record, not be mistaken for a fn-decl header. Discriminator: a real fn
+// decl always has `>` before the body's first `;`; this record does not.
+const RECORD_AFTER_SEMI: &str = "type pt{x:n;y:n} f>n;p=pt x:3 y:4;+p.x p.y";
+
+fn check_record_after_semi(engine: &str) {
+    let (ok, stdout, stderr) = run(engine, RECORD_AFTER_SEMI, "f");
+    assert!(ok, "engine={engine}: stderr={stderr}");
+    assert_eq!(stdout, "7", "engine={engine}"); // p.x + p.y = 3 + 4
+}
+
+#[test]
+fn record_after_semi_tree() {
+    check_record_after_semi("--run-tree");
+}
+
+#[test]
+fn record_after_semi_vm() {
+    check_record_after_semi("--run-vm");
+}
+
+#[test]
+#[cfg(feature = "cranelift")] // compiled only when the `cranelift` feature is enabled
+fn record_after_semi_cranelift() {
+    check_record_after_semi("--run-cranelift");
+}
+
+// The parenthesised workaround must continue to work (no regression).
+const PAREN_WORKAROUND: &str =
+    "isn nm:t>b;(has nm \"and \");cntval s:t>n;5;main>n;cntval \"hello\"";
+
+fn check_paren_workaround(engine: &str) {
+    let (ok, stdout, stderr) = run(engine, PAREN_WORKAROUND, "main");
+    assert!(ok, "engine={engine}: stderr={stderr}");
+    assert_eq!(stdout, "5", "engine={engine}"); // `cntval` has the literal body `5`
+}
+
+#[test]
+fn paren_workaround_tree() {
+    check_paren_workaround("--run-tree");
+}
+
+#[test]
+fn paren_workaround_vm() {
+    check_paren_workaround("--run-vm");
+}
+
+#[test]
+#[cfg(feature = "cranelift")] // compiled only when the `cranelift` feature is enabled
+fn paren_workaround_cranelift() {
+    check_paren_workaround("--run-cranelift");
+}