Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions examples/wh-gt-condition.ilo
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
-- wh >cond{...} mid-body must parse as a while loop, not a fresh fn decl.
-- Reserved statement keywords (wh/ret/brk/cnt) never start a fn header,
-- so the parser's body-boundary heuristic now skips them.

dec s:n>n;v=+s 0;wh >v 0{v=- v 1};+v 0

cnt5 s:n>n;v=+s 0;c=0;wh >v 0{v=- v 1;c=+c 1};+c 0

-- run: dec 5
-- out: 0
-- run: dec 0
-- out: 0
-- run: dec 1
-- out: 0
-- run: cnt5 5
-- out: 5
-- run: cnt5 3
-- out: 3
21 changes: 20 additions & 1 deletion src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -199,8 +199,18 @@ impl Parser {

/// Return true if the tokens at `pos` look like the start of a function declaration:
/// `Ident` followed by `>` (no-param function) OR `Ident Ident :` (has params).
///
/// Reserved statement-keyword identifiers (`wh`/`ret`/`brk`/`cnt`) are never
/// valid function names — `parse_stmt` intercepts them as control-flow forms.
/// Short-circuiting here closes the `wh >cond{...}` mid-body re-parse trap,
/// where the body-boundary heuristic in `parse_body_with` would otherwise
/// treat `wh >v 0{...}` as a fresh fn decl named `wh` returning `v`.
fn is_fn_decl_start(&self, pos: usize) -> bool {
if !matches!(self.token_at(pos), Some(Token::Ident(_))) {
let name = match self.token_at(pos) {
Some(Token::Ident(n)) => n,
_ => return false,
};
if is_reserved_stmt_keyword(name) {
return false;
}
match self.token_at(pos + 1) {
Expand Down Expand Up @@ -2395,6 +2405,15 @@ fn wrap_body_as_let(name: &str, mut body: Vec<Spanned<Stmt>>) -> Vec<Spanned<Stm
body
}

/// Reports whether `name` is one of the statement keywords `wh`, `ret`,
/// `brk` or `cnt`.
///
/// `parse_stmt` intercepts these identifiers as control-flow forms, so none
/// of them can be the name of a function declaration. `is_fn_decl_start`
/// relies on this check to reject them; without it `wh >v 0{...}` would be
/// read as a fn decl named `wh` returning `v` (see the gis-analyst and
/// routing-tsp persona reports).
///
/// The comparison is exact: prefixes, longer identifiers and other casings
/// are not reserved.
fn is_reserved_stmt_keyword(name: &str) -> bool {
    const RESERVED: [&str; 4] = ["wh", "ret", "brk", "cnt"];
    RESERVED.contains(&name)
}

/// Map a reserved-keyword token to its `(message, hint)` pair for ILO-P011.
fn reserved_keyword_message(tok: &Token) -> Option<(String, String)> {
let (name, hint) = match tok {
Expand Down
139 changes: 139 additions & 0 deletions tests/regression_wh_gt_trap.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
// Regression: `wh >cond{...}` mid-body must parse as a while loop, not be
// mis-read as a fresh function declaration named `wh` returning some type.
//
// Before the fix, `is_fn_decl_start` accepted any `Ident Greater ...` as a
// zero-param fn header, so `parse_body_with`'s top-level boundary heuristic
// terminated the outer body at `wh` and tried to parse `wh >v 0{...}` as
// `wh` returning `v`. Symptoms: ILO-T008 "return type mismatch" plus a
// cascade of "undefined variable" errors with note `in function 'wh'`.
//
// Reported by gis-analyst and routing-tsp persona reruns against v0.11.1.
//
// Discriminator: reserved statement-keyword identifiers (`wh`/`ret`/`brk`/`cnt`)
// are intercepted by `parse_stmt` as control-flow forms and can never start a
// fn declaration. `is_fn_decl_start` now short-circuits to false for them.

use std::process::Command;

/// Builds a fresh `Command` for the `ilo` binary.
///
/// The binary path is baked in at compile time from the `CARGO_BIN_EXE_ilo`
/// environment variable.
fn ilo() -> Command {
    let binary_path: &str = env!("CARGO_BIN_EXE_ilo");
    Command::new(binary_path)
}

/// Invokes the `ilo` binary and captures its result.
///
/// Arguments are passed in the order `src`, `engine`, `entry`, followed by
/// `arg` only when `arg` is non-empty.
///
/// Returns `(success, stdout, stderr)`, where both streams are lossily
/// decoded as UTF-8 and trimmed of surrounding whitespace.
///
/// # Panics
///
/// Panics with "failed to run ilo" if the process output cannot be obtained.
fn run(engine: &str, src: &str, entry: &str, arg: &str) -> (bool, String, String) {
    let mut command = ilo();
    command.arg(src).arg(engine).arg(entry);

    // An empty `arg` means "no extra argument", not "an empty-string argument".
    let extra = Some(arg).filter(|a| !a.is_empty());
    command.args(extra);

    let output = command.output().expect("failed to run ilo");

    // Both streams get the same lossy-decode + trim treatment.
    let decode = |bytes: &[u8]| String::from_utf8_lossy(bytes).trim().to_string();
    let succeeded = output.status.success();
    let stdout = decode(&output.stdout);
    let stderr = decode(&output.stderr);

    (succeeded, stdout, stderr)
}

// Headline gis-analyst repro: `wh >v 0` after a let binding.
const WH_GT_AFTER_LET: &str = "foo s:n>n;v=+s 0;wh >v 0{v=- v 1};+v 0";

/// Runs `WH_GT_AFTER_LET` with entry `foo` and argument `5` under `engine`,
/// then asserts the process succeeded and printed `0`.
fn check_wh_gt_after_let(engine: &str) {
    let outcome = run(engine, WH_GT_AFTER_LET, "foo", "5");
    let (ok, stdout, stderr) = outcome;

    assert!(ok, "engine={engine}: stderr={stderr}");
    assert_eq!(stdout.as_str(), "0", "engine={engine}");
}

/// `wh >v 0` after a let binding, run with the `--run-tree` engine flag.
#[test]
fn wh_gt_after_let_tree() {
    let engine = "--run-tree";
    check_wh_gt_after_let(engine);
}

/// `wh >v 0` after a let binding, run with the `--run-vm` engine flag.
#[test]
fn wh_gt_after_let_vm() {
    let engine = "--run-vm";
    check_wh_gt_after_let(engine);
}

/// `wh >v 0` after a let binding, run with the `--run-cranelift` engine flag.
/// Only compiled when the `cranelift` feature is enabled.
#[test]
#[cfg(feature = "cranelift")]
fn wh_gt_after_let_cranelift() {
    let engine = "--run-cranelift";
    check_wh_gt_after_let(engine);
}

// `wh >cond{...}` followed by a sibling function — must not slurp the next
// fn's header. The body-boundary heuristic should still find the real
// boundary: the `;` after the trailing `+v 0`, immediately before `main>n`.
const WH_GT_THEN_SIBLING: &str = "foo s:n>n;v=+s 0;wh >v 0{v=- v 1};+v 0;main>n;foo 3";

/// Runs `WH_GT_THEN_SIBLING` with entry `main` and no extra argument under
/// `engine`, then asserts the process succeeded and printed `0`.
fn check_wh_gt_then_sibling(engine: &str) {
    let outcome = run(engine, WH_GT_THEN_SIBLING, "main", "");
    let (ok, stdout, stderr) = outcome;

    assert!(ok, "engine={engine}: stderr={stderr}");
    assert_eq!(stdout.as_str(), "0", "engine={engine}");
}

/// `wh >cond{...}` followed by a sibling fn, run with the `--run-tree` engine flag.
#[test]
fn wh_gt_then_sibling_tree() {
    let engine = "--run-tree";
    check_wh_gt_then_sibling(engine);
}

/// `wh >cond{...}` followed by a sibling fn, run with the `--run-vm` engine flag.
#[test]
fn wh_gt_then_sibling_vm() {
    let engine = "--run-vm";
    check_wh_gt_then_sibling(engine);
}

/// `wh >cond{...}` followed by a sibling fn, run with the `--run-cranelift`
/// engine flag. Only compiled when the `cranelift` feature is enabled.
#[test]
#[cfg(feature = "cranelift")]
fn wh_gt_then_sibling_cranelift() {
    let engine = "--run-cranelift";
    check_wh_gt_then_sibling(engine);
}

// `wh >=v 1` (GreaterEq prefix) — same family, must not be misread.
const WH_GE: &str = "foo s:n>n;v=+s 0;wh >=v 1{v=- v 1};+v 0";

/// Runs `WH_GE` with entry `foo` and argument `5` under `engine`, then
/// asserts the process succeeded and printed `0`.
fn check_wh_ge(engine: &str) {
    let outcome = run(engine, WH_GE, "foo", "5");
    let (ok, stdout, stderr) = outcome;

    assert!(ok, "engine={engine}: stderr={stderr}");
    assert_eq!(stdout.as_str(), "0", "engine={engine}");
}

/// `wh >=...` loop condition, run with the `--run-tree` engine flag.
#[test]
fn wh_ge_tree() {
    let engine = "--run-tree";
    check_wh_ge(engine);
}

/// `wh >=...` loop condition, run with the `--run-vm` engine flag.
#[test]
fn wh_ge_vm() {
    let engine = "--run-vm";
    check_wh_ge(engine);
}

/// `wh >=...` loop condition, run with the `--run-cranelift` engine flag.
/// Only compiled when the `cranelift` feature is enabled.
#[test]
#[cfg(feature = "cranelift")]
fn wh_ge_cranelift() {
    let engine = "--run-cranelift";
    check_wh_ge(engine);
}

// Sanity: a legitimate zero-param fn decl still parses fine — the reserved
// shortlist (`wh`/`ret`/`brk`/`cnt`) is the only set that short-circuits.
// Multi-decl program with a zero-param fn first; the body-boundary heuristic
// must still find this boundary.
const ZERO_PARAM_FN_OK: &str = "answer>n;42;dbl x:n>n;*x 2";

/// Runs `ZERO_PARAM_FN_OK` with entry `answer` and no extra argument under
/// `engine`, then asserts the process succeeded and printed `42`.
fn check_zero_param_fn_ok(engine: &str) {
    let outcome = run(engine, ZERO_PARAM_FN_OK, "answer", "");
    let (ok, stdout, stderr) = outcome;

    assert!(ok, "engine={engine}: stderr={stderr}");
    assert_eq!(stdout.as_str(), "42", "engine={engine}");
}

/// Zero-param fn decl sanity check, run with the `--run-tree` engine flag.
#[test]
fn zero_param_fn_ok_tree() {
    let engine = "--run-tree";
    check_zero_param_fn_ok(engine);
}

/// Zero-param fn decl sanity check, run with the `--run-vm` engine flag.
#[test]
fn zero_param_fn_ok_vm() {
    let engine = "--run-vm";
    check_zero_param_fn_ok(engine);
}

/// Zero-param fn decl sanity check, run with the `--run-cranelift` engine
/// flag. Only compiled when the `cranelift` feature is enabled.
#[test]
#[cfg(feature = "cranelift")]
fn zero_param_fn_ok_cranelift() {
    let engine = "--run-cranelift";
    check_zero_param_fn_ok(engine);
}
Loading