Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 27 additions & 0 deletions examples/kebab-vs-subtract.ilo
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
-- Kebab-case identifiers vs binary subtraction.
-- A dash inside a name is part of the identifier; the lexer is greedy
-- (`[a-z][a-z0-9]*(-[a-z0-9]+)*`), so `best-d` is always one token, never
-- `best - d`. To subtract, separate the operator with spaces.

-- Binary subtraction: prefix `-` followed by two atoms, spaces required.
-- Note that the function name `sub-explicit` is itself a kebab-case identifier.
sub-explicit best:n d:n>n;- best d

-- Kebab-case lookup: `best-d` is a single identifier, distinct from `best` and `d`.
-- Here it is the third parameter, so the result is whatever is passed third
-- (99 in the run below), not the difference of the first two arguments.
look best:n d:n best-d:n>n;best-d

-- All three coexist: same scope binds `best`, `d`, and `best-d`. Each
-- resolves independently. `str best-d` prints the kebab value (99),
-- proving the parser never splits the ident into `best - d`.
mix>L t;best=10;d=3;best-d=99;[str best-d, str best, str d]

-- Multi-segment kebab works the same way: `a-b-c` is one identifier, not
-- a chain of subtractions.
chain>n;a-b-c=42;a-b-c

-- run: sub-explicit 10 3
-- out: 7
-- run: look 10 3 99
-- out: 99
-- run: mix
-- out: [99, 10, 3]
-- run: chain
-- out: 42
47 changes: 45 additions & 2 deletions src/verify.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,6 +212,43 @@ fn compatible(a: &Ty, b: &Ty) -> bool {
}
}

/// Builds the "kebab-case is one identifier" hint for an undefined name.
///
/// `name` is split on `-`. The hint is produced only when there are at least
/// two segments, none of them empty, and every segment either appears in
/// `candidates` or is accepted by `is_builtin` / `builtin_as_fn_ty`. In that
/// situation the likeliest mistake is reading `best-d` as `best - d`, so the
/// hint states that the name is a single identifier.
///
/// * Exactly two segments: the hint also shows the prefix subtraction form
///   `- <first> <second>`.
/// * Three or more segments: the hint only states that a dash inside an
///   identifier never means subtraction.
///
/// Returns `None` in every other case so the caller can fall back to another
/// suggestion.
fn kebab_subtract_hint<'a>(
    name: &str,
    candidates: impl Iterator<Item = &'a String> + Clone,
) -> Option<String> {
    // A name without any dash yields exactly one segment and is rejected by
    // the length guard; leading, trailing, or doubled dashes produce an empty
    // segment and are rejected as well.
    let segments: Vec<&str> = name.split('-').collect();
    if segments.len() < 2 || segments.iter().any(|seg| seg.is_empty()) {
        return None;
    }

    // The iterator is cloned per segment because each lookup consumes it.
    let has_unresolved_segment = segments.iter().any(|seg| {
        !(candidates.clone().any(|known| known == seg)
            || is_builtin(seg)
            || builtin_as_fn_ty(seg).is_some())
    });
    if has_unresolved_segment {
        return None;
    }

    let hint = match segments.as_slice() {
        [lhs, rhs] => format!(
            "'{name}' is a single identifier (kebab-case); for subtraction write '- {lhs} {rhs}'"
        ),
        _ => format!(
            "'{name}' is a single identifier (kebab-case); '-' inside an identifier never means subtraction"
        ),
    };
    Some(hint)
}

fn closest_match<'a>(name: &str, candidates: impl Iterator<Item = &'a String>) -> Option<String> {
let mut best: Option<(String, usize)> = None;
for candidate in candidates {
Expand Down Expand Up @@ -2702,8 +2739,14 @@ impl VerifyContext {
.flat_map(|frame| frame.keys().cloned())
.collect();
candidates.extend(self.functions.keys().cloned());
let hint = closest_match(name, candidates.iter())
.map(|s| format!("did you mean '{s}'?"));
// Prefer the kebab-case clarification when every dash-separated
// half resolves as a value: that's the high-signal case where
// the model is liable to misread the atomic ident as a binop.
// Fall back to the standard closest-match suggestion otherwise.
let hint = kebab_subtract_hint(name, candidates.iter()).or_else(|| {
closest_match(name, candidates.iter())
.map(|s| format!("did you mean '{s}'?"))
});
self.err(
"ILO-T004",
func,
Expand Down
171 changes: 171 additions & 0 deletions tests/regression_kebab_precedence.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
// Regression tests pinning kebab-case identifier behaviour and the
// diagnostic-layer hint that fires when an undefined kebab-case ident's
// halves are both bound in scope.
//
// The lexer rule `[a-z][a-z0-9]*(-[a-z0-9]+)*` (logos, priority 1) makes
// kebab-case atomic: `best-d` is always one `Ident` token, never `best`
// `-` `d`. These tests lock that guarantee in across tree/VM/Cranelift
// engines so a future lexer change cannot silently re-introduce the
// persona-reported confusion. The diagnostic test then covers the
// secondary issue: when both halves resolve but the kebab ident does
// not, the error should explicitly tell the model the ident is atomic.

use std::process::Command;

/// Creates a fresh `Command` for the `ilo` binary under test.
///
/// The binary path is read at compile time from the `CARGO_BIN_EXE_ilo`
/// environment variable.
fn ilo() -> Command {
    let binary_path = env!("CARGO_BIN_EXE_ilo");
    Command::new(binary_path)
}

/// Runs `ilo <src> <engine> <func_argv...>` and returns its trimmed stdout.
///
/// Panics if the process cannot be spawned, or if it exits unsuccessfully
/// (the panic message then includes the captured stderr).
fn run_ok(engine: &str, src: &str, func_argv: &[&str]) -> String {
    let output = ilo()
        .arg(src)
        .arg(engine)
        .args(func_argv)
        .output()
        .expect("failed to run ilo");
    assert!(
        output.status.success(),
        "ilo {engine} failed for `{src}` argv={func_argv:?}: stderr={}",
        String::from_utf8_lossy(&output.stderr)
    );
    let stdout = String::from_utf8_lossy(&output.stdout);
    stdout.trim().to_string()
}

/// Runs `src` on every available engine and asserts each prints `expected`.
///
/// `func` holds the function name optionally followed by its arguments,
/// e.g. `"f 10 3"`. It is split on whitespace so each piece is passed as a
/// separate argv entry rather than one quoted string. The tree and VM
/// engines are always exercised; the Cranelift engine only when the
/// `cranelift` feature is enabled.
fn check_all(src: &str, func: &str, expected: &str) {
    let argv: Vec<&str> = func.split_whitespace().collect();

    let assert_engine = |engine: &str| {
        let actual = run_ok(engine, src, &argv);
        assert_eq!(
            actual, expected,
            "engine={engine} src=`{src}` func=`{func}`"
        );
    };

    assert_engine("--run-tree");
    assert_engine("--run-vm");
    #[cfg(feature = "cranelift")]
    assert_engine("--run-cranelift");
}

// ---- Lexer / parser guarantee: kebab-case is always one identifier ----

#[test]
fn kebab_ident_in_str_call_arg() {
    // The originally reported case: `str best-d` has to read the kebab-case
    // binding (99). Had the parser split the identifier, this would either
    // fail with a type mismatch or print 7 (10 - 3).
    let src = "f>t;best=10;d=3;best-d=99;str best-d";
    check_all(src, "f", "99");
}

#[test]
fn kebab_ident_distinct_from_halves_in_list() {
    // `best`, `d`, and `best-d` are bound side by side and must each resolve
    // to their own value; guards against `best-d` aliasing `best` or `d`.
    let src = "f>L t;best=10;d=3;best-d=99;[str best-d, str best, str d]";
    let expected = "[99, 10, 3]";
    check_all(src, "f", expected);
}

#[test]
fn explicit_subtraction_still_works() {
    // The form the diagnostic recommends: prefix `-` separated from its
    // operands by spaces, i.e. `- best d`.
    let src = "f best:n d:n>n;- best d";
    let invocation = "f 10 3";
    check_all(src, invocation, "7");
}

#[test]
fn multi_segment_kebab_ident() {
    // Three segments still form one identifier; pins the `(-[a-z0-9]+)*`
    // repetition in the lexer rule.
    let src = "f>n;a-b-c=42;a-b-c";
    check_all(src, "f", "42");
}

#[test]
fn kebab_with_digit_segment() {
    // Digits are allowed after the leading letter of the name and anywhere in
    // later segments; pinned so a stricter regex cannot break names such as
    // `v2-config` or `item-1`.
    let src = "f>n;v2-config=7;v2-config";
    check_all(src, "f", "7");
}

// ---- Diagnostic-layer: helpful hint when halves are bound but kebab is not ----

/// Runs `ilo <src> <func>`, asserts that it fails, and returns its stderr.
///
/// A function name is always passed: a bare `ilo "<src>"` only dumps the AST
/// and exits 0, so the verifier would never run. Panics if the process
/// cannot be spawned or if it unexpectedly succeeds.
fn run_err(src: &str, func: &str) -> String {
    let output = ilo()
        .arg(src)
        .arg(func)
        .output()
        .expect("failed to run ilo");
    let stderr = String::from_utf8_lossy(&output.stderr);
    assert!(
        !output.status.success(),
        "expected failure for `{src}` func=`{func}`, stdout={} stderr={}",
        String::from_utf8_lossy(&output.stdout),
        stderr
    );
    stderr.into_owned()
}

#[test]
fn diagnostic_hint_when_kebab_unbound_but_halves_bound() {
    // Originating report: `str best-d` fails with
    // "undefined variable 'best-d'", and the default suggestion
    // "did you mean 'best'?" reads as if the parser had split the
    // identifier. The hint must instead say the identifier is atomic and
    // show the explicit subtraction form.
    let src = "f>t;best=10;d=3;str best-d";
    let stderr = run_err(src, "f");

    assert!(stderr.contains("ILO-T004"), "stderr: {stderr}");
    assert!(
        stderr.contains("undefined variable 'best-d'"),
        "stderr: {stderr}"
    );
    assert!(
        stderr.contains("single identifier"),
        "expected kebab clarification, stderr: {stderr}"
    );
    assert!(
        stderr.contains("- best d"),
        "expected explicit subtraction form, stderr: {stderr}"
    );

    // The misleading default suggestion must be gone.
    let old_suggestion = "did you mean 'best'?";
    assert!(
        !stderr.contains(old_suggestion),
        "old suggestion leaked: {stderr}"
    );
}

#[test]
fn diagnostic_hint_multi_segment_no_subtract_form() {
    // With three or more segments there is no single binary subtraction to
    // recommend, so the hint only clarifies that the identifier is atomic.
    let src = "f>n;a=1;b=2;c=3;+a-b-c 0";
    let stderr = run_err(src, "f");

    assert!(stderr.contains("ILO-T004"), "stderr: {stderr}");
    assert!(
        stderr.contains("single identifier"),
        "expected kebab clarification, stderr: {stderr}"
    );

    // The two-operand subtraction form must not be suggested here.
    let two_arg_form = "'- a b'";
    assert!(
        !stderr.contains(two_arg_form),
        "should not suggest 2-arg subtract for 3-segment name: {stderr}"
    );
}

#[test]
fn diagnostic_falls_back_to_closest_match_when_half_unbound() {
    // When only one side of the dash is bound, the kebab-confusion theory
    // does not apply, so the standard closest-match suggestion must be used.
    // Pins that the kebab hint is targeted rather than a blanket override.
    let src = "f>t;best=10;str best-d";
    let stderr = run_err(src, "f");

    assert!(stderr.contains("ILO-T004"), "stderr: {stderr}");
    assert!(
        !stderr.contains("single identifier"),
        "kebab hint should not fire when 'd' is unbound: {stderr}"
    );

    // The standard suggestion is still present.
    assert!(
        stderr.contains("did you mean"),
        "expected closest-match fallback, stderr: {stderr}"
    );
}
Loading