diff --git a/examples/unknown-flag-equals-form.ilo b/examples/unknown-flag-equals-form.ilo new file mode 100644 index 00000000..7fd0dc8e --- /dev/null +++ b/examples/unknown-flag-equals-form.ilo @@ -0,0 +1,20 @@ +-- The unknown-flag guard also rejects the `--key=value` shape. Before +-- the fix, `--engine=tree` and `--foo=bar` slipped past the guard (the +-- shape check excluded any token containing `=`) and got silently +-- consumed as positional args, surfacing as a misleading ILO-R012 +-- "no functions defined" or ILO-R004 "main: expected N args, got N+1". +-- The guard now splits on `=` and shape-checks the `--key` head, so +-- `--engine=tree` is caught the same way as `--engine tree`. +-- +-- To pass a `--key=value` token as literal data, insert `--` first: +-- ilo main.ilo -- --key=val +-- +-- This file is exercised by the engine harness on the happy path so +-- the guard's no-op behaviour on plain positional args stays under +-- cross-engine regression coverage. The rejection path is covered by +-- tests/regression_unknown_flag_guard.rs. + +main x:n>n;+x 1 + +-- run: main 41 +-- out: 42 diff --git a/src/cli/args.rs b/src/cli/args.rs index fac7ff53..32b5c2dc 100644 --- a/src/cli/args.rs +++ b/src/cli/args.rs @@ -352,11 +352,14 @@ impl Global { /// To pass a hyphen-prefixed token as a literal arg, separate with `--` first: /// `ilo main.ilo -- --foo`. Anything after the first `--` is data. /// -/// Shape match: `^--[a-z][a-z0-9]*(-[a-z0-9]+)*$`. Tokens containing `=`, -/// digits-first, or non-ASCII are NOT flagged as flags — they're data. -/// Short flags (`-x`, `-V`) are NOT flagged here either — clap rejects -/// unknown short flags upfront at parse time; only the long-flag shape -/// slips through the trailing_var_arg sink. +/// Shape match: `^--[a-z][a-z0-9]*(-[a-z0-9]+)*(=.*)?$`. The `--key=value` +/// form is normalised by splitting on the first `=` before the shape check, +/// so `--engine=tree` and `--foo=bar` are rejected the same way as the +/// space-separated forms. Tokens with digits-first or non-ASCII prefixes +/// are NOT treated as flags, they're data. Short flags (`-x`, `-V`) are +/// NOT flagged here either, clap rejects unknown short flags upfront at +/// parse time; only the long-flag shape slips through the trailing_var_arg +/// sink. pub fn reject_unknown_flags(args: &[String]) -> Result<(), String> { reject_unknown_flags_with_allowlist(args, &[]) } @@ -376,7 +379,16 @@ pub fn reject_unknown_flags_with_allowlist( // Separator reached: everything after is data. return Ok(()); } - if looks_like_clean_long_flag(a) && !allowlist.contains(&a.as_str()) { + // Normalise `--key=value` to its `--key` head before the shape check + // so the equals form is rejected the same way as the space form. + // Without this split, `--engine=tree` slips past the guard and gets + // consumed as a positional, surfacing as a misleading ILO-R012/R004 + // downstream. + let head = a.split_once('=').map(|(h, _)| h).unwrap_or(a.as_str()); + if looks_like_clean_long_flag(head) + && !allowlist.contains(&head) + && !allowlist.contains(&a.as_str()) + { return Err(format!( "error: unrecognised flag '{a}'. Use 'ilo --help' for valid flags. To pass it as a literal arg, separate with '--' first." )); @@ -918,12 +930,57 @@ mod tests { } #[test] - fn equals_form_not_treated_as_flag() { - // `--key=val` shape is data, not a clean flag. We err on the side of - // accepting it so users who paste config strings aren't surprised; - // clap's recognised `--key=val` flags are bound by clap before this - // guard runs. + fn equals_form_unknown_flag_rejected() { + // `--key=value` shape used to slip past the guard and get consumed as + // a positional, surfacing as a misleading ILO-R012/R004 downstream. + // The guard now splits on `=` before the shape check so this form is + // caught the same way as the space-separated form. let args = vec!["main.ilo".to_string(), "--foo=bar".to_string()]; + let err = reject_unknown_flags(&args).unwrap_err(); + assert!(err.contains("--foo=bar"), "msg={err}"); + assert!(err.contains("unrecognised flag")); + } + + #[test] + fn equals_form_engine_rejected() { + // Concrete repro from the originating bug report: `--engine=tree` + // slipped past while `--engine tree` was caught. + let args = vec!["main.ilo".to_string(), "--engine=tree".to_string()]; + let err = reject_unknown_flags(&args).unwrap_err(); + assert!(err.contains("--engine=tree"), "msg={err}"); + } + + #[test] + fn equals_form_allowlisted_head_accepted() { + // Allowlist matches against the `--key` head, so callers can + // pre-approve a known flag and its `--key=value` form is accepted. + let args = vec!["main.ilo".to_string(), "--bench=on".to_string()]; + assert!(reject_unknown_flags_with_allowlist(&args, &["--bench"]).is_ok()); + } + + #[test] + fn equals_form_after_dash_dash_accepted() { + // The `--` separator still escapes everything that follows, including + // the equals form. + let args = vec![ + "main.ilo".to_string(), + "--".to_string(), + "--foo=bar".to_string(), + ]; + assert!(reject_unknown_flags(&args).is_ok()); + } + + #[test] + fn equals_form_with_empty_value_rejected() { + // `--foo=` (empty value) is still an unrecognised flag. + let args = vec!["main.ilo".to_string(), "--foo=".to_string()]; + assert!(reject_unknown_flags(&args).is_err()); + } + + #[test] + fn equals_form_with_non_flag_head_accepted() { + // `key=value` (no leading `--`) is data, not a flag. + let args = vec!["main.ilo".to_string(), "key=value".to_string()]; assert!(reject_unknown_flags(&args).is_ok()); } diff --git a/tests/regression_unknown_flag_guard.rs b/tests/regression_unknown_flag_guard.rs index 516835d5..7836d300 100644 --- a/tests/regression_unknown_flag_guard.rs +++ b/tests/regression_unknown_flag_guard.rs @@ -11,11 +11,11 @@ // security-researcher) independently burned minutes on this trap. // // The contract: -// 1. Any clean long-flag shape (`--word` or `--word-with-dashes`) that -// isn't a recognised flag is rejected upfront with a clear -// "unrecognised flag" message and exit 1. +// 1. Any clean long-flag shape (`--word`, `--word-with-dashes`, or the +// `--word=value` equals form) that isn't a recognised flag is rejected +// upfront with a clear "unrecognised flag" message and exit 1. // 2. To pass a hyphen-prefixed token as a literal arg, the user inserts -// `--` first: `ilo main.ilo -- --foo`. +// `--` first: `ilo main.ilo -- --foo` or `ilo main.ilo -- --foo=bar`. // 3. All recognised long flags (`--run-vm`, `--bench`, etc.) still work. // 4. Holds across every engine (default, --run-tree, --run-vm, // --run-cranelift), the bare-positional dispatcher AND the `run` @@ -233,11 +233,55 @@ fn negative_number_arg_not_treated_as_flag() { } #[test] -fn equals_form_data_arg_not_treated_as_flag() { - // `--key=val` shape is data (e.g. a config string); not flagged. - let (_, _, stderr) = run_args(&["f x:t>t;x", "f", "--key=val"]); +fn equals_form_unknown_flag_rejected_bare() { + // `--key=value` used to slip past the guard (the shape check excluded + // tokens containing `=`) and got silently consumed as a positional, + // surfacing as a misleading ILO-R012/R004 downstream. The guard now + // splits on `=` and checks the `--key` head, so this form is rejected + // the same way as the space-separated form. + let p = temp_main("bare_engine_eq"); + let path_str = p.to_str().unwrap(); + assert_unrecognised(run_args(&[path_str, "--engine=tree"]), "--engine=tree"); +} + +#[test] +fn equals_form_unknown_foo_flag_rejected_bare() { + let p = temp_main("bare_foo_eq"); + let path_str = p.to_str().unwrap(); + assert_unrecognised(run_args(&[path_str, "--foo=bar"]), "--foo=bar"); +} + +#[test] +fn equals_form_unknown_flag_rejected_run_subcmd() { + let p = temp_main("run_engine_eq"); + let path_str = p.to_str().unwrap(); + assert_unrecognised( + run_args(&["run", path_str, "--engine=tree"]), + "--engine=tree", + ); +} + +#[test] +fn equals_form_after_dash_dash_passes_through() { + // `--` separator still escapes everything that follows, including the + // equals form. + let (code, stdout, stderr) = run_args(&["f x:t>t;x", "f", "--", "--key=val"]); + assert_eq!( + code, 0, + "expected exit 0 with `--` separator on equals form; stdout=\n{stdout}\nstderr=\n{stderr}" + ); + assert!( + !stderr.contains("unrecognised flag"), + "should not error on equals form after `--`; stderr={stderr}" + ); +} + +#[test] +fn non_flag_equals_data_passes_through() { + // `key=value` (no leading `--`) is data, not a flag. Must pass through. + let (_, _, stderr) = run_args(&["f x:t>t;x", "f", "key=value"]); assert!( !stderr.contains("unrecognised flag"), - "--key=val shape should pass through as data; stderr={stderr}" + "`key=value` shape should pass through as data; stderr={stderr}" ); }