diff --git a/impl/rust-cli/src/executable.rs b/impl/rust-cli/src/executable.rs index 819f737..20fd7b2 100644 --- a/impl/rust-cli/src/executable.rs +++ b/impl/rust-cli/src/executable.rs @@ -714,13 +714,29 @@ impl ExecutableCommand for Command { let content = std::fs::read_to_string(&path) .map_err(|e| anyhow::anyhow!("source: {}: {}", path.display(), e))?; + // Strip whole-line comments first, then feed the entire + // content to the statement splitter so multi-line `if/fi`, + // `for/done`, etc. stay together. + let stripped: String = content + .lines() + .map(|line| { + let trimmed = line.trim_start(); + if trimmed.starts_with('#') { + "" + } else { + line + } + }) + .collect::>() + .join("\n"); + let mut last_result = ExecutionResult::Success; - for line in content.lines() { - let line = line.trim(); - if line.is_empty() || line.starts_with('#') { + for segment in crate::parser::split_on_statement_separators(&stripped) { + let segment = segment.trim(); + if segment.is_empty() || segment.starts_with('#') { continue; } - let cmd = crate::parser::parse_command(line)?; + let cmd = crate::parser::parse_command(segment)?; last_result = cmd.execute(state)?; // Propagate Exit (shell-wide) and Return (function-scope). // A `return` inside a sourced file that was itself sourced diff --git a/impl/rust-cli/src/functions.rs b/impl/rust-cli/src/functions.rs index 35a30d3..8031f99 100644 --- a/impl/rust-cli/src/functions.rs +++ b/impl/rust-cli/src/functions.rs @@ -338,7 +338,7 @@ mod tests { fn test_parse_posix_function_def() { let result = parse_function_def("greet() { echo hello; }"); assert!(result.is_some()); - let (name, body, raw_body) = result.unwrap(); + let (name, body, raw_body) = result.expect("TODO: handle error"); assert_eq!(name, "greet"); assert_eq!(body, vec!["echo hello"]); // raw_body preserves the trailing `;` — that's harmless. @@ -349,7 +349,7 @@ mod tests { fn test_parse_posix_function_multi_commands() { let result = parse_function_def("setup() { mkdir src; touch src/main.rs; echo done; }"); assert!(result.is_some()); - let (name, body, raw_body) = result.unwrap(); + let (name, body, raw_body) = result.expect("TODO: handle error"); assert_eq!(name, "setup"); assert_eq!(body, vec!["mkdir src", "touch src/main.rs", "echo done"]); assert_eq!(raw_body, "mkdir src; touch src/main.rs; echo done;"); @@ -359,7 +359,7 @@ mod tests { fn test_parse_bash_function_def() { let result = parse_function_def("function greet { echo hello; }"); assert!(result.is_some()); - let (name, body, raw_body) = result.unwrap(); + let (name, body, raw_body) = result.expect("TODO: handle error"); assert_eq!(name, "greet"); assert_eq!(body, vec!["echo hello"]); assert_eq!(raw_body, "echo hello;"); @@ -369,7 +369,7 @@ mod tests { fn test_parse_bash_function_with_parens() { let result = parse_function_def("function greet() { echo hello; }"); assert!(result.is_some()); - let (name, body, raw_body) = result.unwrap(); + let (name, body, raw_body) = result.expect("TODO: handle error"); assert_eq!(name, "greet"); assert_eq!(body, vec!["echo hello"]); assert_eq!(raw_body, "echo hello;"); @@ -381,7 +381,7 @@ mod tests { // execution can parse `if/fi`, `for/done`, etc. as single commands. let result = parse_function_def("ifunc() { if true; then mkdir d; fi; }"); assert!(result.is_some()); - let (name, _body, raw_body) = result.unwrap(); + let (name, _body, raw_body) = result.expect("TODO: handle error"); assert_eq!(name, "ifunc"); assert_eq!(raw_body, "if true; then mkdir d; fi;"); } @@ -391,7 +391,7 @@ mod tests { // A `}` inside a quoted string must NOT be treated as the closing brace. let result = parse_function_def("lit() { echo '}'; }"); assert!(result.is_some()); - let (_name, _body, raw_body) = result.unwrap(); + let (_name, _body, raw_body) = result.expect("TODO: handle error"); assert_eq!(raw_body, "echo '}';"); } diff --git a/impl/rust-cli/src/main.rs b/impl/rust-cli/src/main.rs index 06069ac..3fb204f 100644 --- a/impl/rust-cli/src/main.rs +++ b/impl/rust-cli/src/main.rs @@ -240,39 +240,51 @@ fn main() -> Result<()> { } /// Execute script content (string of commands, one per line or semicolon-separated) +/// +/// We split the *entire* content on top-level statement separators (both `;` +/// and `\n`, respecting quotes and control-structure depth), so multi-line +/// `if/fi`, `for/done`, `while/done`, and `case/esac` work exactly as they +/// do in a POSIX shell. fn execute_script_content(content: &str, state: &mut state::ShellState) -> Result<()> { - for line in content.lines() { - let line = line.trim(); - if line.is_empty() || line.starts_with('#') { + // Strip full-line comments before splitting. (A `#` inside a statement + // may be part of a quoted string or a parameter expansion, so we only + // trim whole-line comments here.) + let stripped: String = content + .lines() + .map(|line| { + let trimmed = line.trim_start(); + if trimmed.starts_with('#') { + "" + } else { + line + } + }) + .collect::>() + .join("\n"); + + for segment in vsh::parser::split_on_statement_separators(&stripped) { + let segment = segment.trim(); + if segment.is_empty() || segment.starts_with('#') { continue; } - // Split on semicolons - let segments = vsh::parser::split_on_semicolons(line); - for segment in segments { - let segment = segment.trim(); - if segment.is_empty() || segment.starts_with('#') { - continue; - } - - match vsh::parser::parse_command(segment) { - Ok(cmd) => { - let result = cmd.execute(state)?; - match result { - ExecutionResult::Exit => return Ok(()), - ExecutionResult::ExternalCommand { exit_code } - | ExecutionResult::Return { exit_code } => { - state.last_exit_code = exit_code; - } - ExecutionResult::Success => { - state.last_exit_code = 0; - } + match vsh::parser::parse_command(segment) { + Ok(cmd) => { + let result = cmd.execute(state)?; + match result { + ExecutionResult::Exit => return Ok(()), + ExecutionResult::ExternalCommand { exit_code } + | ExecutionResult::Return { exit_code } => { + state.last_exit_code = exit_code; + } + ExecutionResult::Success => { + state.last_exit_code = 0; } } - Err(e) => { - eprintln!("vsh: {}", e); - state.last_exit_code = 1; - } + } + Err(e) => { + eprintln!("vsh: {}", e); + state.last_exit_code = 1; } } } diff --git a/impl/rust-cli/src/parser.rs b/impl/rust-cli/src/parser.rs index 14e0ae4..cb8978e 100644 --- a/impl/rust-cli/src/parser.rs +++ b/impl/rust-cli/src/parser.rs @@ -1425,6 +1425,21 @@ fn is_block_close_keyword(word: &str) -> bool { } pub fn split_on_semicolons(input: &str) -> Vec<&str> { + split_on_top_level(input, false) +} + +/// Like `split_on_semicolons`, but also treats top-level newlines as statement +/// separators (outside quotes and outside control-structure blocks). +/// +/// Use this when feeding a multi-line script fragment into the parser: it +/// lets `if/then/fi`, `while/do/done`, `for/do/done`, and `case/esac` span +/// multiple lines — exactly like a POSIX shell — while still producing one +/// segment per top-level statement. +pub fn split_on_statement_separators(input: &str) -> Vec<&str> { + split_on_top_level(input, true) +} + +fn split_on_top_level(input: &str, split_on_newline: bool) -> Vec<&str> { let mut segments = Vec::new(); let mut start = 0; let mut in_single_quote = false; @@ -1432,6 +1447,12 @@ pub fn split_on_semicolons(input: &str) -> Vec<&str> { let mut escaped = false; let mut paren_depth: i32 = 0; let mut block_depth: i32 = 0; + // Brace depth is tracked only in statement-separator mode (i.e. when + // we're splitting a whole script). It prevents a `;` inside a function + // body or brace group — `foo() { cmd; }` — from being treated as a + // statement boundary. Normal `${VAR}` expansions balance themselves + // and cancel back out to zero. + let mut brace_depth: i32 = 0; // Pre-scan to detect control structure keywords and track nesting // We need to identify word boundaries for keyword detection @@ -1470,8 +1491,29 @@ pub fn split_on_semicolons(input: &str) -> Vec<&str> { } i += 1; } + '{' if split_on_newline && !in_single_quote && !in_double_quote => { + brace_depth += 1; + i += 1; + } + '}' if split_on_newline && !in_single_quote && !in_double_quote => { + if brace_depth > 0 { + brace_depth -= 1; + } + i += 1; + } ';' if !in_single_quote && !in_double_quote && paren_depth == 0 => { - if block_depth == 0 { + if block_depth == 0 && brace_depth == 0 { + segments.push(&input[start..i]); + start = i + 1; + } + i += 1; + } + '\n' if split_on_newline + && !in_single_quote + && !in_double_quote + && paren_depth == 0 => + { + if block_depth == 0 && brace_depth == 0 { segments.push(&input[start..i]); start = i + 1; } @@ -2611,7 +2653,9 @@ pub fn expand_quoted_word_with_state(word: &QuotedWord, state: &mut crate::state /// Parse a block of commands from a string (semicolon or newline separated) fn parse_command_block(block: &str) -> Result> { let mut commands = Vec::new(); - for segment in split_on_semicolons(block) { + // Control-structure bodies may span multiple lines, so treat `;` AND + // top-level `\n` as statement separators. + for segment in split_on_statement_separators(block) { let segment = segment.trim(); if segment.is_empty() || segment.starts_with('#') { continue; diff --git a/impl/rust-cli/src/state.rs b/impl/rust-cli/src/state.rs index c2f81b9..341af2a 100644 --- a/impl/rust-cli/src/state.rs +++ b/impl/rust-cli/src/state.rs @@ -889,9 +889,21 @@ impl ShellState { self.positional_params.len() } - /// Get next unique FIFO ID for process substitution + /// Get next unique FIFO ID for process substitution. + /// + /// Uses a process-wide atomic so that FIFO paths are unique across all + /// `ShellState` instances in the same process (e.g. parallel test threads + /// that share a PID). A per-state counter would collide under + /// `cargo test`, since each test creates a fresh state whose counter + /// starts at 0 while they all share the same PID in the FIFO path + /// template `/tmp/vsh-fifo--`. pub fn next_fifo_id(&self) -> usize { - self.fifo_counter.fetch_add(1, std::sync::atomic::Ordering::SeqCst) + use std::sync::atomic::{AtomicUsize, Ordering}; + static GLOBAL_FIFO_COUNTER: AtomicUsize = AtomicUsize::new(0); + // Keep the per-state counter too so its semantics (monotonic per + // state) are preserved for any caller that reads it directly. + let _ = self.fifo_counter.fetch_add(1, Ordering::SeqCst); + GLOBAL_FIFO_COUNTER.fetch_add(1, Ordering::SeqCst) } /// Get special variable value ($$, $?, $HOME, etc.) diff --git a/impl/rust-cli/tests/multiline_script_tests.rs b/impl/rust-cli/tests/multiline_script_tests.rs new file mode 100644 index 0000000..cc2287a --- /dev/null +++ b/impl/rust-cli/tests/multiline_script_tests.rs @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: PMPL-1.0-or-later +//! Regression tests for multi-line script and sourced-file execution. +//! +//! The previous `execute_script_content` and `Command::Source` iterated +//! `content.lines()` and parsed each line in isolation. That broke every +//! control structure spanning multiple lines — the canonical POSIX shape: +//! +//! ```sh +//! if true +//! then +//! mkdir d +//! fi +//! ``` +//! +//! Fix: strip whole-line comments, then split the *entire* content on +//! `split_on_statement_separators` (top-level `;` and `\n`, respecting +//! quotes, parens, control-structure depth, and brace depth — so +//! `foo() { ...; }` stays on a single statement too). + +use anyhow::Result; +use std::fs; +use tempfile::tempdir; +use vsh::executable::ExecutableCommand; +use vsh::parser::{parse_command, split_on_statement_separators}; +use vsh::state::ShellState; + +// ------------------------------------------------------------------------- +// Statement splitter unit tests +// ------------------------------------------------------------------------- + +#[test] +fn statement_splitter_splits_on_newline() { + let parts = split_on_statement_separators("echo a\necho b"); + let segs: Vec<&str> = parts.iter().map(|s| s.trim()).collect(); + assert_eq!(segs, vec!["echo a", "echo b"]); +} + +#[test] +fn statement_splitter_keeps_multiline_if_together() { + let parts = + split_on_statement_separators("if true\nthen\n mkdir d\nfi\necho after"); + let segs: Vec<&str> = parts.iter().map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); + // The whole if/fi block is one segment, echo is a second. + assert_eq!(segs.len(), 2); + assert!(segs[0].starts_with("if") && segs[0].ends_with("fi")); + assert_eq!(segs[1], "echo after"); +} + +#[test] +fn statement_splitter_keeps_function_def_together() { + let parts = split_on_statement_separators("foo() { mkdir a; mkdir b; }\necho x"); + let segs: Vec<&str> = parts.iter().map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); + assert_eq!(segs.len(), 2); + assert_eq!(segs[0], "foo() { mkdir a; mkdir b; }"); + assert_eq!(segs[1], "echo x"); +} + +#[test] +fn statement_splitter_ignores_newlines_inside_quotes() { + let parts = split_on_statement_separators("echo 'a\nb'\necho c"); + let segs: Vec<&str> = parts.iter().map(|s| s.trim()).filter(|s| !s.is_empty()).collect(); + assert_eq!(segs.len(), 2); + assert_eq!(segs[0], "echo 'a\nb'"); + assert_eq!(segs[1], "echo c"); +} + +// ------------------------------------------------------------------------- +// End-to-end: sourced scripts with multi-line control structures +// ------------------------------------------------------------------------- + +#[test] +fn sourced_script_executes_multiline_if_then_fi() -> Result<()> { + let temp = tempdir()?; + let mut state = ShellState::new(temp.path().to_str().unwrap())?; + + let script = temp.path().join("multi.sh"); + fs::write(&script, "if true\nthen\n mkdir from_multi\nfi\n")?; + + parse_command(&format!("source {}", script.display()))?.execute(&mut state)?; + + assert!(state.resolve_path("from_multi").exists()); + Ok(()) +} + +#[test] +fn sourced_script_executes_multiline_for_loop() -> Result<()> { + let temp = tempdir()?; + let mut state = ShellState::new(temp.path().to_str().unwrap())?; + + let script = temp.path().join("loop.sh"); + fs::write(&script, "for x in a b c\ndo\n mkdir $x\ndone\n")?; + + parse_command(&format!("source {}", script.display()))?.execute(&mut state)?; + + assert!(state.resolve_path("a").exists()); + assert!(state.resolve_path("b").exists()); + assert!(state.resolve_path("c").exists()); + Ok(()) +} + +#[test] +fn sourced_script_defines_and_invokes_multiline_function() -> Result<()> { + let temp = tempdir()?; + let mut state = ShellState::new(temp.path().to_str().unwrap())?; + + let script = temp.path().join("fns.sh"); + fs::write( + &script, + "greet() { mkdir hi; }\n# a comment\ngreet\n", + )?; + + parse_command(&format!("source {}", script.display()))?.execute(&mut state)?; + + assert!(state.functions.is_defined("greet")); + assert!(state.resolve_path("hi").exists()); + Ok(()) +} + +#[test] +fn sourced_script_skips_comment_only_lines() -> Result<()> { + let temp = tempdir()?; + let mut state = ShellState::new(temp.path().to_str().unwrap())?; + + let script = temp.path().join("commented.sh"); + fs::write( + &script, + "# leading comment\n\n# another\nmkdir real\n # indented comment\nmkdir also\n", + )?; + + parse_command(&format!("source {}", script.display()))?.execute(&mut state)?; + + assert!(state.resolve_path("real").exists()); + assert!(state.resolve_path("also").exists()); + Ok(()) +} + +#[test] +fn sourced_script_preserves_semicolon_inline() -> Result<()> { + let temp = tempdir()?; + let mut state = ShellState::new(temp.path().to_str().unwrap())?; + + // Mixed: a single-line if + a multi-line for in the same file. + let script = temp.path().join("mixed.sh"); + fs::write( + &script, + "if true; then mkdir one; fi\nfor x in two three\ndo\n mkdir $x\ndone\n", + )?; + + parse_command(&format!("source {}", script.display()))?.execute(&mut state)?; + + assert!(state.resolve_path("one").exists()); + assert!(state.resolve_path("two").exists()); + assert!(state.resolve_path("three").exists()); + Ok(()) +}