Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
55 changes: 55 additions & 0 deletions examples/multiline-bodies.ilo
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
-- Multi-line function bodies — readable indented form.
--
-- ilo's spec says newlines are non-semantic: an entire program can be one
-- line, but a multi-line file with indented continuations must work too.
-- These shapes all used to fail with ILO-P009 ("expected expression, got
-- Semi" / "got PipeOp") because `normalize_newlines` injected a `;` inside
-- brackets or before a continuation pipe. Now they all run.

-- Multi-line list literal.
nums>L n
xs=[
1,
2,
3
]
xs

-- Multi-line list literal with leading commas (common when copy-pasting
-- columns of values).
items>L n
xs=[1
,2
,3]
xs

-- Multi-line paren-grouped expression.
gp x:n>n
y=(+x
1)
y

-- Pipe chain across multiple lines — `>>` on a continuation line is never
-- a statement start, so the `;` injection is suppressed.
pipe x:n>n
x
>>str
>>len

-- Indented multi-statement body (already worked, kept as a regression
-- baseline).
tot p:n q:n>n
s=*p q
t=*s q
+s t

-- run: nums
-- out: [1, 2, 3]
-- run: items
-- out: [1, 2, 3]
-- run: gp 5
-- out: 6
-- run: pipe 42
-- out: 2
-- run: tot 3 4
-- out: 60
58 changes: 52 additions & 6 deletions src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,14 @@ pub enum Token {
/// - `\n` followed by whitespace (indented continuation) → `;`
/// - `\n` at column 0 (new declaration) → kept as `\n`
/// - `;` immediately after `{` or before `}` → removed
/// - Inside `(...)` or `[...]` (list literal, paren-group, fn-call arg list),
/// `\n` is treated as whitespace: no `;` is emitted, so multi-line list and
/// paren expressions parse correctly. String literals are walked through so
/// `(`/`[` inside text don't affect depth.
/// - Continuation lines starting with `>>` (pipe operator) suppress the `;`
/// so `xs\n >>map{...}` chains correctly. `>>` is never a valid statement
/// start, so this is unambiguous. Other operators (`+`, `-`, `*`, ...) are
/// valid prefix-call statement heads and are NOT special-cased.
pub fn normalize_newlines(source: &str) -> String {
if !source.contains('\n') {
return source.to_string();
Expand All @@ -185,13 +193,17 @@ pub fn normalize_newlines(source: &str) -> String {
let mut chars = source.chars().peekable();
// Track the last non-whitespace char pushed to `out` to avoid O(n) trim_end scans.
let mut last_significant: Option<char> = None;
// Depth of open `(` and `[` we're currently inside. `{` is tracked
// separately by `last_significant` (existing precedent).
let mut bracket_depth: u32 = 0;

while let Some(c) = chars.next() {
if c == '"' {
// Pass through string literal content verbatim so `--` inside a
// string isn't mistaken for a comment, and so `\n` (if ever present
// inside a string) isn't rewritten to `;`. Mirrors logos's string
// regex: closing quote terminates unless escaped.
// string isn't mistaken for a comment, `\n` (if ever present
// inside a string) isn't rewritten to `;`, and `(`/`[` inside
// text don't bump bracket depth. Mirrors logos's string regex:
// closing quote terminates unless escaped.
out.push(c);
last_significant = Some(c);
while let Some(sc) = chars.next() {
Expand Down Expand Up @@ -223,14 +235,41 @@ pub fn normalize_newlines(source: &str) -> String {
// surrounding `\n` handling on the next loop iteration emits the
// appropriate `;` or newline based on the line that follows.
} else if c == '\n' {
// Inside `(...)` or `[...]`, treat newlines as whitespace —
// don't emit `;` or `\n`, but emit a single space so tokens on
// adjacent lines don't get glued together (e.g. `(+x\n 1)`
// must not become `(+x1)`). Then skip indent on the next line.
if bracket_depth > 0 {
out.push(' ');
while matches!(chars.peek(), Some(' ') | Some('\t')) {
chars.next();
}
continue;
}
// Check if next line is indented (starts with space or tab)
if matches!(chars.peek(), Some(' ') | Some('\t')) {
// Peek past the indent to the first real char on the next line
// so we can decide whether to emit a `;` before it.
let mut lookahead = chars.clone();
while matches!(lookahead.peek(), Some(' ') | Some('\t')) {
lookahead.next();
}
// `>>` (pipe operator) at the start of a continuation line is
// never a statement start — it must be chaining the previous
// line's expression. Suppress the `;` so the chain parses.
// Other operators (`+`/`-`/`*`) are valid prefix-call
// statement starts and must NOT trigger this.
let next_is_pipe = {
let mut probe = lookahead.clone();
probe.next() == Some('>') && probe.next() == Some('>')
};
// Indented continuation → emit `;` and skip the whitespace
// But first check if the last non-whitespace char was `{` — if so, skip the `;`
// Also skip if `out` already ends in `;` (e.g. previous line
// was a comment that produced no significant output).
if last_significant == Some('{') || out.ends_with(';') {
// Don't emit `;` after `{` or an existing `;`, just skip whitespace
// was a comment that produced no significant output), or if
// the continuation begins with `>>` (pipe chain).
if last_significant == Some('{') || out.ends_with(';') || next_is_pipe {
// Don't emit `;`
} else {
out.push(';');
}
Expand All @@ -254,6 +293,13 @@ pub fn normalize_newlines(source: &str) -> String {
if !c.is_ascii_whitespace() {
last_significant = Some(c);
}
match c {
'(' | '[' => bracket_depth += 1,
')' | ']' => {
bracket_depth = bracket_depth.saturating_sub(1);
}
_ => {}
}
}
}

Expand Down
68 changes: 64 additions & 4 deletions tests/regression_multiline_fn_body.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,10 +33,16 @@ fn run_file(engine: &str, src: &str, entry: &str) -> String {
seq
));
std::fs::write(&path, src).unwrap();
let out = ilo()
.args([path.to_str().unwrap(), engine, entry])
.output()
.expect("failed to run ilo");
// `entry` may be a bare function name (`f`) or a function name plus
// whitespace-separated CLI args (`gp 5`). Split on whitespace so the
// CLI receives each token as its own argv slot — matches how the
// `examples_engines` harness invokes things.
let mut cmd = ilo();
cmd.arg(path.to_str().unwrap()).arg(engine);
for arg in entry.split_whitespace() {
cmd.arg(arg);
}
let out = cmd.output().expect("failed to run ilo");
assert!(
out.status.success(),
"ilo {engine} failed for `{src}`: stderr={}",
Expand All @@ -60,6 +66,30 @@ const SL_SIMPLE: &str = "f>n;5\n";
// Single-line baseline, multi-token return type.
const SL_RESULT: &str = "f>R t t;~\"hello\"\n";

// Multi-line list literal — items spread across lines. Previously
// `normalize_newlines` injected a `;` after `[` and between items, producing
// ILO-P009 "expected expression, got Semi". `[`/`]` now suppress newlines.
const ML_LIST_LITERAL: &str = "nums>L n\n xs=[\n 1,\n 2,\n 3\n ]\n xs\n";
// Multi-line list literal with leading commas (common when copy-pasting
// long columns of literal values).
const ML_LIST_LEADING_COMMA: &str = "nums>L n\n xs=[1\n ,2\n ,3]\n xs\n";
// Multi-line paren-grouped expression. `(`/`)` now suppress newlines and
// emit a space so adjacent-line tokens don't glue together (`(+x\n 1)`
// must not normalise to `(+x1)`).
const ML_PAREN: &str = "gp x:n>n\n y=(+x\n 1)\n y\n";
// Pipe chain across continuation lines. `>>` is never a statement start,
// so the `;` is suppressed when a continuation line begins with `>>`.
const ML_PIPE: &str = "pipe x:n>n\n x\n >>str\n >>len\n";
// Nested `(...)` inside `[...]` exercises both bracket kinds in the same
// source; they share the single `bracket_depth` counter. The leading-comma
// layout is common when columns of expressions are spread across lines.
const ML_NESTED_BRACKETS: &str = "nest>L n\n xs=[(+1 2)\n ,(+3 4)\n ,(+5 6)]\n xs\n";
// Multi-line `>>` pipe chain inside a `{...}` loop body. The pipe
// suppression must coexist with the `last_significant == '{'` rule for
// the line right after `{`.
const ML_PIPE_IN_BLOCK: &str =
"agg xs:L n>n\n s=0\n @x xs{\n v=x\n >>str\n >>len\n s=+s v\n }\n s\n";

fn check_all(engine: &str) {
assert_eq!(
run_file(engine, ML_RESULT, "f"),
Expand Down Expand Up @@ -91,6 +121,36 @@ fn check_all(engine: &str) {
"~hello",
"single-line R t t engine={engine}"
);
assert_eq!(
run_file(engine, ML_LIST_LITERAL, "nums"),
"[1, 2, 3]",
"multi-line list literal engine={engine}"
);
assert_eq!(
run_file(engine, ML_LIST_LEADING_COMMA, "nums"),
"[1, 2, 3]",
"multi-line list literal leading-comma engine={engine}"
);
assert_eq!(
run_file(engine, ML_PAREN, "gp 5"),
"6",
"multi-line paren expression engine={engine}"
);
assert_eq!(
run_file(engine, ML_PIPE, "pipe 42"),
"2",
"multi-line pipe chain engine={engine}"
);
assert_eq!(
run_file(engine, ML_NESTED_BRACKETS, "nest"),
"[3, 7, 11]",
"nested ( inside [ engine={engine}"
);
assert_eq!(
run_file(engine, ML_PIPE_IN_BLOCK, "agg [1,22,333]"),
"6",
"multi-line pipe inside loop body engine={engine}"
);
}

#[test]
Expand Down
Loading