ilo-lang · danieljohnmorris · May 19, 2026 · May 19, 2026 · May 19, 2026 · May 19, 2026
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,10 @@
 
 - `ls dir` renamed to `lsd dir`. Six rerun10 personas tripped ILO-P011 on `ls=rdl! p` because `ls` was reserved; rename frees `ls` for user code. `walk`, `glob` unchanged.
 
+### Fixed
+
+- CSV/TSV reader now tracks quote state across record separators. A cell containing `\n` (which the writer correctly emits as a quoted multi-line field per RFC 4180) used to be re-parsed as two rows, so `rd path "csv"` silently disagreed with `wr path data "csv"`. The reader is now a single-pass scanner over the whole document and round-trips multi-line quoted fields, embedded quotes, and CRLF line endings byte-stably across tree and VM. Surfaced by csv-pipeline rerun10.
+
 ## 0.12.0 - 2026-05-19
 
 ### Breaking

diff --git a/examples/csv-multiline-roundtrip.ilo b/examples/csv-multiline-roundtrip.ilo
@@ -0,0 +1,53 @@
+-- Multi-line quoted CSV fields round-trip cleanly.
+-- The writer emits cells containing `\n` as quoted multi-line records per
+-- RFC 4180; the reader tracks quote state across newlines so the same
+-- file reads back with the original row count and cell value.
+-- Regression for csv-pipeline rerun10.
+
+roundtrip>R n t;
+  path="/tmp/ilo_example_csv_multiline.csv";
+  rows=[["name","note","n"],["plain","line\nbreak","2"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len back
+
+celllen>R n t;
+  path="/tmp/ilo_example_csv_multiline_cell.csv";
+  rows=[["a","line\nbreak","b"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len (at back 0)
+
+-- An embedded double-quote inside a multi-line cell. The writer escapes
+-- the inner quote as `""` per RFC 4180 and wraps the whole cell in `"`.
+-- The reader keeps the cell intact across both the embedded `""` and `\n`.
+-- We assert on cell length (16 chars: `he said "hi"\nfoo`) so the example
+-- harness can compare a single-line stdout value.
+quoteml>R n t;
+  path="/tmp/ilo_example_csv_quote_multiline.csv";
+  rows=[["a","he said \"hi\"\nfoo","b"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len (at (at back 0) 1)
+
+-- CRLF inside a quoted cell must be kept verbatim, not collapsed or split.
+-- The writer emits LF row separators; this example constructs a CRLF cell
+-- explicitly and round-trips it.
+crlfcell>R n t;
+  path="/tmp/ilo_example_csv_crlf_cell.csv";
+  rows=[["a","x\r\ny","b"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len (at (at back 0) 1)
+
+-- run: roundtrip
+-- out: 2
+
+-- run: celllen
+-- out: 3
+
+-- run: quoteml
+-- out: 16
+
+-- run: crlfcell
+-- out: 4
diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs
@@ -642,13 +642,11 @@ fn parse_format(fmt: &str, content: &str) -> std::result::Result<Value, String>
     match fmt {
         "csv" | "tsv" => {
             let sep = if fmt == "tsv" { '\t' } else { ',' };
-            let rows: Vec<Value> = content
-                .lines()
-                .map(|line| {
-                    let fields: Vec<Value> = parse_csv_row(line, sep)
-                        .into_iter()
-                        .map(|s| Value::Text(Arc::new(s)))
-                        .collect();
+            let rows: Vec<Value> = parse_csv_content(content, sep)
+                .into_iter()
+                .map(|row| {
+                    let fields: Vec<Value> =
+                        row.into_iter().map(|s| Value::Text(Arc::new(s))).collect();
                     Value::List(Arc::new(fields))
                 })
                 .collect();
@@ -661,12 +659,26 @@ fn parse_format(fmt: &str, content: &str) -> std::result::Result<Value, String>
     }
 }
 
-/// Parse one CSV/TSV row respecting double-quoted fields.
-fn parse_csv_row(line: &str, sep: char) -> Vec<String> {
-    let mut fields = Vec::new();
+/// Parse a full CSV/TSV document into rows of fields, RFC 4180 compliant.
+///
+/// Unlike a line-based split, this scanner tracks quote state across record
+/// boundaries so that a quoted field containing an embedded newline is
+/// preserved as a single cell. Both `\n` and `\r\n` are accepted as record
+/// separators outside quotes; inside quotes they are kept verbatim. A final
+/// trailing newline does not produce an extra empty row.
+///
+/// History: the previous implementation called `content.lines()` then handed
+/// each line to a per-line quote-aware parser. That meant any cell containing
+/// `\n` (which `write_csv_tsv` correctly emits as a quoted multi-line field
+/// per RFC 4180) was re-parsed as two rows on the way back in, so ilo silently
+/// mis-parsed CSV it had just written. csv-pipeline rerun10 flagged this as
+/// the one blocker on round-trip integrity.
+fn parse_csv_content(content: &str, sep: char) -> Vec<Vec<String>> {
+    let mut rows: Vec<Vec<String>> = Vec::new();
+    let mut row: Vec<String> = Vec::new();
     let mut field = String::new();
     let mut in_quotes = false;
-    let mut chars = line.chars().peekable();
+    let mut chars = content.chars().peekable();
     while let Some(c) = chars.next() {
         if in_quotes {
             if c == '"' {
@@ -677,20 +689,46 @@ fn parse_csv_row(line: &str, sep: char) -> Vec<String> {
                     in_quotes = false;
                 }
             } else {
+                // Inside quotes, newlines (including \r\n) are part of the
+                // field. Keep them verbatim.
                 field.push(c);
             }
         } else if c == '"' {
             in_quotes = true;
         } else if c == sep {
-            fields.push(std::mem::take(&mut field));
+            row.push(std::mem::take(&mut field));
+        } else if c == '\n' {
+            row.push(std::mem::take(&mut field));
+            rows.push(std::mem::take(&mut row));
+        } else if c == '\r' {
+            // Accept \r\n as a record terminator; bare \r outside quotes is
+            // treated the same way (matches `content.lines()` previously and
+            // keeps platform-CR-only files readable).
+            if chars.peek() == Some(&'\n') {
+                chars.next();
+            }
+            row.push(std::mem::take(&mut field));
+            rows.push(std::mem::take(&mut row));
         } else {
             field.push(c);
         }
     }
-    fields.push(field);
-    fields
+    // Flush the trailing record. A file that ends with `\n` already emitted
+    // its last row in the loop and field/row are empty here — skip pushing
+    // a spurious empty row in that case. A file with no trailing newline
+    // still has one record left to flush.
+    if !field.is_empty() || !row.is_empty() || in_quotes {
+        row.push(field);
+        rows.push(row);
+    }
+    rows
 }
 
+// Note: a separate per-line `parse_csv_row` previously existed but its only
+// caller (`parse_format`) was the source of the multi-line round-trip bug
+// fixed in csv-pipeline rerun10. The full-document `parse_csv_content` above
+// is now the single entry point for csv/tsv parsing.
+
 // ── Linear algebra helpers ──────────────────────────────────────────
 
 /// Coerce a `Value` into a row-major matrix `Vec<Vec<f64>>`.
@@ -9535,22 +9573,168 @@ mod tests {
         assert_eq!(format!("{}", Value::FnRef("add".into())), "<fn:add>");
     }
 
-    // L268-279: parse_csv_row with quoted fields
+    // Single-row quoted-field coverage now lives on parse_csv_content;
+    // the previous per-line `parse_csv_row` helper was removed as part of
+    // the csv-pipeline rerun10 fix. See parse_csv_content_* tests below.
+    #[test]
+    fn parse_csv_content_single_row_escaped_quote() {
+        let rows = parse_csv_content(r#""he said ""hello""","world""#, ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec![r#"he said "hello""#, "world"]);
+    }
+
+    #[test]
+    fn parse_csv_content_single_row_simple_quoted() {
+        let rows = parse_csv_content(r#""hello","world""#, ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec!["hello", "world"]);
+    }
+
+    // ── parse_csv_content: quote-state tracking across newlines ───────────────
+    // Regression for csv-pipeline rerun10: the reader used to split content
+    // on `\n` before tracking quote state, so a multi-line quoted field
+    // (which the writer correctly emits per RFC 4180) was mis-parsed as
+    // two rows. parse_csv_content now scans the full document in one pass.
+
+    #[test]
+    fn parse_csv_content_multiline_quoted_field() {
+        // The writer emits "line\nbreak" as a quoted multi-line cell.
+        // The reader must keep it as a single cell across the embedded \n.
+        let input = "name,note,n\nplain,\"line\nbreak\",2\n";
+        let rows = parse_csv_content(input, ',');
+        assert_eq!(rows.len(), 2);
+        assert_eq!(rows[0], vec!["name", "note", "n"]);
+        assert_eq!(rows[1], vec!["plain", "line\nbreak", "2"]);
+    }
+
+    #[test]
+    fn parse_csv_content_basic_no_trailing_newline() {
+        let rows = parse_csv_content("a,b\nc,d", ',');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_basic_trailing_newline_no_phantom_row() {
+        // A file ending in `\n` should NOT yield a trailing empty row.
+        let rows = parse_csv_content("a,b\nc,d\n", ',');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_crlf_line_endings() {
+        let rows = parse_csv_content("a,b\r\nc,d\r\n", ',');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_crlf_inside_quoted_field_preserved() {
+        // \r\n inside a quoted cell is part of the cell, not a record break.
+        let rows = parse_csv_content("a,\"x\r\ny\"\n", ',');
+        assert_eq!(rows, vec![vec!["a".to_string(), "x\r\ny".to_string()]]);
+    }
+
     #[test]
-    fn parse_csv_row_quoted_fields() {
-        // quoted field + escaped double-quote inside
-        let rows = parse_csv_row(r#""he said ""hello""","world""#, ',');
+    fn parse_csv_content_escaped_quote_inside_multiline_field() {
+        // Combined edge case: embedded newline AND escaped quote in one cell.
+        let input = "a,\"he said \"\"hi\"\"\nfoo\",b\n";
+        let rows = parse_csv_content(input, ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec!["a", "he said \"hi\"\nfoo", "b"]);
+    }
+
+    #[test]
+    fn parse_csv_content_tsv_separator() {
+        // Same scanner, tab separator.
+        let rows = parse_csv_content("a\tb\nc\td\n", '\t');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_input() {
+        let rows = parse_csv_content("", ',');
+        assert!(rows.is_empty());
+    }
+
+    #[test]
+    fn parse_csv_content_embedded_comma_in_quoted_field() {
+        // A comma inside a quoted cell is part of the cell, not a separator.
+        let rows = parse_csv_content("a,\"x,y\",b\n", ',');
+        assert_eq!(rows, vec![vec!["a", "x,y", "b"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_mixed_quoted_and_unquoted_in_same_row() {
+        // Real-world CSV mixes quoted and unquoted cells freely. The scanner
+        // must handle both in a single row.
+        let rows = parse_csv_content("alice,\"engineer, sr.\",30,\"London\"\n", ',');
+        assert_eq!(rows, vec![vec!["alice", "engineer, sr.", "30", "London"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_trailing_field() {
+        // A row ending with a separator means the last cell is empty. This is
+        // a common spreadsheet artefact ("alice,30," for a missing column).
+        let rows = parse_csv_content("a,b,\nc,d,\n", ',');
+        assert_eq!(rows, vec![vec!["a", "b", ""], vec!["c", "d", ""]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_field_in_middle() {
+        // ",,," produces ["", "", "", ""] -- four cells, three of them empty.
+        let rows = parse_csv_content("a,,b\n", ',');
+        assert_eq!(rows, vec![vec!["a", "", "b"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_quoted_field() {
+        // "" is the canonical empty quoted cell -- not a stray escape.
+        let rows = parse_csv_content("a,\"\",b\n", ',');
+        assert_eq!(rows, vec![vec!["a", "", "b"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_utf8_bom_preserved_in_first_cell() {
+        // We don't currently strip a leading UTF-8 BOM. Pin the current
+        // behaviour so future BOM handling (if added) is a deliberate change,
+        // not silent drift. The BOM is the three bytes EF BB BF (\u{feff}).
+        let rows = parse_csv_content("\u{feff}name,age\nalice,30\n", ',');
         assert_eq!(rows.len(), 2);
-        assert_eq!(rows[0], r#"he said "hello""#);
-        assert_eq!(rows[1], "world");
+        assert_eq!(rows[0], vec!["\u{feff}name", "age"]);
+        assert_eq!(rows[1], vec!["alice", "30"]);
     }
 
     #[test]
-    fn parse_csv_row_simple_quoted() {
-        // plain quoted field (no escaped quotes)
-        let rows = parse_csv_row(r#""hello","world""#, ',');
-        assert_eq!(rows[0], "hello");
-        assert_eq!(rows[1], "world");
+    fn parse_csv_content_single_unterminated_quoted_field() {
+        // Malformed input: open quote with no close. The scanner should not
+        // panic and should still produce the partial cell so the user can
+        // see the broken data rather than getting a silent empty result.
+        let rows = parse_csv_content("a,\"oops\n", ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec!["a", "oops\n"]);
+    }
+
+    #[test]
+    fn parse_csv_content_round_trip_via_write_csv_tsv() {
+        // End-to-end: the canonical regression. Take a row with a multi-line
+        // cell, write it via write_csv_tsv, then parse_csv_content the result.
+        // The original cells should come back byte-for-byte.
+        let original = vec![
+            Value::List(Arc::new(vec![
+                Value::Text(Arc::new("name".to_string())),
+                Value::Text(Arc::new("note".to_string())),
+                Value::Text(Arc::new("n".to_string())),
+            ])),
+            Value::List(Arc::new(vec![
+                Value::Text(Arc::new("plain".to_string())),
+                Value::Text(Arc::new("line\nbreak".to_string())),
+                Value::Number(2.0),
+            ])),
+        ];
+        let serialised = write_csv_tsv(&original, ',').expect("write_csv_tsv failed");
+        let rows = parse_csv_content(&serialised, ',');
+        assert_eq!(rows.len(), 2, "round-trip produced wrong row count");
+        assert_eq!(rows[0], vec!["name", "note", "n"]);
+        assert_eq!(rows[1], vec!["plain", "line\nbreak", "2"]);
     }
 
     // L299: len on Map