From 6b81a7fe74b122c4a4acf729661ae4448c4d1b53 Mon Sep 17 00:00:00 2001
From: Daniel Morris <danieljohnmorris@gmail.com>
Date: Tue, 19 May 2026 17:52:23 +0100
Subject: [PATCH 1/4] fix(interpreter): scan CSV/TSV in one pass with
 quote-state tracking

parse_format split content via content.lines() before tracking quote
state, so a cell containing \n (which write_csv_tsv correctly emits as
a quoted multi-line field per RFC 4180) was re-parsed as two rows. ilo
silently mis-read CSV it had just written. csv-pipeline rerun10 caught
the round-trip drift: wrote 4 rows, read back 5.

Replace the line-by-line approach with parse_csv_content: a single-pass
scanner that walks the whole document, treats \n / \r\n as record
separators only when out of quotes, keeps embedded newlines and CRLF
verbatim inside quoted cells, and never emits a phantom trailing row
for files that end in \n. Same signature shape going in, Vec<Vec<String>>
coming out.

Unit tests cover multi-line cells, escaped quotes inside multi-line
cells, CRLF row separators, CRLF inside a quoted field, embedded commas,
mixed quoted/unquoted cells in one row, empty trailing fields, empty
quoted fields, the UTF-8 BOM (pinned as preserved), unterminated quoted
fields, and a full write_csv_tsv -> parse_csv_content round-trip.
---
 src/interpreter/mod.rs | 234 ++++++++++++++++++++++++++++++++++++-----
 1 file changed, 209 insertions(+), 25 deletions(-)
diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs
index 7175a40c..97a1dc81 100644
--- a/src/interpreter/mod.rs
+++ b/src/interpreter/mod.rs
@@ -642,13 +642,11 @@ fn parse_format(fmt: &str, content: &str) -> std::result::Result<Value, String>
     match fmt {
         "csv" | "tsv" => {
             let sep = if fmt == "tsv" { '\t' } else { ',' };
-            let rows: Vec<Value> = content
-                .lines()
-                .map(|line| {
-                    let fields: Vec<Value> = parse_csv_row(line, sep)
-                        .into_iter()
-                        .map(|s| Value::Text(Arc::new(s)))
-                        .collect();
+            let rows: Vec<Value> = parse_csv_content(content, sep)
+                .into_iter()
+                .map(|row| {
+                    let fields: Vec<Value> =
+                        row.into_iter().map(|s| Value::Text(Arc::new(s))).collect();
                     Value::List(Arc::new(fields))
                 })
                 .collect();
@@ -661,12 +659,26 @@ fn parse_format(fmt: &str, content: &str) -> std::result::Result<Value, String>
     }
 }
 
-/// Parse one CSV/TSV row respecting double-quoted fields.
-fn parse_csv_row(line: &str, sep: char) -> Vec<String> {
-    let mut fields = Vec::new();
+/// Parse a full CSV/TSV document into rows of fields, RFC 4180 compliant.
+///
+/// Unlike a line-based split, this scanner tracks quote state across record
+/// boundaries so that a quoted field containing an embedded newline is
+/// preserved as a single cell. `\n`, `\r\n`, and a bare `\r` are all record
+/// separators outside quotes; inside quotes they are kept verbatim. A final
+/// trailing newline does not produce an extra empty row.
+///
+/// History: the previous implementation called `content.lines()` then handed
+/// each line to a per-line quote-aware parser. That meant any cell containing
+/// `\n` (which `write_csv_tsv` correctly emits as a quoted multi-line field
+/// per RFC 4180) was re-parsed as two rows on the way back in, so ilo silently
+/// mis-parsed CSV it had just written. csv-pipeline rerun10 flagged this as
+/// the one blocker on round-trip integrity.
+fn parse_csv_content(content: &str, sep: char) -> Vec<Vec<String>> {
+    let mut rows: Vec<Vec<String>> = Vec::new();
+    let mut row: Vec<String> = Vec::new();
     let mut field = String::new();
     let mut in_quotes = false;
-    let mut chars = line.chars().peekable();
+    let mut chars = content.chars().peekable();
     while let Some(c) = chars.next() {
         if in_quotes {
             if c == '"' {
@@ -677,20 +689,46 @@ fn parse_csv_row(line: &str, sep: char) -> Vec<String> {
                     in_quotes = false;
                 }
             } else {
+                // Inside quotes, newlines (including \r\n) are part of the
+                // field. Keep them verbatim.
                 field.push(c);
             }
         } else if c == '"' {
             in_quotes = true;
         } else if c == sep {
-            fields.push(std::mem::take(&mut field));
+            row.push(std::mem::take(&mut field));
+        } else if c == '\n' {
+            row.push(std::mem::take(&mut field));
+            rows.push(std::mem::take(&mut row));
+        } else if c == '\r' {
+            // Accept \r\n as a record terminator; bare \r outside quotes is
+            // treated the same way. Note `content.lines()` never split on a
+            // lone \r, so CR-only files now read as rows instead of one line.
+            if chars.peek() == Some(&'\n') {
+                chars.next();
+            }
+            row.push(std::mem::take(&mut field));
+            rows.push(std::mem::take(&mut row));
         } else {
             field.push(c);
         }
     }
-    fields.push(field);
-    fields
+    // Flush the trailing record. A file that ends with `\n` already emitted
+    // its last row in the loop and field/row are empty here — skip pushing
+    // a spurious empty row in that case. A file with no trailing newline
+    // still has one record left to flush.
+    if !field.is_empty() || !row.is_empty() || in_quotes {
+        row.push(field);
+        rows.push(row);
+    }
+    rows
 }
 
+// Note: a separate per-line `parse_csv_row` previously existed but its only
+// caller (`parse_format`) was the source of the multi-line round-trip bug
+// flagged by csv-pipeline rerun10. The full-document `parse_csv_content` above
+// is now the single entry point for csv/tsv parsing.
+
 // ── Linear algebra helpers ──────────────────────────────────────────
 
 /// Coerce a `Value` into a row-major matrix `Vec<Vec<f64>>`.
@@ -9535,22 +9573,168 @@ mod tests {
         assert_eq!(format!("{}", Value::FnRef("add".into())), "<fn:add>");
     }
 
-    // L268-279: parse_csv_row with quoted fields
+    // Single-row quoted-field coverage now lives on parse_csv_content;
+    // the previous per-line `parse_csv_row` helper was removed as part of
+    // the csv-pipeline rerun10 fix. See parse_csv_content_* tests below.
+    #[test]
+    fn parse_csv_content_single_row_escaped_quote() {
+        let rows = parse_csv_content(r#""he said ""hello""","world""#, ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec![r#"he said "hello""#, "world"]);
+    }
+
+    #[test]
+    fn parse_csv_content_single_row_simple_quoted() {
+        let rows = parse_csv_content(r#""hello","world""#, ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec!["hello", "world"]);
+    }
+
+    // ── parse_csv_content: quote-state tracking across newlines ───────────────
+    // Regression for csv-pipeline rerun10: the reader used to split content
+    // on `\n` before tracking quote state, so a multi-line quoted field
+    // (which the writer correctly emits per RFC 4180) was mis-parsed as
+    // two rows. parse_csv_content now scans the full document in one pass.
+
+    #[test]
+    fn parse_csv_content_multiline_quoted_field() {
+        // The writer emits "line\nbreak" as a quoted multi-line cell.
+        // The reader must keep it as a single cell across the embedded \n.
+        let input = "name,note,n\nplain,\"line\nbreak\",2\n";
+        let rows = parse_csv_content(input, ',');
+        assert_eq!(rows.len(), 2);
+        assert_eq!(rows[0], vec!["name", "note", "n"]);
+        assert_eq!(rows[1], vec!["plain", "line\nbreak", "2"]);
+    }
+
+    #[test]
+    fn parse_csv_content_basic_no_trailing_newline() {
+        let rows = parse_csv_content("a,b\nc,d", ',');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_basic_trailing_newline_no_phantom_row() {
+        // A file ending in `\n` should NOT yield a trailing empty row.
+        let rows = parse_csv_content("a,b\nc,d\n", ',');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_crlf_line_endings() {
+        let rows = parse_csv_content("a,b\r\nc,d\r\n", ',');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_crlf_inside_quoted_field_preserved() {
+        // \r\n inside a quoted cell is part of the cell, not a record break.
+        let rows = parse_csv_content("a,\"x\r\ny\"\n", ',');
+        assert_eq!(rows, vec![vec!["a".to_string(), "x\r\ny".to_string()]]);
+    }
+
     #[test]
-    fn parse_csv_row_quoted_fields() {
-        // quoted field + escaped double-quote inside
-        let rows = parse_csv_row(r#""he said ""hello""","world""#, ',');
+    fn parse_csv_content_escaped_quote_inside_multiline_field() {
+        // Combined edge case: embedded newline AND escaped quote in one cell.
+        let input = "a,\"he said \"\"hi\"\"\nfoo\",b\n";
+        let rows = parse_csv_content(input, ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec!["a", "he said \"hi\"\nfoo", "b"]);
+    }
+
+    #[test]
+    fn parse_csv_content_tsv_separator() {
+        // Same scanner, tab separator.
+        let rows = parse_csv_content("a\tb\nc\td\n", '\t');
+        assert_eq!(rows, vec![vec!["a", "b"], vec!["c", "d"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_input() {
+        let rows = parse_csv_content("", ',');
+        assert!(rows.is_empty());
+    }
+
+    #[test]
+    fn parse_csv_content_embedded_comma_in_quoted_field() {
+        // A comma inside a quoted cell is part of the cell, not a separator.
+        let rows = parse_csv_content("a,\"x,y\",b\n", ',');
+        assert_eq!(rows, vec![vec!["a", "x,y", "b"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_mixed_quoted_and_unquoted_in_same_row() {
+        // Real-world CSV mixes quoted and unquoted cells freely. The scanner
+        // must handle both in a single row.
+        let rows = parse_csv_content("alice,\"engineer, sr.\",30,\"London\"\n", ',');
+        assert_eq!(rows, vec![vec!["alice", "engineer, sr.", "30", "London"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_trailing_field() {
+        // A row ending with a separator means the last cell is empty. This is
+        // a common spreadsheet artefact ("alice,30," for a missing column).
+        let rows = parse_csv_content("a,b,\nc,d,\n", ',');
+        assert_eq!(rows, vec![vec!["a", "b", ""], vec!["c", "d", ""]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_field_in_middle() {
+        // "a,,b" produces ["a", "", "b"] -- adjacent separators yield one empty cell.
+        let rows = parse_csv_content("a,,b\n", ',');
+        assert_eq!(rows, vec![vec!["a", "", "b"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_empty_quoted_field() {
+        // "" is the canonical empty quoted cell -- not a stray escape.
+        let rows = parse_csv_content("a,\"\",b\n", ',');
+        assert_eq!(rows, vec![vec!["a", "", "b"]]);
+    }
+
+    #[test]
+    fn parse_csv_content_utf8_bom_preserved_in_first_cell() {
+        // We don't currently strip a leading UTF-8 BOM. Pin the current
+        // behaviour so future BOM handling (if added) is a deliberate change,
+        // not silent drift. The BOM is the three bytes EF BB BF (\u{feff}).
+        let rows = parse_csv_content("\u{feff}name,age\nalice,30\n", ',');
         assert_eq!(rows.len(), 2);
-        assert_eq!(rows[0], r#"he said "hello""#);
-        assert_eq!(rows[1], "world");
+        assert_eq!(rows[0], vec!["\u{feff}name", "age"]);
+        assert_eq!(rows[1], vec!["alice", "30"]);
     }
 
     #[test]
-    fn parse_csv_row_simple_quoted() {
-        // plain quoted field (no escaped quotes)
-        let rows = parse_csv_row(r#""hello","world""#, ',');
-        assert_eq!(rows[0], "hello");
-        assert_eq!(rows[1], "world");
+    fn parse_csv_content_single_unterminated_quoted_field() {
+        // Malformed input: open quote with no close. The scanner should not
+        // panic and should still produce the partial cell so the user can
+        // see the broken data rather than getting a silent empty result.
+        let rows = parse_csv_content("a,\"oops\n", ',');
+        assert_eq!(rows.len(), 1);
+        assert_eq!(rows[0], vec!["a", "oops\n"]);
+    }
+
+    #[test]
+    fn parse_csv_content_round_trip_via_write_csv_tsv() {
+        // End-to-end: the canonical regression. Take a row with a multi-line
+        // cell, write it via write_csv_tsv, then parse_csv_content the result.
+        // The original cells should come back byte-for-byte.
+        let original = vec![
+            Value::List(Arc::new(vec![
+                Value::Text(Arc::new("name".to_string())),
+                Value::Text(Arc::new("note".to_string())),
+                Value::Text(Arc::new("n".to_string())),
+            ])),
+            Value::List(Arc::new(vec![
+                Value::Text(Arc::new("plain".to_string())),
+                Value::Text(Arc::new("line\nbreak".to_string())),
+                Value::Number(2.0),
+            ])),
+        ];
+        let serialised = write_csv_tsv(&original, ',').expect("write_csv_tsv failed");
+        let rows = parse_csv_content(&serialised, ',');
+        assert_eq!(rows.len(), 2, "round-trip produced wrong row count");
+        assert_eq!(rows[0], vec!["name", "note", "n"]);
+        assert_eq!(rows[1], vec!["plain", "line\nbreak", "2"]);
     }
 
     // L299: len on Map

From 25f5c129e3485bd6ebcb0ae41908b34552149781 Mon Sep 17 00:00:00 2001
From: Daniel Morris <danieljohnmorris@gmail.com>
Date: Tue, 19 May 2026 17:52:31 +0100
Subject: [PATCH 2/4] fix(vm): scan CSV/TSV in one pass to match interpreter
 scanner

vm_parse_format had the same bug as the tree-walker: content.lines()
ran before quote-state tracking, so a quoted multi-line cell came back
as two rows. The VM is the default engine on 0.12.x so any csv-pipeline
user hit this on every read.

vm_parse_csv_content mirrors interpreter::parse_csv_content -- single
pass, in_quotes state, \n and \r\n as record separators outside quotes,
preserved inside. Both code paths now use the same algorithm so a write
on one engine and a read on another stays byte-stable.
---
 src/vm/mod.rs | 52 +++++++++++++++++++++++++++++++++++++--------------
 1 file changed, 38 insertions(+), 14 deletions(-)

diff --git a/src/vm/mod.rs b/src/vm/mod.rs
index 42f8ba96..f377fd09 100644
--- a/src/vm/mod.rs
+++ b/src/vm/mod.rs
@@ -12846,13 +12846,10 @@ fn vm_parse_format(fmt: &str, content: &str) -> Result<NanVal, NanVal> {
     match fmt {
         "csv" | "tsv" => {
             let sep = if fmt == "tsv" { '\t' } else { ',' };
-            let rows: Vec<NanVal> = content
-                .lines()
-                .map(|line| {
-                    let fields: Vec<NanVal> = vm_parse_csv_row(line, sep)
-                        .into_iter()
-                        .map(NanVal::heap_string)
-                        .collect();
+            let rows: Vec<NanVal> = vm_parse_csv_content(content, sep)
+                .into_iter()
+                .map(|row| {
+                    let fields: Vec<NanVal> = row.into_iter().map(NanVal::heap_string).collect();
                     NanVal::heap_list(fields)
                 })
                 .collect();
@@ -12865,11 +12862,26 @@ fn vm_parse_format(fmt: &str, content: &str) -> Result<NanVal, NanVal> {
     }
 }
 
-fn vm_parse_csv_row(line: &str, sep: char) -> Vec<String> {
-    let mut fields = Vec::new();
+/// Parse a full CSV/TSV document into rows of fields, RFC 4180 compliant.
+///
+/// Mirrors `interpreter::parse_csv_content`: tracks quote state across
+/// record separators so a quoted field containing an embedded newline is
+/// preserved as a single cell. Accepts `\n`, `\r\n`, and a bare `\r` as record
+/// terminators outside quotes; inside quotes they are kept verbatim. A
+/// trailing newline does not produce an extra empty row.
+///
+/// History: csv-pipeline rerun10. The VM's vm_parse_format previously
+/// called `content.lines()` then handed each line to a per-line quote
+/// parser, so any cell containing `\n` (which `write_csv_tsv` correctly
+/// emits as a quoted multi-line field) was mis-read as two rows. The
+/// tree-walker had the same bug in `parse_format`; both code paths now
+/// use a single-pass scanner so cross-engine round-trip is byte-stable.
+fn vm_parse_csv_content(content: &str, sep: char) -> Vec<Vec<String>> {
+    let mut rows: Vec<Vec<String>> = Vec::new();
+    let mut row: Vec<String> = Vec::new();
     let mut field = String::new();
     let mut in_quotes = false;
-    let mut chars = line.chars().peekable();
+    let mut chars = content.chars().peekable();
     while let Some(c) = chars.next() {
         if in_quotes {
             if c == '"' {
@@ -12885,13 +12897,25 @@ fn vm_parse_csv_row(line: &str, sep: char) -> Vec<String> {
         } else if c == '"' {
             in_quotes = true;
         } else if c == sep {
-            fields.push(std::mem::take(&mut field));
+            row.push(std::mem::take(&mut field));
+        } else if c == '\n' {
+            row.push(std::mem::take(&mut field));
+            rows.push(std::mem::take(&mut row));
+        } else if c == '\r' {
+            if chars.peek() == Some(&'\n') {
+                chars.next();
+            }
+            row.push(std::mem::take(&mut field));
+            rows.push(std::mem::take(&mut row));
         } else {
             field.push(c);
         }
     }
-    fields.push(field);
-    fields
+    if !field.is_empty() || !row.is_empty() || in_quotes {
+        row.push(field);
+        rows.push(row);
+    }
+    rows
 }
 
 fn nanval_equal(a: NanVal, b: NanVal) -> bool {
@@ -27206,7 +27230,7 @@ mod tests {
         assert_eq!(*inner, Value::Text(Arc::new("hello raw".to_string())));
     }
 
-    // ── vm_parse_csv_row quoted fields (lines 4295-4306) ─────────────────────
+    // ── vm_parse_csv_content quoted fields (lines 4295-4306) ─────────────────
 
     // lines 4295-4306: OP_RD on .csv file with quoted fields (double-quote escaping)
     #[test]

From 7cd42f3386103f8fec4f6cd87c71e6773ff81b2e Mon Sep 17 00:00:00 2001
From: Daniel Morris <danieljohnmorris@gmail.com>
Date: Tue, 19 May 2026 17:52:38 +0100
Subject: [PATCH 3/4] test: cross-engine CSV multi-line round-trip regression +
 example

tests/regression_csv_multiline_roundtrip.rs drives wr! then rd! across
tree and VM. Pins the row count, the multi-line cell value, the
quote+newline combined case, and the plain (no-newline) negative
control so a future regression in either engine fails loudly.

examples/csv-multiline-roundtrip.ilo gives agents an in-context example
of the now-correct behaviour and rides the existing examples_engines
harness so every engine runs it on each test pass. Covers a row-count
round-trip, a single-cell length round-trip, an embedded-quote +
embedded-newline cell, and a CRLF cell.
---
 examples/csv-multiline-roundtrip.ilo        |  53 +++++++++
 tests/regression_csv_multiline_roundtrip.rs | 117 ++++++++++++++++++++
 2 files changed, 170 insertions(+)
 create mode 100644 examples/csv-multiline-roundtrip.ilo
 create mode 100644 tests/regression_csv_multiline_roundtrip.rs

diff --git a/examples/csv-multiline-roundtrip.ilo b/examples/csv-multiline-roundtrip.ilo
new file mode 100644
index 00000000..fb6335d3
--- /dev/null
+++ b/examples/csv-multiline-roundtrip.ilo
@@ -0,0 +1,53 @@
+-- Multi-line quoted CSV fields round-trip cleanly.
+-- The writer emits cells containing `\n` as quoted multi-line records per
+-- RFC 4180; the reader tracks quote state across newlines so the same
+-- file reads back with the original row count and cell value.
+-- Regression for csv-pipeline rerun10.
+
+roundtrip>R n t;
+  path="/tmp/ilo_example_csv_multiline.csv";
+  rows=[["name","note","n"],["plain","line\nbreak","2"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len back
+
+celllen>R n t;
+  path="/tmp/ilo_example_csv_multiline_cell.csv";
+  rows=[["a","line\nbreak","b"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len (at back 0)
+
+-- An embedded double-quote inside a multi-line cell. The writer escapes
+-- the inner quote as `""` per RFC 4180 and wraps the whole cell in `"`.
+-- The reader keeps the cell intact across both the embedded `""` and `\n`.
+-- We assert on cell length (16 chars: `he said "hi"\nfoo`) so the example
+-- harness can compare a single-line stdout value.
+quoteml>R n t;
+  path="/tmp/ilo_example_csv_quote_multiline.csv";
+  rows=[["a","he said \"hi\"\nfoo","b"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len (at (at back 0) 1)
+
+-- CRLF inside a quoted cell must be kept verbatim, not collapsed or split.
+-- The writer emits LF row separators; this example constructs a CRLF cell
+-- explicitly and round-trips it.
+crlfcell>R n t;
+  path="/tmp/ilo_example_csv_crlf_cell.csv";
+  rows=[["a","x\r\ny","b"]];
+  wr! path rows "csv";
+  back=rd! path "csv";
+  ~len (at (at back 0) 1)
+
+-- run: roundtrip
+-- out: 2
+
+-- run: celllen
+-- out: 3
+
+-- run: quoteml
+-- out: 16
+
+-- run: crlfcell
+-- out: 4
diff --git a/tests/regression_csv_multiline_roundtrip.rs b/tests/regression_csv_multiline_roundtrip.rs
new file mode 100644
index 00000000..b6d78841
--- /dev/null
+++ b/tests/regression_csv_multiline_roundtrip.rs
@@ -0,0 +1,117 @@
+// Regression for csv-pipeline rerun10:
+//
+//   `wr path data "csv"` correctly emits a quoted, multi-line field per
+//   RFC 4180. But `rd path "csv"` used to split content on `\n` before
+//   tracking quote state, so the embedded newline was treated as a record
+//   break and one logical row came back as two.
+//
+//   Repro from the persona report:
+//     wrote 3 rows: [name,note,n], [Frame, Gamma,"has ""quote""",1],
+//                   [plain,"line\nbreak",2]
+//     re-read got 4 rows: [name,note,n], [Frame, Gamma,has "quote",1],
+//                         [plain,line], [break,2]
+//
+// The fix replaces the line-by-line approach in `parse_format` with a
+// single-pass quote-aware scanner (`parse_csv_content`). These tests pin
+// the round-trip across every available engine — if any engine regresses,
+// or a future change drifts the writer and reader out of step, this fails.
+
+use std::process::Command;
+
+fn ilo() -> Command {
+    Command::new(env!("CARGO_BIN_EXE_ilo"))
+}
+
+fn run_ok(engine: &str, src: &str, entry: &str) -> String {
+    let out = ilo()
+        .args([src, engine, entry])
+        .output()
+        .expect("failed to run ilo");
+    assert!(
+        out.status.success(),
+        "ilo {engine} failed for `{src}`: stderr={}",
+        String::from_utf8_lossy(&out.stderr)
+    );
+    String::from_utf8_lossy(&out.stdout).trim().to_string()
+}
+
+fn engines() -> &'static [&'static str] {
+    &["--run-tree", "--run-vm"]
+}
+
+// Canonical regression: write a row with an embedded newline, then read
+// the file back as csv. The row count must survive the round-trip.
+#[test]
+fn csv_multiline_quoted_field_round_trip_row_count() {
+    for (i, engine) in engines().iter().enumerate() {
+        let path = format!("/tmp/ilo_csv_ml_rt_count_{i}.csv");
+        let _ = std::fs::remove_file(&path);
+        // Two rows: a header and a body row whose middle cell contains a
+        // literal newline. Entry returns the row count read back.
+        let src = format!(
+            r#"f>R n t;wr! "{path}" [["name","note","n"],["plain","line\nbreak","2"]] "csv";rows=rd! "{path}" "csv";~len rows"#
+        );
+        let got = run_ok(engine, &src, "f");
+        assert_eq!(
+            got, "2",
+            "engine={engine}: round-trip row count drifted (writer emitted multi-line quoted cell, reader split it)"
+        );
+        let _ = std::fs::remove_file(&path);
+    }
+}
+
+// The embedded-newline cell must come back as a single cell with the
+// `\n` preserved, not as two cells across two rows.
+#[test]
+fn csv_multiline_quoted_field_round_trip_cell_value() {
+    for (i, engine) in engines().iter().enumerate() {
+        let path = format!("/tmp/ilo_csv_ml_rt_cell_{i}.csv");
+        let _ = std::fs::remove_file(&path);
+        // After read-back, rows[1][1] should be "line\nbreak".
+        let src = format!(
+            r#"f>R t t;wr! "{path}" [["name","note","n"],["plain","line\nbreak","2"]] "csv";rows=rd! "{path}" "csv";~at (at rows 1) 1"#
+        );
+        let got = run_ok(engine, &src, "f");
+        assert_eq!(
+            got, "line\nbreak",
+            "engine={engine}: multi-line cell did not round-trip verbatim"
+        );
+        let _ = std::fs::remove_file(&path);
+    }
+}
+
+// Combined edge case: a single cell with BOTH an embedded newline AND an
+// escaped double-quote. This exercises quote-state tracking across the
+// embedded `""` and the embedded `\n`.
+#[test]
+fn csv_multiline_with_escaped_quote_round_trip() {
+    for (i, engine) in engines().iter().enumerate() {
+        let path = format!("/tmp/ilo_csv_ml_rt_qn_{i}.csv");
+        let _ = std::fs::remove_file(&path);
+        let src = format!(
+            r#"f>R t t;wr! "{path}" [["a","he said \"hi\"\nfoo","b"]] "csv";rows=rd! "{path}" "csv";~at (at rows 0) 1"#
+        );
+        let got = run_ok(engine, &src, "f");
+        assert_eq!(
+            got, "he said \"hi\"\nfoo",
+            "engine={engine}: quote+newline cell did not round-trip"
+        );
+        let _ = std::fs::remove_file(&path);
+    }
+}
+
+// Negative control: round-trip on a CSV with NO embedded newlines must
+// still produce the same number of rows as before the fix.
+#[test]
+fn csv_plain_round_trip_unchanged() {
+    for (i, engine) in engines().iter().enumerate() {
+        let path = format!("/tmp/ilo_csv_plain_rt_{i}.csv");
+        let _ = std::fs::remove_file(&path);
+        let src = format!(
+            r#"f>R n t;wr! "{path}" [["name","n"],["alice","1"],["bob","2"]] "csv";rows=rd! "{path}" "csv";~len rows"#
+        );
+        let got = run_ok(engine, &src, "f");
+        assert_eq!(got, "3", "engine={engine}: plain csv row count regressed");
+        let _ = std::fs::remove_file(&path);
+    }
+}

From d6ece14a4a1cf7fb4dcac6751fdace745d37bf54 Mon Sep 17 00:00:00 2001
From: Daniel Morris <danieljohnmorris@gmail.com>
Date: Tue, 19 May 2026 17:52:42 +0100
Subject: [PATCH 4/4] docs: changelog 0.12.1 entry for CSV multi-line
 round-trip fix

---
 CHANGELOG.md | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f8a0f130..1881d27c 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@
 
 - `ls dir` renamed to `lsd dir`. Six rerun10 personas tripped ILO-P011 on `ls=rdl! p` because `ls` was reserved; rename frees `ls` for user code. `walk`, `glob` unchanged.
 
+### Fixed
+
+- CSV/TSV reader now tracks quote state across record separators. A cell containing `\n` (which the writer correctly emits as a quoted multi-line field per RFC 4180) used to be re-parsed as two rows, so `rd path "csv"` silently disagreed with `wr path data "csv"`. The reader is now a single-pass scanner over the whole document and round-trips multi-line quoted fields, embedded quotes, and CRLF line endings byte-stably across tree and VM. Surfaced by csv-pipeline rerun10.
+
 ## 0.12.0 - 2026-05-19
 
 ### Breaking