Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 14 additions & 0 deletions examples/csv-tsv-writer.ilo
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
-- 3-arg wr: serialise a list of rows as csv or tsv and write it in one call.
-- Each row is itself a list of fields. RFC 4180 quoting applied (fields with
-- the separator, a quote, or a newline are wrapped in "…" and inner quotes
-- are escaped as "").

dumpcsv>R t t;wr "/tmp/ilo_wr_csv_example.csv" [["name", "age"], ["alice", 30], ["bob", 25]] "csv"

dumptsv>R t t;wr "/tmp/ilo_wr_tsv_example.tsv" [["name", "age"], ["alice", 30], ["bob", 25]] "tsv"

-- run: dumpcsv
-- out: ~/tmp/ilo_wr_csv_example.csv

-- run: dumptsv
-- out: ~/tmp/ilo_wr_tsv_example.tsv
158 changes: 115 additions & 43 deletions src/interpreter/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -405,6 +405,120 @@ fn vec_from_value(v: &Value, name: &str) -> Result<Vec<f64>> {
Ok(out)
}

/// Format one field for csv/tsv output following RFC 4180 quoting.
/// Quote the field if it contains the separator, `"`, or newline.
/// Inner `"` are escaped as `""`.
fn fmt_csv_field(v: &Value, sep: char) -> String {
let raw = match v {
Value::Text(s) => s.clone(),
Value::Number(n) => {
if *n == (*n as i64) as f64 {
format!("{}", *n as i64)
} else {
format!("{n}")
}
}
Value::Bool(b) => format!("{b}"),
Value::Nil => String::new(),
other => format!("{other}"),
};
if raw.contains(sep) || raw.contains('"') || raw.contains('\n') {
format!("\"{}\"", raw.replace('"', "\"\""))
} else {
raw
}
}

/// Serialise a list of rows as csv or tsv.
///
/// Row shapes:
/// * `L (L _)` — list of lists, no header row.
/// * `L record` — header from the first record's fields (keys sorted for
/// stable output across runs since fields are stored in a HashMap).
/// * `L (M k v)` — header from the first map's keys (sorted likewise).
pub(crate) fn write_csv_tsv(rows: &[Value], sep: char) -> Result<String> {
let mut out = String::new();
let first = match rows.first() {
Some(r) => r,
None => return Ok(out),
};
let (header, use_keys): (Option<Vec<String>>, bool) = match first {
Value::List(_) => (None, false),
Value::Record { fields, .. } => {
let mut keys: Vec<String> = fields.keys().cloned().collect();
keys.sort();
(Some(keys), true)
}
Value::Map(m) => {
let mut keys: Vec<String> = m.keys().cloned().collect();
keys.sort();
(Some(keys), true)
}
other => {
return Err(RuntimeError::new(
"ILO-R009",
format!(
"wr: each row must be a list, record, or map, got {:?}",
other
),
));
}
};
if let Some(ref keys) = header {
for (i, k) in keys.iter().enumerate() {
if i > 0 {
out.push(sep);
}
out.push_str(&fmt_csv_field(&Value::Text(k.clone()), sep));
}
out.push('\n');
}
for row in rows {
match (row, use_keys, header.as_ref()) {
(Value::List(fields), false, _) => {
for (i, f) in fields.iter().enumerate() {
if i > 0 {
out.push(sep);
}
out.push_str(&fmt_csv_field(f, sep));
}
out.push('\n');
}
(Value::Record { fields, .. }, true, Some(keys)) => {
for (i, k) in keys.iter().enumerate() {
if i > 0 {
out.push(sep);
}
let v = fields.get(k).cloned().unwrap_or(Value::Nil);
out.push_str(&fmt_csv_field(&v, sep));
}
out.push('\n');
}
(Value::Map(m), true, Some(keys)) => {
for (i, k) in keys.iter().enumerate() {
if i > 0 {
out.push(sep);
}
let v = m.get(k).cloned().unwrap_or(Value::Nil);
out.push_str(&fmt_csv_field(&v, sep));
}
out.push('\n');
}
(other, _, _) => {
return Err(RuntimeError::new(
"ILO-R009",
format!(
"wr: row shape mismatch (expected {} rows), got {:?}",
if use_keys { "record/map" } else { "list" },
other
),
));
}
}
}
Ok(out)
}

/// LU decomposition with partial pivoting, in-place on an owned matrix.
/// Returns (LU, pivot indices, determinant, singular flag). When `singular`
/// is true, the LU and det values are still well-formed (det = 0) but the
Expand Down Expand Up @@ -2066,49 +2180,7 @@ fn call_function(env: &mut Env, name: &str, args: Vec<Value>) -> Result<Value> {
));
}
};
let mut out = String::new();
for row in rows {
match row {
Value::List(fields) => {
for (i, f) in fields.iter().enumerate() {
if i > 0 {
out.push(sep);
}
let s = match f {
Value::Text(s) => {
if s.contains(sep)
|| s.contains('"')
|| s.contains('\n')
{
format!("\"{}\"", s.replace('"', "\"\""))
} else {
out.push_str(s);
continue;
}
}
Value::Number(n) => {
if *n == (*n as i64) as f64 {
format!("{}", *n as i64)
} else {
format!("{n}")
}
}
Value::Bool(b) => format!("{b}"),
other => format!("{other}"),
};
out.push_str(&s);
}
out.push('\n');
}
other => {
return Err(RuntimeError::new(
"ILO-R009",
format!("wr: each row must be a list, got {:?}", other),
));
}
}
}
out
write_csv_tsv(rows, sep)?
}
"json" => {
fn value_to_json(v: &Value) -> serde_json::Value {
Expand Down
88 changes: 61 additions & 27 deletions src/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -209,6 +209,12 @@ pub(crate) const OP_DROP: u8 = 114; // R[A] = drop(R[B], R[C]) (skip first B el
pub(crate) const OP_DTFMT: u8 = 131; // R[A] = dtfmt(R[B] epoch, R[C] fmt) → t
pub(crate) const OP_DTPARSE: u8 = 132; // R[A] = dtparse(R[B] text, R[C] fmt) → R n t

// Format-dump helper for csv/tsv: parallels OP_JDMP for the `wr path data fmt`
// 3-arg overload. R[A] = serialised string; R[B] = data list; C = 0 for csv
// (separator `,`), C = 1 for tsv (separator `\t`). The literal format is known
// at compile time, so we encode it in the C field rather than via a register.
pub(crate) const OP_CSVDMP: u8 = 134;

// ABC mode — text formatting
pub(crate) const OP_FMT2: u8 = 104; // R[A] = fmt2(R[B], R[C]) (format number to N decimal places → t)

Expand Down Expand Up @@ -2246,22 +2252,32 @@ impl RegCompiler {
}
return ra;
}
// 3-arg `wr path data "json"` — serialise with jdmp,
// then write. Only the literal "json" format is inlined;
// other formats fall through (and the verifier rejects
// unknown literal formats up-front).
// 3-arg `wr path data fmt` — when fmt is a literal
// "json"/"csv"/"tsv", serialise to a string register
// then OP_WR. json uses OP_JDMP; csv/tsv uses OP_CSVDMP
// (separator encoded in C: 0=csv, 1=tsv).
(Builtin::Wr, 3) => {
if let crate::ast::Expr::Literal(crate::ast::Literal::Text(fmt_str)) =
&args[2]
&& fmt_str == "json"
&& matches!(fmt_str.as_str(), "json" | "csv" | "tsv")
{
let rpath = self.compile_expr(&args[0]);
let rdata = self.compile_expr(&args[1]);
// jdmp(data) → text
let rjson = self.alloc_reg();
self.emit_abc(OP_JDMP, rjson, rdata, 0);
let rcontent = self.alloc_reg();
match fmt_str.as_str() {
"json" => {
self.emit_abc(OP_JDMP, rcontent, rdata, 0);
}
"csv" => {
self.emit_abc(OP_CSVDMP, rcontent, rdata, 0);
}
"tsv" => {
self.emit_abc(OP_CSVDMP, rcontent, rdata, 1);
}
_ => unreachable!(),
}
let ra = self.alloc_reg();
self.emit_abc(OP_WR, ra, rpath, rjson);
self.emit_abc(OP_WR, ra, rpath, rcontent);
if *unwrap {
let check_reg = self.alloc_reg();
self.emit_abc(OP_ISOK, check_reg, ra, 0);
Expand All @@ -2273,11 +2289,11 @@ impl RegCompiler {
}
return ra;
}
// non-"json" 3-arg wr (csv/tsv or dynamic): no VM
// path yet — surface a clear compile error.
// Dynamic format string: no VM path yet — surface
// a clear compile error pointing at --run-tree.
self.first_error.get_or_insert(
CompileError::UndefinedFunction {
name: "wr (3-arg non-json format not yet supported in VM; use --run-tree)".to_string(),
name: "wr (3-arg with dynamic format not yet supported in VM; use --run-tree)".to_string(),
},
);
return self.alloc_reg();
Expand Down Expand Up @@ -2820,12 +2836,13 @@ fn chunk_is_all_numeric(chunk: &Chunk) -> bool {
match op {
OP_RECNEW | OP_LISTNEW | OP_RECWITH | OP_WRAPOK | OP_WRAPERR | OP_STR | OP_CAT
| OP_SPL | OP_REV | OP_SRT | OP_SRTDESC | OP_SLC | OP_TAKE | OP_DROP | OP_UNQ
| OP_UNIQBY | OP_FRQ | OP_PARTITION | OP_LISTAPPEND | OP_JPAR | OP_JDMP | OP_ENV
| OP_GET | OP_GETH | OP_GETMANY | OP_POST | OP_POSTH | OP_RD | OP_RDL | OP_RDJL
| OP_WR | OP_WRL | OP_MAPNEW | OP_MGET | OP_MSET | OP_MKEYS | OP_MVALS | OP_HD
| OP_AT | OP_LST | OP_TL | OP_FMT2 | OP_RGXSUB | OP_ZIP | OP_ENUMERATE | OP_WINDOW
| OP_FFT | OP_IFFT | OP_RANGE | OP_CHUNKS | OP_CUMSUM | OP_SETUNION | OP_SETINTER
| OP_SETDIFF | OP_TRANSPOSE | OP_MATMUL | OP_INV | OP_SOLVE | OP_DTFMT | OP_DTPARSE => {
| OP_UNIQBY | OP_FRQ | OP_PARTITION | OP_LISTAPPEND | OP_JPAR | OP_JDMP | OP_CSVDMP
| OP_ENV | OP_GET | OP_GETH | OP_GETMANY | OP_POST | OP_POSTH | OP_RD | OP_RDL
| OP_RDJL | OP_WR | OP_WRL | OP_MAPNEW | OP_MGET | OP_MSET | OP_MKEYS | OP_MVALS
| OP_HD | OP_AT | OP_LST | OP_TL | OP_FMT2 | OP_RGXSUB | OP_ZIP | OP_ENUMERATE
| OP_WINDOW | OP_FFT | OP_IFFT | OP_RANGE | OP_CHUNKS | OP_CUMSUM | OP_SETUNION
| OP_SETINTER | OP_SETDIFF | OP_TRANSPOSE | OP_MATMUL | OP_INV | OP_SOLVE
| OP_DTFMT | OP_DTPARSE => {
return false;
}
_ => {}
Expand Down Expand Up @@ -6260,6 +6277,28 @@ impl<'a> VM<'a> {
let json_val = nanval_to_json(v);
reg_set!(a, NanVal::heap_string(json_val.to_string()));
}
OP_CSVDMP => {
// Format the value in R[B] as csv (C==0) or tsv (C==1).
// Re-uses the tree-walker writer so quoting/header rules
// stay identical across engines.
let a = ((inst >> 16) & 0xFF) as usize + base;
let b = ((inst >> 8) & 0xFF) as usize + base;
let c = (inst & 0xFF) as u8;
let sep = if c == 0 { ',' } else { '\t' };
let data = reg!(b).to_value();
let rows = match data {
Value::List(l) => l,
_ => {
vm_err!(VmError::Type("wr csv/tsv: data must be a list of rows"));
}
};
match crate::interpreter::write_csv_tsv(&rows, sep) {
Ok(s) => reg_set!(a, NanVal::heap_string(s)),
Err(_e) => {
vm_err!(VmError::Type("wr csv/tsv: row shape mismatch"));
}
}
}
OP_JPAR => {
let a = ((inst >> 16) & 0xFF) as usize + base;
let b = ((inst >> 8) & 0xFF) as usize + base;
Expand Down Expand Up @@ -24319,17 +24358,12 @@ f>n;r=mk 10 20;+r.x r.y";
}

#[test]
fn vm_compile_wr_3arg_non_json_emits_compile_error() {
// csv/tsv 3-arg form is not yet implemented in the VM compiler;
// it should report a clear UndefinedFunction error.
fn vm_compile_wr_3arg_csv_compiles() {
// csv/tsv 3-arg form now compiles. Pinning the successful path.
let src = r#"f>R t t;wr "/tmp/__ilo_test_wr.csv" [[1,2]] "csv""#;
let prog = parse_for_vm(src);
let err = match crate::vm::compile(&prog) {
Ok(_) => panic!("csv 3-arg should not compile"),
Err(e) => e,
};
let msg = format!("{err:?}");
assert!(msg.contains("3-arg non-json"), "got {msg}");
let compiled = crate::vm::compile(&prog).expect("compile should succeed for csv overload");
assert!(!compiled.chunks.is_empty());
}

// ---- OP_RNDN VM dispatch + jit_rndn helper coverage ----
Expand Down
Loading
Loading