From 58b87cc4bfc685da25d415720e0d04928bfd7774 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 12 May 2026 09:40:27 +0100 Subject: [PATCH] builtins: add zip xs ys for pairing two lists Pairing two lists element-wise was a missing primitive. Producing [[x0,y0],[x1,y1],...] forced users to map + at + idx ranges, which is verbose for what is a one-token idiom in every other language. Opcode allocated: OP_ZIP (111). Truncates to the shorter input. Works on any pair of list types, returning a list of two-element lists. --- examples/zip.ilo | 10 +++ src/builtins.rs | 11 ++- src/interpreter/mod.rs | 26 ++++++ src/verify.rs | 43 ++++++++++ src/vm/compile_cranelift.rs | 13 ++- src/vm/jit_cranelift.rs | 13 ++- src/vm/mod.rs | 72 ++++++++++++++++- tests/regression_zip.rs | 154 ++++++++++++++++++++++++++++++++++++ 8 files changed, 335 insertions(+), 7 deletions(-) create mode 100644 examples/zip.ilo create mode 100644 tests/regression_zip.rs diff --git a/examples/zip.ilo b/examples/zip.ilo new file mode 100644 index 00000000..fba95b33 --- /dev/null +++ b/examples/zip.ilo @@ -0,0 +1,10 @@ +-- zip xs ys: pair-wise combine two lists into a list of [x,y] pairs. +-- Truncates to the shorter of the two inputs (Python convention). 
+ +pairs xs:L n ys:L n>L (L n);zip xs ys +trunc xs:L n ys:L n>L (L n);zip xs ys + +-- run: pairs [1,2,3] [10,20,30] +-- out: [[1, 10], [2, 20], [3, 30]] +-- run: trunc [1,2,3,4] [10,20] +-- out: [[1, 10], [2, 20]] diff --git a/src/builtins.rs b/src/builtins.rs index dd5e72ae..799c00a1 100644 --- a/src/builtins.rs +++ b/src/builtins.rs @@ -46,6 +46,7 @@ pub enum Builtin { Has, Spl, Cat, + Zip, // Higher-order Map, @@ -132,6 +133,7 @@ impl Builtin { "has" => Some(Builtin::Has), "spl" => Some(Builtin::Spl), "cat" => Some(Builtin::Cat), + "zip" => Some(Builtin::Zip), "map" => Some(Builtin::Map), "flt" => Some(Builtin::Flt), "fld" => Some(Builtin::Fld), @@ -205,6 +207,7 @@ impl Builtin { Builtin::Has => "has", Builtin::Spl => "spl", Builtin::Cat => "cat", + Builtin::Zip => "zip", Builtin::Map => "map", Builtin::Flt => "flt", Builtin::Fld => "fld", @@ -253,10 +256,10 @@ mod tests { let all = [ "str", "num", "abs", "flr", "cel", "rou", "min", "max", "mod", "pow", "sqrt", "log", "exp", "sin", "cos", "tan", "log10", "log2", "atan2", "sum", "avg", "len", "hd", "at", - "tl", "rev", "srt", "rsrt", "slc", "lst", "unq", "flat", "has", "spl", "cat", "map", - "flt", "fld", "grp", "rnd", "now", "rd", "rdl", "rdb", "wr", "wrl", "prnt", "env", - "trm", "fmt", "fmt2", "rgx", "rgxsub", "jpth", "jdmp", "jpar", "get", "post", "mmap", - "mget", "mset", "mhas", "mkeys", "mvals", "mdel", + "tl", "rev", "srt", "rsrt", "slc", "lst", "unq", "flat", "has", "spl", "cat", "zip", + "map", "flt", "fld", "grp", "rnd", "now", "rd", "rdl", "rdb", "wr", "wrl", "prnt", + "env", "trm", "fmt", "fmt2", "rgx", "rgxsub", "jpth", "jdmp", "jpar", "get", "post", + "mmap", "mget", "mset", "mhas", "mkeys", "mvals", "mdel", ]; for name in &all { let b = Builtin::from_name(name).unwrap_or_else(|| panic!("missing builtin: {name}")); diff --git a/src/interpreter/mod.rs b/src/interpreter/mod.rs index b58f2e63..b1097ad6 100644 --- a/src/interpreter/mod.rs +++ b/src/interpreter/mod.rs @@ -775,6 +775,32 @@ fn 
call_function(env: &mut Env, name: &str, args: Vec) -> Result { )), }; } + if builtin == Some(Builtin::Zip) && args.len() == 2 { + let xs = match &args[0] { + Value::List(items) => items, + other => { + return Err(RuntimeError::new( + "ILO-R009", + format!("zip arg 1 requires a list, got {:?}", other), + )); + } + }; + let ys = match &args[1] { + Value::List(items) => items, + other => { + return Err(RuntimeError::new( + "ILO-R009", + format!("zip arg 2 requires a list, got {:?}", other), + )); + } + }; + let n = xs.len().min(ys.len()); + let mut out = Vec::with_capacity(n); + for i in 0..n { + out.push(Value::List(vec![xs[i].clone(), ys[i].clone()])); + } + return Ok(Value::List(out)); + } if builtin == Some(Builtin::Tl) && args.len() == 1 { return match &args[0] { Value::List(items) => { diff --git a/src/verify.rs b/src/verify.rs index 4122f5ae..5e9f7242 100644 --- a/src/verify.rs +++ b/src/verify.rs @@ -281,6 +281,7 @@ const BUILTINS: &[(&str, &[&str], &str)] = &[ ("trm", &["t"], "t"), ("spl", &["t", "t"], "L t"), ("cat", &["L t", "t"], "t"), + ("zip", &["list", "list"], "list"), ("has", &["list_or_text", "any"], "b"), ("hd", &["list_or_text"], "any"), ("at", &["list_or_text", "n"], "any"), @@ -617,6 +618,48 @@ fn builtin_check_args( } (Ty::Unknown, errors) } + "zip" => { + // zip xs ys — returns a list of 2-element pairs [[x,y],...]. + // Truncates to the shorter list. Inner element type: arg 1's when both args are lists with + // compatible element types, the lone list's when only one arg is list-typed, otherwise Unknown. 
+ let elem_a = match arg_types.first() { + Some(Ty::List(inner)) => Some((**inner).clone()), + Some(Ty::Unknown) | None => None, + Some(other) => { + errors.push(VerifyError { + code: "ILO-T013", + function: func_ctx.to_string(), + message: format!("'zip' arg 1 expects a list, got {other}"), + hint: None, + span, + is_warning: false, + }); + None + } + }; + let elem_b = match arg_types.get(1) { + Some(Ty::List(inner)) => Some((**inner).clone()), + Some(Ty::Unknown) | None => None, + Some(other) => { + errors.push(VerifyError { + code: "ILO-T013", + function: func_ctx.to_string(), + message: format!("'zip' arg 2 expects a list, got {other}"), + hint: None, + span, + is_warning: false, + }); + None + } + }; + let inner = match (elem_a, elem_b) { + (Some(a), Some(b)) if compatible(&a, &b) => a, + (Some(a), None) => a, + (None, Some(b)) => b, + _ => Ty::Unknown, + }; + (Ty::List(Box::new(Ty::List(Box::new(inner)))), errors) + } "tl" => { if let Some(arg) = arg_types.first() { match arg { diff --git a/src/vm/compile_cranelift.rs b/src/vm/compile_cranelift.rs index 2683a19e..1e65eb4a 100644 --- a/src/vm/compile_cranelift.rs +++ b/src/vm/compile_cranelift.rs @@ -74,6 +74,7 @@ struct HelperFuncs { hd: FuncId, at: FuncId, fmt2: FuncId, + zip: FuncId, tl: FuncId, rev: FuncId, srt: FuncId, @@ -199,6 +200,7 @@ fn declare_all_helpers(module: &mut ObjectModule) -> HelperFuncs { hd: declare_helper(module, "jit_hd", 1, 1), at: declare_helper(module, "jit_at", 2, 1), fmt2: declare_helper(module, "jit_fmt2", 2, 1), + zip: declare_helper(module, "jit_zip", 2, 1), tl: declare_helper(module, "jit_tl", 1, 1), rev: declare_helper(module, "jit_rev", 1, 1), srt: declare_helper(module, "jit_srt", 1, 1), @@ -924,7 +926,8 @@ fn compile_function_body( | OP_SPL | OP_CAT | OP_GET | OP_POST | OP_GETH | OP_POSTH | OP_ENV | OP_JPTH | OP_JDMP | OP_JPAR | OP_MAPNEW | OP_MGET | OP_MSET | OP_MDEL | OP_MKEYS | OP_MVALS | OP_LISTNEW | OP_LISTAPPEND | OP_RECNEW | OP_RECWITH | OP_PRT - | OP_RD | OP_RDL | 
OP_WR | OP_WRL | OP_TRM | OP_UNQ | OP_NUM | OP_RGXSUB => { + | OP_RD | OP_RDL | OP_WR | OP_WRL | OP_TRM | OP_UNQ | OP_NUM | OP_RGXSUB + | OP_ZIP => { non_num_write[a] = true; non_bool_write[a] = true; } @@ -1907,6 +1910,14 @@ fn compile_function_body( let result = builder.inst_results(call_inst)[0]; builder.def_var(vars[a_idx], result); } + OP_ZIP => { + let bv = builder.use_var(vars[b_idx]); + let cv = builder.use_var(vars[c_idx]); + let fref = get_func_ref(&mut builder, module, helpers.zip); + let call_inst = builder.ins().call(fref, &[bv, cv]); + let result = builder.inst_results(call_inst)[0]; + builder.def_var(vars[a_idx], result); + } OP_TL => { let bv = builder.use_var(vars[b_idx]); let fref = get_func_ref(&mut builder, module, helpers.tl); diff --git a/src/vm/jit_cranelift.rs b/src/vm/jit_cranelift.rs index 57b031f8..b490cf0a 100644 --- a/src/vm/jit_cranelift.rs +++ b/src/vm/jit_cranelift.rs @@ -80,6 +80,7 @@ struct HelperFuncs { hd: FuncId, at: FuncId, fmt2: FuncId, + zip: FuncId, tl: FuncId, rev: FuncId, srt: FuncId, @@ -195,6 +196,7 @@ fn register_helpers(builder: &mut JITBuilder) { ("jit_hd", jit_hd as *const u8), ("jit_at", jit_at as *const u8), ("jit_fmt2", jit_fmt2 as *const u8), + ("jit_zip", jit_zip as *const u8), ("jit_tl", jit_tl as *const u8), ("jit_rev", jit_rev as *const u8), ("jit_srt", jit_srt as *const u8), @@ -301,6 +303,7 @@ fn declare_all_helpers(module: &mut JITModule) -> HelperFuncs { hd: declare_helper(module, "jit_hd", 1, 1), at: declare_helper(module, "jit_at", 2, 1), fmt2: declare_helper(module, "jit_fmt2", 2, 1), + zip: declare_helper(module, "jit_zip", 2, 1), tl: declare_helper(module, "jit_tl", 1, 1), rev: declare_helper(module, "jit_rev", 1, 1), srt: declare_helper(module, "jit_srt", 1, 1), @@ -905,7 +908,7 @@ fn compile_function_body( | OP_WRAPOK | OP_WRAPERR | OP_UNWRAP | OP_RECFLD | OP_RECFLD_NAME | OP_LISTGET | OP_INDEX | OP_STR | OP_HD | OP_AT | OP_FMT2 | OP_TL | OP_REV | OP_SRT | OP_SRTDESC - | OP_SLC | OP_LST + | OP_SLC 
| OP_LST | OP_ZIP | OP_SPL | OP_CAT | OP_GET | OP_POST | OP_GETH | OP_POSTH | OP_ENV | OP_JPTH | OP_JDMP | OP_JPAR | OP_MAPNEW | OP_MGET | OP_MSET | OP_MDEL | OP_MKEYS | OP_MVALS @@ -1956,6 +1959,14 @@ fn compile_function_body( let result = builder.inst_results(call_inst)[0]; builder.def_var(vars[a_idx], result); } + OP_ZIP => { + let bv = builder.use_var(vars[b_idx]); + let cv = builder.use_var(vars[c_idx]); + let fref = get_func_ref(&mut builder, module, helpers.zip); + let call_inst = builder.ins().call(fref, &[bv, cv]); + let result = builder.inst_results(call_inst)[0]; + builder.def_var(vars[a_idx], result); + } OP_TL => { let bv = builder.use_var(vars[b_idx]); let fref = get_func_ref(&mut builder, module, helpers.tl); diff --git a/src/vm/mod.rs b/src/vm/mod.rs index e5423861..c2e83829 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -195,6 +195,7 @@ pub(crate) const OP_LOG2: u8 = 107; // R[A] = log2(R[B]) pub(crate) const OP_ATAN2: u8 = 108; // R[A] = atan2(R[B], R[C]) (y first, x second) pub(crate) const OP_SRTDESC: u8 = 117; // R[A] = rsrt(R[B]) (descending sort of list) pub(crate) const OP_RGXSUB: u8 = 115; // R[A] = rgxsub(R[B], R[C], R[D]) (pattern, replacement, subject; D in data word A field) +pub(crate) const OP_ZIP: u8 = 111; // R[A] = zip(R[B], R[C]) (list of [x,y] pairs, truncated) // ABC mode — text formatting pub(crate) const OP_FMT2: u8 = 104; // R[A] = fmt2(R[B], R[C]) (format number to N decimal places → t) @@ -1688,6 +1689,13 @@ impl RegCompiler { self.emit_abc(OP_FMT2, ra, rb, rc); return ra; } + (Builtin::Zip, 2) => { + let rb = self.compile_expr(&args[0]); + let rc = self.compile_expr(&args[1]); + let ra = self.alloc_reg(); + self.emit_abc(OP_ZIP, ra, rb, rc); + return ra; + } (Builtin::Tl, 1) => { let rb = self.compile_expr(&args[0]); let ra = self.alloc_reg(); @@ -2506,7 +2514,7 @@ fn chunk_is_all_numeric(chunk: &Chunk) -> bool { | OP_SPL | OP_REV | OP_SRT | OP_SRTDESC | OP_SLC | OP_UNQ | OP_LISTAPPEND | OP_JPAR | OP_JDMP | OP_ENV | 
OP_GET | OP_GETH | OP_POST | OP_POSTH | OP_RD | OP_RDL | OP_WR | OP_WRL | OP_MAPNEW | OP_MGET | OP_MSET | OP_MKEYS | OP_MVALS | OP_HD | OP_AT - | OP_LST | OP_TL | OP_FMT2 | OP_RGXSUB => { + | OP_LST | OP_TL | OP_FMT2 | OP_RGXSUB | OP_ZIP => { return false; } _ => {} @@ -5651,6 +5659,40 @@ impl<'a> VM<'a> { }; reg_set!(a, result); } + OP_ZIP => { + let a = ((inst >> 16) & 0xFF) as usize + base; + let b = ((inst >> 8) & 0xFF) as usize + base; + let c = (inst & 0xFF) as usize + base; + let vb = reg!(b); + let vc = reg!(c); + let xs = if vb.is_heap() { + match unsafe { vb.as_heap_ref() } { + HeapObj::List(items) => items, + _ => vm_err!(VmError::Type("zip arg 1 requires a list")), + } + } else { + vm_err!(VmError::Type("zip arg 1 requires a list")); + }; + let ys = if vc.is_heap() { + match unsafe { vc.as_heap_ref() } { + HeapObj::List(items) => items, + _ => vm_err!(VmError::Type("zip arg 2 requires a list")), + } + } else { + vm_err!(VmError::Type("zip arg 2 requires a list")); + }; + let n = xs.len().min(ys.len()); + let mut out: Vec = Vec::with_capacity(n); + for i in 0..n { + let x = xs[i]; + let y = ys[i]; + x.clone_rc(); + y.clone_rc(); + let pair = NanVal::heap_list(vec![x, y]); + out.push(pair); + } + reg_set!(a, NanVal::heap_list(out)); + } OP_TL => { let a = ((inst >> 16) & 0xFF) as usize + base; let b = ((inst >> 8) & 0xFF) as usize + base; @@ -7143,6 +7185,34 @@ pub(crate) extern "C" fn jit_lst(list: u64, idx: u64, val: u64) -> u64 { TAG_NIL } +#[cfg(feature = "cranelift")] +#[unsafe(no_mangle)] +pub(crate) extern "C" fn jit_zip(a: u64, b: u64) -> u64 { + let va = NanVal(a); + let vb = NanVal(b); + if !va.is_heap() || !vb.is_heap() { + return TAG_NIL; + } + let xs = match unsafe { va.as_heap_ref() } { + HeapObj::List(items) => items, + _ => return TAG_NIL, + }; + let ys = match unsafe { vb.as_heap_ref() } { + HeapObj::List(items) => items, + _ => return TAG_NIL, + }; + let n = xs.len().min(ys.len()); + let mut out: Vec = Vec::with_capacity(n); + for i in 
0..n { + let x = xs[i]; + let y = ys[i]; + x.clone_rc(); + y.clone_rc(); + out.push(NanVal::heap_list(vec![x, y])); + } + NanVal::heap_list(out).0 +} + #[cfg(feature = "cranelift")] #[unsafe(no_mangle)] pub(crate) extern "C" fn jit_tl(a: u64) -> u64 { diff --git a/tests/regression_zip.rs b/tests/regression_zip.rs new file mode 100644 index 00000000..b6f0a409 --- /dev/null +++ b/tests/regression_zip.rs @@ -0,0 +1,154 @@ +// Regression tests for the `zip xs ys` builtin — pair-wise combine two lists. +// +// Returns L (L a) of 2-element pairs. Truncates to the shorter input +// (Python convention). Verified across tree, vm, and cranelift engines. + +use std::process::Command; + +fn ilo() -> Command { + Command::new(env!("CARGO_BIN_EXE_ilo")) +} + +fn run(engine: &str, src: &str, entry: &str) -> String { + let out = ilo() + .args([src, engine, entry]) + .output() + .expect("failed to run ilo"); + assert!( + out.status.success(), + "ilo {engine} failed for `{src}`: stderr={}", + String::from_utf8_lossy(&out.stderr) + ); + String::from_utf8_lossy(&out.stdout).trim().to_string() +} + +// Basic: zip two same-length number lists. +const BASIC_SRC: &str = "f>L (L n);zip [1,2,3] [10,20,30]"; + +fn check_basic(engine: &str) { + assert_eq!( + run(engine, BASIC_SRC, "f"), + "[[1, 10], [2, 20], [3, 30]]", + "engine={engine}" + ); +} + +#[test] +fn zip_basic_tree() { + check_basic("--run-tree"); +} + +#[test] +fn zip_basic_vm() { + check_basic("--run-vm"); +} + +#[test] +#[cfg(feature = "cranelift")] +fn zip_basic_cranelift() { + check_basic("--run-cranelift"); +} + +// Truncate to shorter (xs longer than ys). 
+const TRUNC_LONG_XS_SRC: &str = "f>L (L n);zip [1,2,3,4] [10,20]"; + +fn check_trunc_long_xs(engine: &str) { + assert_eq!( + run(engine, TRUNC_LONG_XS_SRC, "f"), + "[[1, 10], [2, 20]]", + "engine={engine}" + ); +} + +#[test] +fn zip_trunc_long_xs_tree() { + check_trunc_long_xs("--run-tree"); +} + +#[test] +fn zip_trunc_long_xs_vm() { + check_trunc_long_xs("--run-vm"); +} + +#[test] +#[cfg(feature = "cranelift")] +fn zip_trunc_long_xs_cranelift() { + check_trunc_long_xs("--run-cranelift"); +} + +// Truncate to shorter (ys longer than xs). +const TRUNC_LONG_YS_SRC: &str = "f>L (L n);zip [1,2] [10,20,30,40]"; + +fn check_trunc_long_ys(engine: &str) { + assert_eq!( + run(engine, TRUNC_LONG_YS_SRC, "f"), + "[[1, 10], [2, 20]]", + "engine={engine}" + ); +} + +#[test] +fn zip_trunc_long_ys_tree() { + check_trunc_long_ys("--run-tree"); +} + +#[test] +fn zip_trunc_long_ys_vm() { + check_trunc_long_ys("--run-vm"); +} + +#[test] +#[cfg(feature = "cranelift")] +fn zip_trunc_long_ys_cranelift() { + check_trunc_long_ys("--run-cranelift"); +} + +// Empty list: either side empty yields empty. +const EMPTY_SRC: &str = "f>L (L n);zip [] [1,2,3]"; + +fn check_empty(engine: &str) { + assert_eq!(run(engine, EMPTY_SRC, "f"), "[]", "engine={engine}"); +} + +#[test] +fn zip_empty_tree() { + check_empty("--run-tree"); +} + +#[test] +fn zip_empty_vm() { + check_empty("--run-vm"); +} + +#[test] +#[cfg(feature = "cranelift")] +fn zip_empty_cranelift() { + check_empty("--run-cranelift"); +} + +// Mixed types via type variable: zip text with numbers. +const MIXED_SRC: &str = "f>L (L a);zip [\"a\",\"b\"] [1,2]"; + +fn check_mixed(engine: &str) { + assert_eq!( + run(engine, MIXED_SRC, "f"), + "[[a, 1], [b, 2]]", + "engine={engine}" + ); +} + +#[test] +fn zip_mixed_tree() { + check_mixed("--run-tree"); +} + +#[test] +fn zip_mixed_vm() { + check_mixed("--run-vm"); +} + +#[test] +#[cfg(feature = "cranelift")] +fn zip_mixed_cranelift() { + check_mixed("--run-cranelift"); +}