From 09c1e5ec4f67850d23512215c71f3fe9030bcca3 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 01:03:30 +0100 Subject: [PATCH 1/8] aot: add postcard CompiledProgram wire format Engine audit PR #413 found AOT silently returns nil for HOF / closure dispatch (map over a lambda, fld, grp, uniqby, fn-ref return). Root cause: AOT never publishes ACTIVE_PROGRAM, so jit_call_dyn and jit_call_builtin_tree hit their null-program guards and return TAG_NIL for every user-fn callback. To fix, the AOT binary needs a CompiledProgram at runtime. Add a postcard wire format (chunks + func_names + is_tool + type_registry + ast) gated by schema_version. Chunk constants encode only the variants the compiler emits today (Nil / Number / Text / Bool / List); a future variant trips the From<&Value> guard in the test suite before any binary ships. The AST is serialised as JSON inside the postcard envelope because the Program::serialize_decls custom impl uses serialize_seq(None) which postcard rejects with "The length of a sequence must be known". serde_json handles unsized seqs and the AST already serialises cleanly via JSON for --ast. Round-trip unit tests cover the empty program, a map-lambda program, an fld user-fn program, type-registry preservation, and schema-version mismatch detection. 
--- Cargo.lock | 34 +++++ Cargo.toml | 3 +- src/vm/aot_blob.rs | 313 +++++++++++++++++++++++++++++++++++++++++++++ src/vm/mod.rs | 2 + 4 files changed, 351 insertions(+), 1 deletion(-) create mode 100644 src/vm/aot_blob.rs diff --git a/Cargo.lock b/Cargo.lock index ce72e87c..c4974f25 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -216,6 +216,15 @@ version = "1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" +[[package]] +name = "cobs" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa961b519f0b462e3a3b4a34b64d119eeaca1d59af726fe450bbba07a9fc0a1" +dependencies = [ + "thiserror 2.0.18", +] + [[package]] name = "colorchoice" version = "1.0.4" @@ -419,6 +428,18 @@ version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" +[[package]] +name = "embedded-io" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef1a6892d9eef45c8fa6b9e0086428a2cca8491aca8f787c534a3d6d0bcb3ced" + +[[package]] +name = "embedded-io" +version = "0.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "edd0f118536f44f5ccd48bcb8b111bdc3de888b58c74639dfb034a357d0f206d" + [[package]] name = "equivalent" version = "1.0.2" @@ -903,6 +924,7 @@ dependencies = [ "libc", "logos", "minreq", + "postcard", "regex", "reqwest", "serde", @@ -1167,6 +1189,18 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" +[[package]] +name = "postcard" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6764c3b5dd454e283a30e6dfe78e9b31096d9e32036b5d1eaac7a6119ccb9a24" +dependencies = [ + "cobs", + "embedded-io 0.4.0", + "embedded-io 0.6.1", + 
"serde", +] + [[package]] name = "potential_utf" version = "0.1.4" diff --git a/Cargo.toml b/Cargo.toml index 99b98d3d..ec3dd3fd 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -19,7 +19,7 @@ crate-type = ["staticlib", "rlib"] [features] default = ["cranelift", "http"] http = ["dep:minreq"] -cranelift = ["dep:cranelift-codegen", "dep:cranelift-frontend", "dep:cranelift-jit", "dep:cranelift-module", "dep:cranelift-native", "dep:cranelift-object", "dep:target-lexicon"] +cranelift = ["dep:cranelift-codegen", "dep:cranelift-frontend", "dep:cranelift-jit", "dep:cranelift-module", "dep:cranelift-native", "dep:cranelift-object", "dep:target-lexicon", "dep:postcard"] llvm = ["dep:inkwell"] tools = ["dep:tokio", "dep:reqwest"] @@ -36,6 +36,7 @@ cranelift-module = { version = "0.116", optional = true } cranelift-native = { version = "0.116", optional = true } cranelift-object = { version = "0.116", optional = true } target-lexicon = { version = "0.12", optional = true } +postcard = { version = "1.0", features = ["alloc"], default-features = false, optional = true } inkwell = { version = "0.5", features = ["llvm18-0"], optional = true } minreq = { version = "2.14", default-features = false, features = ["https-rustls"], optional = true } tokio = { version = "1", features = ["rt", "macros", "process", "io-util", "sync"], optional = true } diff --git a/src/vm/aot_blob.rs b/src/vm/aot_blob.rs new file mode 100644 index 00000000..4312665b --- /dev/null +++ b/src/vm/aot_blob.rs @@ -0,0 +1,313 @@ +//! Serialised `CompiledProgram` blob embedded into AOT binaries. +//! +//! AOT-compiled binaries need the full `CompiledProgram` at runtime so the +//! Cranelift HOF / closure dispatch helpers (`jit_call_dyn`, `jit_call_builtin_tree`) +//! can re-enter the VM on user-fn callbacks and resolve FnRef names. Before this +//! module existed, the AOT runtime had no `CompiledProgram` published in +//! `ACTIVE_PROGRAM` / `ACTIVE_AST_PROGRAM`, so every HOF callback hit the null-program +//! 
guard and silently returned `TAG_NIL` — manifesting as `[nil, nil, ...]` for +//! `map (lambda) xs`, `nil` for `fld add xs 0`, `nil` for `grp/uniqby`, and so on +//! (engine audit PR #413 gap #1). +//! +//! The fix: serialise the `CompiledProgram` with `postcard`, embed the byte blob +//! in a `.rodata` section of the AOT object file, and at `main` entry call +//! `ilo_aot_publish_program(ptr, len)` to deserialise + leak a static +//! `CompiledProgram` and publish its pointers into the `with_active_registry` +//! TLS slots for the lifetime of the process. The chunk constant pool is narrow +//! at compile time (`Number` / `Text` / `Bool` / `Nil`, plus `List` from the +//! record-`with` lowering) so the wire format keeps the constant variant set +//! tight rather than serialising the full `Value` enum. The `From<&Value>` +//! impl on `WireConst` panics with a clear message if a future compiler change +//! starts emitting a new variant — the existing AOT coverage suite catches it +//! before any binary ships. +//! +//! `schema_version` is the first field so a future change to the wire format +//! can be detected and rejected with a clean error rather than a silent +//! mis-deserialisation. + +use serde::{Deserialize, Serialize}; + +use super::{Chunk, CompiledProgram, TypeRegistry}; +use crate::ast::{Program, Span}; +use crate::interpreter::Value; +use std::sync::Arc; + +/// Bump whenever the on-disk shape changes in a way old runtimes cannot read. +/// The runtime rejects blobs with a `schema_version` it does not recognise. +pub const BLOB_SCHEMA_VERSION: u32 = 1; + +#[derive(Debug, Serialize, Deserialize)] +pub struct ProgramBlob { + pub schema_version: u32, + pub chunks: Vec, + pub func_names: Vec, + pub is_tool: Vec, + /// Tuples of `(name, fields, num_fields_bitmask)` — same shape as + /// `TypeRegistry::register` consumes. + pub type_registry_entries: Vec<(String, Vec, u64)>, + /// AST is serialised as JSON inside the postcard envelope. 
Two reasons: + /// (1) `Program::serialize_decls` is a custom `serialize_seq(None)` impl + /// that postcard's length-prefixed encoding rejects with "The length + /// of a sequence must be known". serde_json doesn't require sized + /// sequences, and the AST already serialises cleanly via JSON for + /// the `--ast` flag. + /// (2) The AST is only consumed by the tree-bridge in `call_builtin_for_bridge_with_program`, + /// which is rare — most HOF dispatch goes through OP_CALL_DYN's + /// VM re-entry which only needs `chunks` + `func_names`. Paying a + /// small encoding-mismatch tax to keep the AST surface intact + /// beats inventing a wire AST. + pub ast_json: String, +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct WireChunk { + pub code: Vec, + pub constants: Vec, + pub param_count: u8, + pub reg_count: u8, + pub spans: Vec<(u32, u32)>, + pub all_regs_numeric: bool, +} + +/// Constant pool entry. The `RegCompiler` emits `Nil` / `Number` / `Text` / +/// `Bool` from literals + match patterns + constant folding, plus `List` from +/// the record-`with` lowering (see `RegCompiler` lines ~5380-5394). If a future +/// compiler change adds a new constant variant the `From<&Value>` impl below +/// will panic with a clear message — caught by the existing test suite before +/// any binary ships. +#[derive(Debug, Serialize, Deserialize)] +pub enum WireConst { + Nil, + Number(f64), + Text(String), + Bool(bool), + /// `Value::List` constant. Record-`with` lowering inlines either a list of + /// numeric field indices or a list of field-name strings as a single + /// constant; both cases use only the four scalar variants above so we + /// reuse `WireConst` recursively. 
+ List(Vec), +} + +impl From<&Value> for WireConst { + fn from(v: &Value) -> Self { + match v { + Value::Nil => WireConst::Nil, + Value::Number(n) => WireConst::Number(*n), + Value::Text(s) => WireConst::Text(s.as_ref().clone()), + Value::Bool(b) => WireConst::Bool(*b), + Value::List(items) => WireConst::List(items.iter().map(WireConst::from).collect()), + other => panic!( + "aot_blob: unexpected chunk constant variant {:?} — only Nil/Number/Text/Bool/List are emitted by RegCompiler today; add a wire variant before lifting this", + std::mem::discriminant(other) + ), + } + } +} + +impl From for Value { + fn from(c: WireConst) -> Self { + match c { + WireConst::Nil => Value::Nil, + WireConst::Number(n) => Value::Number(n), + WireConst::Text(s) => Value::Text(Arc::new(s)), + WireConst::Bool(b) => Value::Bool(b), + WireConst::List(items) => { + Value::List(Arc::new(items.into_iter().map(Value::from).collect())) + } + } + } +} + +impl WireChunk { + pub fn from_chunk(chunk: &Chunk) -> Self { + WireChunk { + code: chunk.code.clone(), + constants: chunk.constants.iter().map(WireConst::from).collect(), + param_count: chunk.param_count, + reg_count: chunk.reg_count, + spans: chunk + .spans + .iter() + .map(|s| (s.start as u32, s.end as u32)) + .collect(), + all_regs_numeric: chunk.all_regs_numeric, + } + } + + pub fn into_chunk(self) -> Chunk { + Chunk { + code: self.code, + constants: self.constants.into_iter().map(Value::from).collect(), + param_count: self.param_count, + reg_count: self.reg_count, + spans: self + .spans + .into_iter() + .map(|(s, e)| Span { + start: s as usize, + end: e as usize, + }) + .collect(), + all_regs_numeric: self.all_regs_numeric, + } + } +} + +/// Serialise a `CompiledProgram` for embedding in an AOT binary. Errors +/// surface to the AOT compile path so a corrupt program never silently +/// ships. 
+pub fn serialize_program(program: &CompiledProgram) -> Result, String> { + let chunks: Vec = program.chunks.iter().map(WireChunk::from_chunk).collect(); + let type_registry_entries: Vec<(String, Vec, u64)> = program + .type_registry + .types + .iter() + .map(|ti| (ti.name.clone(), ti.fields.clone(), ti.num_fields)) + .collect(); + let ast_json = match &program.ast { + Some(ast) => serde_json::to_string(ast.as_ref()) + .map_err(|e| format!("serde_json serialize ast: {}", e))?, + None => "{\"declarations\":[]}".to_string(), + }; + let blob = ProgramBlob { + schema_version: BLOB_SCHEMA_VERSION, + chunks, + func_names: program.func_names.clone(), + is_tool: program.is_tool.clone(), + type_registry_entries, + ast_json, + }; + postcard::to_allocvec(&blob).map_err(|e| format!("postcard serialize: {}", e)) +} + +/// Deserialise a blob into a fully-formed `CompiledProgram` ready to be +/// published by `with_active_registry`. The caller is responsible for +/// keeping the returned program alive for the lifetime of any code that +/// reads `ACTIVE_PROGRAM` (the AOT runtime leaks it for the process). +pub fn deserialize_program(bytes: &[u8]) -> Result { + let blob: ProgramBlob = + postcard::from_bytes(bytes).map_err(|e| format!("postcard deserialize: {}", e))?; + if blob.schema_version != BLOB_SCHEMA_VERSION { + return Err(format!( + "AOT program blob schema_version mismatch: binary embeds v{} but this runtime expects v{}. 
Recompile with this ilo version.", + blob.schema_version, BLOB_SCHEMA_VERSION + )); + } + let chunks: Vec = blob.chunks.into_iter().map(WireChunk::into_chunk).collect(); + let nan_constants: Vec> = chunks + .iter() + .map(|c| c.constants.iter().map(super::NanVal::from_value).collect()) + .collect(); + let mut type_registry = TypeRegistry::default(); + for (name, fields, num_fields) in blob.type_registry_entries { + type_registry.register(name, fields, num_fields); + } + let ast: Program = serde_json::from_str(&blob.ast_json) + .map_err(|e| format!("serde_json deserialize ast: {}", e))?; + Ok(CompiledProgram { + chunks, + func_names: blob.func_names, + nan_constants, + type_registry, + is_tool: blob.is_tool, + ast: Some(Arc::new(ast)), + }) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::vm::compile; + + fn roundtrip(src: &str) -> CompiledProgram { + let tokens = crate::lexer::lex(src).expect("lex"); + let token_spans: Vec<_> = tokens + .into_iter() + .map(|(t, r)| { + ( + t, + crate::ast::Span { + start: r.start, + end: r.end, + }, + ) + }) + .collect(); + let (program, errors) = crate::parser::parse(token_spans); + assert!(errors.is_empty(), "parse errors: {:?}", errors); + let compiled = compile(&program).expect("compile"); + let bytes = serialize_program(&compiled).expect("serialize"); + deserialize_program(&bytes).expect("deserialize") + } + + #[test] + fn empty_program_roundtrips() { + let r = roundtrip("main>n;42"); + assert_eq!(r.func_names, vec!["main".to_string()]); + assert_eq!(r.chunks.len(), 1); + assert!(r.ast.is_some()); + } + + #[test] + fn schema_version_mismatch_is_rejected() { + let blob = ProgramBlob { + schema_version: 999, + chunks: vec![], + func_names: vec![], + is_tool: vec![], + type_registry_entries: vec![], + ast_json: "{\"declarations\":[]}".to_string(), + }; + let bytes = postcard::to_allocvec(&blob).unwrap(); + let err = match deserialize_program(&bytes) { + Ok(_) => panic!("expected schema mismatch error, got Ok"), + 
Err(e) => e, + }; + assert!(err.contains("schema_version mismatch"), "got: {}", err); + } + + #[test] + fn map_lambda_program_roundtrips() { + let r = roundtrip("main>L n;map (x:n>n;*x 2) [1,2,3]"); + let nv: Vec<&str> = r.func_names.iter().map(|s| s.as_str()).collect(); + assert!(nv.contains(&"main")); + assert!(nv.iter().any(|n| n.starts_with("__lit_"))); + } + + #[test] + fn fld_program_roundtrips() { + let r = roundtrip("add a:n b:n>n;+a b\nmain>n;fld add [1,2,3,4] 0"); + assert!(r.func_names.contains(&"add".to_string())); + assert!(r.func_names.contains(&"main".to_string())); + } + + #[test] + fn type_registry_roundtrips() { + // Build a CompiledProgram with a populated type registry by hand and + // exercise the serialise/deserialise path directly. The surface + // syntax for record construction is finicky enough that doing it + // through the full parser distracts from what we are checking here: + // type registry entries make it through the blob round-trip. + let mut tr = TypeRegistry::default(); + tr.register( + "point".to_string(), + vec!["x".to_string(), "y".to_string()], + 0b11, // both fields numeric + ); + let prog = CompiledProgram { + chunks: vec![], + func_names: vec![], + nan_constants: vec![], + type_registry: tr, + is_tool: vec![], + ast: None, + }; + let bytes = serialize_program(&prog).expect("serialize"); + let r = deserialize_program(&bytes).expect("deserialize"); + assert!(r.type_registry.name_to_id.contains_key("point")); + let id = r.type_registry.name_to_id["point"]; + let info = &r.type_registry.types[id as usize]; + assert_eq!(info.fields, vec!["x".to_string(), "y".to_string()]); + assert_eq!(info.num_fields, 0b11); + } +} diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 42f8ba96..c001edb5 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -101,6 +101,8 @@ pub enum CompileError { }, } +#[cfg(feature = "cranelift")] +pub mod aot_blob; #[cfg(feature = "cranelift")] pub mod compile_cranelift; #[cfg(feature = "cranelift")] From 
29eccffbba692407e5a388bc92ebf98cc172d837 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 01:03:42 +0100 Subject: [PATCH 2/8] aot: ilo_aot_publish_program runtime helper New C ABI helper takes the embedded blob, deserialises via aot_blob, and publishes the program into ACTIVE_PROGRAM / ACTIVE_AST_PROGRAM / ACTIVE_FUNC_NAMES / ACTIVE_REGISTRY. Mirrors what with_active_registry does for the in-process JIT entry path, but stays for the process lifetime since AOT has no smaller scope. On a malformed blob (schema mismatch or postcard parse failure) we write a JSON diagnostic to stderr and exit 1. No silent fallback. ilo_aot_fini now also clears the three new pointers alongside the existing registry clear so the test suite sees clean state between AOT runs in the same process. --- src/vm/mod.rs | 51 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/src/vm/mod.rs b/src/vm/mod.rs index c001edb5..eea81ca5 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -18513,10 +18513,61 @@ pub extern "C" fn ilo_aot_arena_reset() { jit_arena_reset(); } +/// Deserialise an embedded `CompiledProgram` blob and publish its pointers +/// into the `ACTIVE_PROGRAM`, `ACTIVE_AST_PROGRAM`, `ACTIVE_FUNC_NAMES`, and +/// `ACTIVE_REGISTRY` TLS slots so HOF / closure dispatch helpers +/// (`jit_call_dyn`, `jit_call_builtin_tree`) can re-enter the VM and resolve +/// user-fn callbacks. Without this, AOT binaries silently returned `TAG_NIL` +/// for every program that emitted OP_CALL_DYN — see `aot_blob` module docs and +/// engine audit PR #413 gap #1. +/// +/// The program is leaked (`Box::leak`) for the process lifetime to match how +/// the JIT publishes `&CompiledProgram` for the duration of its `compile_and_call` +/// scope. AOT has no scope smaller than the process, so leaking is the +/// honest representation. 
On a malformed blob (schema mismatch or postcard +/// parse failure) we write a JSON diagnostic to stderr and exit 1 — no +/// silent fallback. +/// +/// Returns 0 on success. A malformed blob never returns: the process has +/// already exited with status 1. The caller (the cranelift-emitted `main`) +/// ignores the return value; the `u64` result is kept for forward +/// compatibility with a future "AOT diagnostic recovery" path. +/// +/// SAFETY: `ptr` must point to `len` readable bytes for the duration of this +/// call. The cranelift codegen emits the blob into a `.rodata` data section +/// via `create_data_section`, which the linker maps read-only; the pointer +/// is valid for the entire process lifetime. +#[cfg(feature = "cranelift")] +#[unsafe(no_mangle)] +pub extern "C" fn ilo_aot_publish_program(ptr: u64, len: u64) -> u64 { + let bytes = unsafe { std::slice::from_raw_parts(ptr as *const u8, len as usize) }; + let program = match aot_blob::deserialize_program(bytes) { + Ok(p) => p, + Err(e) => { + eprintln!( + "{{\"severity\":\"error\",\"code\":\"ILO-R013\",\"message\":\"AOT program blob load failed: {}\"}}", + e.replace('"', "\\\"") + ); + std::process::exit(1); + } + }; + let leaked: &'static CompiledProgram = Box::leak(Box::new(program)); + ACTIVE_PROGRAM.with(|r| r.set(leaked as *const CompiledProgram)); + ACTIVE_FUNC_NAMES.with(|r| r.set(&leaked.func_names as *const Vec)); + ACTIVE_REGISTRY.with(|r| r.set(&leaked.type_registry as *const TypeRegistry)); + if let Some(ast) = &leaked.ast { + ACTIVE_AST_PROGRAM.with(|r| r.set(ast.as_ref() as *const Program)); + } + 0 +} + #[cfg(feature = "cranelift")] #[unsafe(no_mangle)] pub extern "C" fn ilo_aot_fini() { clear_active_registry(); + ACTIVE_PROGRAM.with(|r| r.set(std::ptr::null())); + ACTIVE_FUNC_NAMES.with(|r| r.set(std::ptr::null())); + ACTIVE_AST_PROGRAM.with(|r| r.set(std::ptr::null())); jit_arena_reset(); } From 4baa2fb774dab81db458cbd66457ab64e507c52a Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 
May 2026 01:03:54 +0100 Subject: [PATCH 3/8] aot: emit publish_program call in generate_main and the bench harness Declare ilo_aot_publish_program in HelperFuncs, serialise the program in compile_to_binary, and emit a call to the helper from generate_main after the type-registry setup and before the user-fn invocation. Same wiring goes through the C bench harness (compile_to_bench_binary) so --bench binaries see the same dispatch contract as plain compiles. The blob lands in a .rodata data section named ilo_program_blob via the existing create_data_section helper. Binary size grows by ~210 KB once (libilo.a now pulls in the postcard + serde_json deserialiser code paths) and by a few hundred bytes to a few KB per program for the blob itself. This closes the silent-nil class flagged by the engine audit. map over a lambda, fld with a user-fn, grp / uniqby with a key fn, closure-bind ctx args, and fn-ref return-and-call all now match tree / VM / JIT under AOT. --- src/vm/compile_cranelift.rs | 44 +++++++++++++++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/src/vm/compile_cranelift.rs b/src/vm/compile_cranelift.rs index 3722ef85..d56e1e5f 100644 --- a/src/vm/compile_cranelift.rs +++ b/src/vm/compile_cranelift.rs @@ -209,6 +209,13 @@ struct HelperFuncs { aot_init: FuncId, aot_fini: FuncId, aot_set_registry: FuncId, + /// Deserialise an embedded `CompiledProgram` blob and publish it via the + /// `with_active_registry` TLS slots so HOF / closure dispatch helpers can + /// re-enter the VM. See `ilo_aot_publish_program` in `src/vm/mod.rs` and + /// the `aot_blob` module for the blob format. Without this call AOT + /// binaries silently returned `TAG_NIL` for every program that emitted + /// OP_CALL_DYN — see engine audit PR #413 gap #1. 
+ aot_publish_program: FuncId, aot_parse_arg: FuncId, string_const: FuncId, // Linear algebra @@ -404,6 +411,7 @@ fn declare_all_helpers(module: &mut ObjectModule) -> HelperFuncs { aot_init: declare_helper(module, "ilo_aot_init", 0, 0), aot_fini: declare_helper(module, "ilo_aot_fini", 0, 0), aot_set_registry: declare_helper(module, "ilo_aot_set_registry", 2, 0), + aot_publish_program: declare_helper(module, "ilo_aot_publish_program", 2, 1), aot_parse_arg: declare_helper(module, "ilo_aot_parse_arg", 1, 1), string_const: declare_helper(module, "jit_string_const", 1, 1), // Linear algebra @@ -547,6 +555,11 @@ pub fn compile_to_binary( // Serialize the type registry for embedding in the binary let registry_bytes = serialize_type_registry(&program.type_registry); + // Serialize the full CompiledProgram (chunks + AST + func_names + ...) + // so the AOT runtime can publish ACTIVE_PROGRAM and ACTIVE_AST_PROGRAM + // for HOF / closure dispatch (engine audit PR #413 gap #1). + let program_blob = super::aot_blob::serialize_program(program)?; + // Generate main() generate_main( &mut module, @@ -554,6 +567,7 @@ pub fn compile_to_binary( entry_chunk.param_count as usize, &helpers, &registry_bytes, + &program_blob, )?; // Emit object file @@ -4228,6 +4242,7 @@ fn generate_main( param_count: usize, helpers: &HelperFuncs, registry_bytes: &[u8], + program_blob: &[u8], ) -> Result<(), String> { let mut sig = module.make_signature(); sig.params.push(AbiParam::new(I32)); // argc @@ -4266,6 +4281,18 @@ fn generate_main( builder.ins().call(set_reg_fref, &[reg_ptr, reg_len]); } + // Publish the embedded CompiledProgram blob so HOF / closure dispatch + // helpers (jit_call_dyn, jit_call_builtin_tree) can re-enter the VM on + // user-fn callbacks. Engine audit PR #413 gap #1: without this, every + // OP_CALL_DYN dispatch silently returns TAG_NIL, manifesting as + // [nil, nil, ...] for map(lambda), nil for fld/grp/uniqby/fnref-return. 
+ { + let blob_ptr = create_data_section(module, &mut builder, "ilo_program_blob", program_blob)?; + let blob_len = builder.ins().iconst(I64, program_blob.len() as i64); + let publish_fref = module.declare_func_in_func(helpers.aot_publish_program, builder.func); + builder.ins().call(publish_fref, &[blob_ptr, blob_len]); + } + let user_fref = module.declare_func_in_func(user_func_id, builder.func); let parse_arg_fref = module.declare_func_in_func(helpers.aot_parse_arg, builder.func); @@ -4415,6 +4442,12 @@ pub fn compile_to_bench_binary( .iter() .map(|b| format!("\\x{:02x}", b)) .collect::(); + // Serialize the CompiledProgram blob for HOF / closure dispatch (PR #413 gap #1). + let program_blob = super::aot_blob::serialize_program(program)?; + let program_blob_c_literal = program_blob + .iter() + .map(|b| format!("\\x{:02x}", b)) + .collect::(); let mut c_code = String::from( "#include \n\ @@ -4426,6 +4459,7 @@ pub fn compile_to_bench_binary( extern void ilo_aot_fini(void);\n\ extern void ilo_aot_arena_reset(void);\n\ extern void ilo_aot_set_registry(int64_t ptr, int64_t len);\n\ + extern int64_t ilo_aot_publish_program(int64_t ptr, int64_t len);\n\ extern int64_t ilo_aot_parse_arg(int64_t ptr);\n\n", ); // Embed the serialized type registry as a C byte array @@ -4435,6 +4469,13 @@ pub fn compile_to_bench_binary( registry_c_literal )); } + // Embed the serialized program blob as a C byte array + c_code.push_str(&format!( + "static const char ilo_program_blob_data[] = \"{}\";\n\ + static const long ilo_program_blob_len = {};\n\n", + program_blob_c_literal, + program_blob.len() + )); // Declare the exported function c_code.push_str(&format!("extern int64_t {}(", func_name)); @@ -4475,6 +4516,9 @@ pub fn compile_to_bench_binary( registry_bytes.len() )); } + c_code.push_str( + "\tilo_aot_publish_program((int64_t)ilo_program_blob_data, (int64_t)ilo_program_blob_len);\n", + ); c_code.push_str(&format!("\t{}({});\n", func_name, call_args)); 
c_code.push_str("\tilo_aot_arena_reset();\n"); From f13b79093466ea3fc2477c309902011c7b30ca14 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 01:04:03 +0100 Subject: [PATCH 4/8] tests: cross-engine regression for AOT HOF / closure dispatch Eight cases, each compiled to a real AOT binary and compared byte-for-byte against --run-tree, --run-vm, and --jit. The first seven are the audit failure shapes (map lambda no-cap, map lambda with capture, map closure-bind ctx, fld user-fn, grp user-fn, uniqby user-fn, fn-ref return-and-call); the eighth pins the already-passing top-level fn-ref-to-map case so a future refactor that breaks it while fixing the others gets caught immediately. examples/aot-closures.ilo demonstrates every fixed shape at the language level and is exercised by the examples_engines harness so the same contract is enforced for an agent reading the in-context example. --- examples/aot-closures.ilo | 46 ++++++ tests/regression_aot_closures.rs | 235 +++++++++++++++++++++++++++++++ 2 files changed, 281 insertions(+) create mode 100644 examples/aot-closures.ilo create mode 100644 tests/regression_aot_closures.rs diff --git a/examples/aot-closures.ilo b/examples/aot-closures.ilo new file mode 100644 index 00000000..4b5910aa --- /dev/null +++ b/examples/aot-closures.ilo @@ -0,0 +1,46 @@ +-- AOT-compiled binaries now handle higher-order functions, inline +-- lambdas (with or without captures), closure-bind ctx args, fld/grp/ +-- uniqby with user-fn callbacks, and fn-ref return-and-call. Pre-fix +-- AOT silently returned `nil` (or `[nil, nil, ...]`) for every shape +-- below; tree, VM, and Cranelift JIT all returned the correct answer, +-- so AOT was the lone diverging engine. +-- +-- Root cause was simple: the AOT runtime never published the +-- `CompiledProgram` into `ACTIVE_PROGRAM` / `ACTIVE_AST_PROGRAM`, so the +-- dispatch helpers (`jit_call_dyn`, `jit_call_builtin_tree`) hit their +-- null-program guards and returned TAG_NIL. 
The fix serialises the +-- full program (chunks + AST + type registry) into a postcard blob +-- embedded in the binary's `.rodata` and publishes it at startup via +-- a new `ilo_aot_publish_program` runtime helper. +-- +-- This file is the in-context learning example for agents: every shape +-- here is one that used to fail under AOT and now works. The full +-- cross-engine contract is enforced by +-- tests/regression_aot_closures.rs. + +-- This example demonstrates the fix at the language level. The +-- `examples_engines` harness runs it through tree / VM / JIT; the AOT +-- contract is pinned by the regression test cited above. + +dbl x:n>n;*x 2 +add a:n b:n>n;+a b +sq x:n>n;*x x +addk x:n k:n>n;+x k +mksq>F n n;sq + +map-lambda-nocap>L n;map (x:n>n;*x 2) [1,2,3] +map-lambda-capture>L n;k=10;map (x:n>n;+x k) [1,2,3] +map-closure-bind>L n;map addk 10 [1,2,3] +fld-user-fn>n;fld add [1,2,3,4] 0 +fnref-return-call>n;f=mksq;f 5 + +main>L n; + a=map-lambda-nocap + b=map-lambda-capture + c=map-closure-bind + d=fld-user-fn + e=fnref-return-call + [hd a, hd b, hd c, d, e] + +-- run: main +-- out: [2, 11, 11, 10, 25] diff --git a/tests/regression_aot_closures.rs b/tests/regression_aot_closures.rs new file mode 100644 index 00000000..e9d3d20c --- /dev/null +++ b/tests/regression_aot_closures.rs @@ -0,0 +1,235 @@ +// Regression test: AOT-compiled binaries must handle HOF / closure / fn-ref +// dispatch identically to tree, VM, and Cranelift JIT. +// +// Background: +// +// Engine audit PR #413 found that AOT silently returned `nil` (or +// `[nil, nil, ...]`) for every program that emitted OP_CALL_DYN or an +// OP_*_BY_KEY finalizer where the helper has to re-enter the VM on a +// user-fn callback. Root cause: AOT never published `ACTIVE_PROGRAM` / +// `ACTIVE_AST_PROGRAM`, so the helpers hit their null-program guards +// (`src/vm/mod.rs` jit_call_dyn at line ~15977 and jit_call_builtin_tree +// at line ~15744) and silently returned TAG_NIL. 
+// +// The fix serialises the full `CompiledProgram` (chunks + AST + +// type_registry + func_names + is_tool) into a postcard blob embedded +// in the AOT binary's `.rodata`, and a new `ilo_aot_publish_program` +// runtime helper deserialises it on startup and publishes the four TLS +// pointers `with_active_registry` would otherwise set. +// +// Each case below is a row from the audit's `tests/engine-matrix/` corpus +// distilled into the smallest form that surfaces the bug. The assertion +// is cross-engine parity: AOT must match tree / VM / JIT byte-for-byte +// on stdout and exit code (stderr is captured but not compared). A regression that re-introduces the +// silent-nil class shows up here immediately. +// +// Gated on the `cranelift` feature because both AOT compile and the +// `--jit` baseline require it. + +#![cfg(feature = "cranelift")] + +use std::path::PathBuf; +use std::process::Command; +use std::sync::atomic::{AtomicU32, Ordering}; + +fn ilo() -> Command { + Command::new(env!("CARGO_BIN_EXE_ilo")) +} + +static COUNTER: AtomicU32 = AtomicU32::new(0); + +fn tmp_paths(tag: &str) -> (PathBuf, PathBuf) { + let n = COUNTER.fetch_add(1, Ordering::Relaxed); + let pid = std::process::id(); + let src = std::env::temp_dir().join(format!("ilo-aot-clos-{tag}-{pid}-{n}.ilo")); + let bin = std::env::temp_dir().join(format!("ilo-aot-clos-{tag}-{pid}-{n}.bin")); + (src, bin) +} + +fn run_in_process(src_path: &PathBuf, engine: &str) -> (Vec, Vec, i32) { + let out = ilo() + .arg(src_path) + .arg(engine) + .arg("main") + .output() + .expect("failed to run ilo in-process"); + (out.stdout, out.stderr, out.status.code().unwrap_or(-1)) +} + +fn run_aot(src_path: &PathBuf, bin_path: &PathBuf) -> (Vec, Vec, i32) { + let compile = ilo() + .args(["compile"]) + .arg(src_path) + .arg("-o") + .arg(bin_path) + .arg("main") + .output() + .expect("failed to invoke ilo compile"); + assert!( + compile.status.success(), + "ilo compile failed: stdout={:?} stderr={:?}", + String::from_utf8_lossy(&compile.stdout), 
+ String::from_utf8_lossy(&compile.stderr), + ); + let out = Command::new(bin_path) + .output() + .expect("failed to run AOT binary"); + (out.stdout, out.stderr, out.status.code().unwrap_or(-1)) +} + +/// Assert AOT output matches all three in-process engines and the +/// explicit expected stdout (the audit row's `-- expected:` value). +fn assert_cross_engine(tag: &str, src: &str, expected_stdout: &[u8]) { + let (src_path, bin_path) = tmp_paths(tag); + std::fs::write(&src_path, src).expect("write ilo source"); + + let (aot_stdout, aot_stderr, aot_exit) = run_aot(&src_path, &bin_path); + + assert_eq!( + aot_stdout, + expected_stdout, + "{tag}: AOT stdout mismatch. got={:?} expected={:?}", + String::from_utf8_lossy(&aot_stdout), + String::from_utf8_lossy(expected_stdout), + ); + assert_eq!( + aot_exit, + 0, + "{tag}: AOT exit non-zero. stderr={:?}", + String::from_utf8_lossy(&aot_stderr), + ); + + for engine in ["--run-tree", "--run-vm", "--jit"] { + let (s, _e, c) = run_in_process(&src_path, engine); + assert_eq!( + s, + aot_stdout, + "{tag}/{engine}: stdout diverges from AOT. in-proc={:?} aot={:?}", + String::from_utf8_lossy(&s), + String::from_utf8_lossy(&aot_stdout), + ); + assert_eq!( + c, aot_exit, + "{tag}/{engine}: exit diverges from AOT. in-proc={c} aot={aot_exit}", + ); + } + + let _ = std::fs::remove_file(&src_path); + let _ = std::fs::remove_file(&bin_path); +} + +// ── map over an inline lambda with no captures ───────────────────────── +// Audit row 16. Pre-fix AOT returned [nil, nil, nil]. The lambda lifts to +// a synthetic top-level fn; the map call site emits OP_CALL_DYN against +// the FnRef. Without ACTIVE_PROGRAM published, jit_call_dyn returned +// TAG_NIL for every element. + +#[test] +fn aot_map_inline_lambda_nocap() { + assert_cross_engine( + "map-lambda-nocap", + "main>L n;map (x:n>n;*x 2) [1,2,3]\n", + b"[2, 4, 6]\n", + ); +} + +// ── map over an inline lambda WITH captures ──────────────────────────── +// Audit row 17. 
// Pre-fix AOT returned [nil, nil, nil]. Same shape as the
// no-capture case but the compiler also emits OP_MAKE_CLOSURE to wrap
// the FnRef with the captured value, so the helper has to handle the
// `HeapObj::Closure` discriminator path inside jit_call_dyn too.

#[test]
fn aot_map_inline_lambda_capture() {
    // The lambda body reads `k` from the enclosing scope.
    let program = "main>L n;k=10;map (x:n>n;+x k) [1,2,3]\n";
    let want: &[u8] = b"[11, 12, 13]\n";
    assert_cross_engine("map-lambda-capture", program, want);
}

// ── map with a closure-bind ctx arg (3-arg map fn ctx xs) ──────────────
// Audit row 18. Pre-fix AOT returned `nil`. This routes through the
// tree bridge (`is_tree_bridge_eligible` lists Map+3 as bridge-only),
// so the failing helper is jit_call_builtin_tree's null-AST guard
// rather than jit_call_dyn. Covers the second of the two TLS publishing
// paths the fix had to wire up.

#[test]
fn aot_map_closure_bind_ctx() {
    // `addk` receives each element plus the explicit ctx value 10.
    let program = "addk x:n k:n>n;+x k\nmain>L n;map addk 10 [1,2,3]\n";
    let want: &[u8] = b"[11, 12, 13]\n";
    assert_cross_engine("map-closure-bind", program, want);
}

// ── fld with a user-fn fold accumulator ────────────────────────────────
// Audit row 31. Pre-fix AOT returned `nil`. fld+3 (`fld fn xs init`)
// compiles to a loop body that calls the accumulator via OP_CALL_DYN.

#[test]
fn aot_fld_user_fn() {
    // Folds `add` over [1,2,3,4] starting from 0.
    let program = "add a:n b:n>n;+a b\nmain>n;fld add [1,2,3,4] 0\n";
    let want: &[u8] = b"10\n";
    assert_cross_engine("fld-user-fn", program, want);
}

// ── grp by a user-fn key function (PR #391 native lift) ────────────────
// Audit row 36. Pre-fix AOT returned `nil`. `grp fn xs` compiles to a
// pre-loop that builds the keys list via OP_CALL_DYN followed by
// OP_GRP_BY_KEY to finalise. The OP_CALL_DYN side fails first; the
// finalizer itself takes pre-computed lists and is unaffected.
+ +#[test] +fn aot_grp_by_user_fn() { + assert_cross_engine( + "grp-user-fn", + "parity n:n>t;=mod n 2 0{\"even\"};\"odd\"\nmain>n;g=grp parity [1,2,3,4];len mkeys g\n", + b"2\n", + ); +} + +// ── uniqby with a user-fn key function ───────────────────────────────── +// Audit row 37. Same shape as grp. + +#[test] +fn aot_uniqby_user_fn() { + assert_cross_engine( + "uniqby-user-fn", + "parity n:n>t;=mod n 2 0{\"even\"};\"odd\"\nmain>n;u=uniqby parity [1,2,3,4];len u\n", + b"2\n", + ); +} + +// ── returning a fn-ref and calling it ────────────────────────────────── +// Audit row 38. Pre-fix AOT returned `nil`. `>F n n` declared return +// type means the returned value is a FnRef NanVal; calling it through +// `f 5` emits OP_CALL_DYN which needed ACTIVE_PROGRAM. + +#[test] +fn aot_fnref_return_then_call() { + assert_cross_engine( + "fnref-return", + "sq x:n>n;*x x\nmksq>F n n;sq\nmain>n;f=mksq;f 5\n", + b"25\n", + ); +} + +// ── top-level fn-ref to map (sanity check) ───────────────────────────── +// Audit row 15. This row already passed pre-fix on AOT — `map dbl xs` +// where `dbl` is a top-level fn was already going down a different +// emission path. Keeping the case here so future refactors don't +// accidentally regress the already-working shape while fixing the +// broken ones. + +#[test] +fn aot_map_toplevel_fnref_still_works() { + assert_cross_engine( + "map-toplevel-fnref", + "dbl x:n>n;*x 2\nmain>L n;map dbl [1,2,3]\n", + b"[2, 4, 6]\n", + ); +} From ce4d4571653dd3b27d21e073681ed06747f2ea48 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 01:04:14 +0100 Subject: [PATCH 5/8] docs: AOT now runs Phase 2 closures and HOF / fn-ref dispatch SPEC.md and the generated ai.txt previously claimed Phase 2 closure capture was tree-only with VM / JIT raising ILO-R012 and the default runner falling through. Empirically VM and JIT have handled Phase 2 natively for a while (PR1 #384 onwards); with this PR AOT joins them. 
Update the closure-capture paragraph and the default-engine paragraph in SPEC.md so they describe post-fix reality, and rewrite the manually-maintained line in SKILL.md to match. ilo-engines.md gets the same treatment plus the AOT specifics note about the embedded CompiledProgram blob. The site cli.md is updated in a follow-up commit in the separate ilo-lang/site repo since the site has no CI and lives outside this monorepo. --- SPEC.md | 4 ++-- ai.txt | 4 ++-- skills/ilo/ilo-engines.md | 10 +++++----- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/SPEC.md b/SPEC.md index 816b3147..5c46cacf 100644 --- a/SPEC.md +++ b/SPEC.md @@ -129,7 +129,7 @@ Syntax: `(: ...>;)`. Same shape as a top-level f f xs:L n thr:n>L n;flt (x:n>b;>x thr) xs -- captures `thr` ``` -Phase 2 captures run natively on the tree interpreter, the register VM, and the Cranelift JIT - every in-process engine. Each free variable is snapshot by value at the call site (`Expr::MakeClosure`) and appended to the call frame's arg slice on dispatch. The AOT backend lags here: HOFs taking a function value (including capturing closures) currently miscompile and need a separate fix. The ctx-arg form (`srt fn ctx xs`) remains the cross-engine alternative when you want explicit state without forming a closure. +Phase 2 captures run natively on every engine: the tree interpreter, the register VM, the Cranelift JIT, and the Cranelift AOT backend. Each free variable is snapshot by value at the call site (`Expr::MakeClosure`) and appended to the call frame's arg slice on dispatch. The AOT backend additionally embeds the postcard-serialised `CompiledProgram` into the binary's `.rodata` and publishes TLS pointers on startup, so dispatch helpers can re-enter the VM on user-fn callbacks. The ctx-arg form (`srt fn ctx xs`) remains the cross-engine alternative when you want explicit state without forming a closure. 
--- @@ -1533,7 +1533,7 @@ ilo serv -- long-lived JSON request/response loop **AOT entry-pick.** `ilo compile file.ilo -o out` (alias `ilo build`) follows the same entry-pick rules as the in-process engines: a single user-defined function is used directly; on multi-function files the entry is `main` if defined, otherwise the explicit positional `func` arg (`ilo compile file.ilo -o out run`); otherwise the compile fails with `ILO-E801` and exits 1 without writing a binary. AOT does not fall back to "first declared function" - that historical default produced binaries that called the wrong entry symbol and SIGSEGV'd at runtime. -**Default engine.** The bytecode register VM is the default execution path. It supports every opcode (closures with Phase 2 capture, listview windows, fused len-of-filter, every modern shape), and avoids the JIT compile-and-bail cost paid by the pre-v0.11.9 Cranelift-first default whenever a program touched an opcode the JIT couldn't handle. Cranelift JIT is opt-in via `--jit`; on opt-in, the JIT runs hot numeric loops and falls back to the VM on bailout. The tree interpreter (`--run-tree`) remains the canonical-semantics reference. Phase 2 captures run natively on tree, VM, and JIT - no engine fallback needed. For long-running workloads where the JIT pays for itself, opt in explicitly; for most agent workloads the VM is the right default. +**Default engine.** The bytecode register VM is the default execution path. It supports every opcode (closures with Phase 2 capture, listview windows, fused len-of-filter, every modern shape), and avoids the JIT compile-and-bail cost paid by the pre-v0.11.9 Cranelift-first default whenever a program touched an opcode the JIT couldn't handle. Cranelift JIT is opt-in via `--jit`; on opt-in, the JIT runs hot numeric loops and falls back to the VM on bailout. The tree interpreter (`--run-tree`) remains the canonical-semantics reference. 
Phase 2 captures run natively on every engine - tree, VM, JIT, and AOT (`ilo compile`); AOT embeds the postcard `CompiledProgram` blob into the binary's `.rodata` so dispatch helpers can re-enter the VM on user-fn callbacks the same way the in-process runners do. For long-running workloads where the JIT pays for itself, opt in explicitly; for most agent workloads the VM is the right default. **Subcommand dispatch.** The first positional argument is interpreted as a function name when it has the shape of an ilo identifier - `[a-z][a-z0-9]*(-[a-z0-9]+)*` - so `ilo file.ilo list-orders` routes to the `list-orders` function. Args that don't match the ident shape (file paths like `/tmp/data.json`, numbers, sigils, bracketed lists, anything with a `.` or `/`) route to `main` (or the entry function) as a positional CLI arg instead. Trailing dashes (`foo-`), doubled dashes (`foo--bar`), and negative numbers (`-1`) are not idents and pass through as data. diff --git a/ai.txt b/ai.txt index be638479..5675ffb1 100644 --- a/ai.txt +++ b/ai.txt @@ -1,6 +1,6 @@ INTRO: ilo is a token-optimised programming language for AI agents. Every design choice is evaluated against total token cost: generation + retries + context loading. FUNCTIONS: : ...>; No parens around params - `>` separates params from return type `;` separates statements - no newlines required Last expression is the return value (no `return` keyword) Zero-arg call: `make-id()` tot p:n q:n r:n>n;s=*p q;t=*s r;+s t -TYPES: `n`=number (f64) `t`=text (string) `b`=bool `_`=any/unknown (wildcard type) `L n`=list of number `R n t`=result: ok=number, err=text `O n`=optional number (nil or n) `M t n`=map from text keys to numbers `S red green blue`=sum type - one of named text variants `F n t`=function type: takes n, returns t (used in HOF params) `order`=named type `a`=type variable - any single lowercase letter except n, t, b [Optional (`O T`)] `O T` accepts either `nil` or a value of type `T`. 
f x:O n>n;??x 0 -- unwrap optional or default to 0 g>O n;nil -- returns nil (valid O n) h>O n;42 -- returns 42 (valid O n) `??x default` - nil-coalesce: returns `x` if non-nil, else `default`. Unwraps `O T` to `T`. [Sum types (`S a b c`)] Closed set of named text variants. Verifier-enforced; runtime value is always `t`. color x:S red green blue > t ?x{red:"ff0000";green:"00ff00";blue:"0000ff"} Sum types are compatible with `t` - a sum value can be passed to any `t` parameter. [Map type (`M k v`)] Dynamic key-value collection. Keys are typed: text (`t`) or integer (`n`). `Int(1)` and `Text("1")` are distinct keys. mmap -- empty map mset m k v -- return new map with key k set to v mget m k -- value at key k, or nil mhas m k -- b: true if key exists mkeys m -- L t: sorted list of keys mvals m -- L v: values sorted by key mdel m k -- return new map with key k removed len m -- number of entries Numeric keys work directly - no `str` conversion needed. Float keys floor to `i64` at the builtin boundary (matching `at xs i`); NaN/Infinity raise at runtime. idx=mmap idx=mset idx 7 "seven" -- M n t, integer key mget idx 7 -- "seven" mhas idx 7 -- true mhas idx "7" -- false (Int and Text are distinct) `jdmp` stringifies numeric keys for JSON output (JSON object keys are always strings). The round-trip via `jpar` is lossy - numeric keys come back as text. Example: scores>M t n m=mmap m=mset m "alice" 99 m=mset m "bob" 87 mget m "alice" -- 99 [Type variables] A single lowercase letter (other than `n`, `t`, `b`) in type position is a type variable, treated as `unknown` during verification. Used for higher-order function signatures: identity x:a>a;x apply f:F a a x:a>a;f x Type variables provide weak generics - the verifier accepts any type for `a` without consistency checking across call sites. 
[Inline lambdas] Pass a function literal directly to a HOF instead of defining a one-off top-level helper: by-dist xs:L n>L n;srt (x:n>n;abs x) xs nonempty ws:L t>L t;flt (s:t>b;>(len s) 0) ws sumsq xs:L n>n;fld (a:n x:n>n;+a *x x) xs 0 Syntax: `(: ...>;)`. Same shape as a top-level function declaration, wrapped in parens, no name. **Phase 1 (no captures)** lifts the literal to a synthetic top-level decl and works across every engine (tree, VM, Cranelift JIT, AOT). The body's free variables must all be params, locals defined inside the lambda body, or known top-level fns. **Phase 2 (closure capture)** lets the body reference variables from the enclosing scope: f xs:L n thr:n>L n;flt (x:n>b;>x thr) xs -- captures `thr` Phase 2 captures run natively on the tree interpreter, the register VM, and the Cranelift JIT - every in-process engine. Each free variable is snapshot by value at the call site (`Expr::MakeClosure`) and appended to the call frame's arg slice on dispatch. The AOT backend lags here: HOFs taking a function value (including capturing closures) currently miscompile and need a separate fix. The ctx-arg form (`srt fn ctx xs`) remains the cross-engine alternative when you want explicit state without forming a closure. +TYPES: `n`=number (f64) `t`=text (string) `b`=bool `_`=any/unknown (wildcard type) `L n`=list of number `R n t`=result: ok=number, err=text `O n`=optional number (nil or n) `M t n`=map from text keys to numbers `S red green blue`=sum type - one of named text variants `F n t`=function type: takes n, returns t (used in HOF params) `order`=named type `a`=type variable - any single lowercase letter except n, t, b [Optional (`O T`)] `O T` accepts either `nil` or a value of type `T`. f x:O n>n;??x 0 -- unwrap optional or default to 0 g>O n;nil -- returns nil (valid O n) h>O n;42 -- returns 42 (valid O n) `??x default` - nil-coalesce: returns `x` if non-nil, else `default`. Unwraps `O T` to `T`. 
[Sum types (`S a b c`)] Closed set of named text variants. Verifier-enforced; runtime value is always `t`. color x:S red green blue > t ?x{red:"ff0000";green:"00ff00";blue:"0000ff"} Sum types are compatible with `t` - a sum value can be passed to any `t` parameter. [Map type (`M k v`)] Dynamic key-value collection. Keys are typed: text (`t`) or integer (`n`). `Int(1)` and `Text("1")` are distinct keys. mmap -- empty map mset m k v -- return new map with key k set to v mget m k -- value at key k, or nil mhas m k -- b: true if key exists mkeys m -- L t: sorted list of keys mvals m -- L v: values sorted by key mdel m k -- return new map with key k removed len m -- number of entries Numeric keys work directly - no `str` conversion needed. Float keys floor to `i64` at the builtin boundary (matching `at xs i`); NaN/Infinity raise at runtime. idx=mmap idx=mset idx 7 "seven" -- M n t, integer key mget idx 7 -- "seven" mhas idx 7 -- true mhas idx "7" -- false (Int and Text are distinct) `jdmp` stringifies numeric keys for JSON output (JSON object keys are always strings). The round-trip via `jpar` is lossy - numeric keys come back as text. Example: scores>M t n m=mmap m=mset m "alice" 99 m=mset m "bob" 87 mget m "alice" -- 99 [Type variables] A single lowercase letter (other than `n`, `t`, `b`) in type position is a type variable, treated as `unknown` during verification. Used for higher-order function signatures: identity x:a>a;x apply f:F a a x:a>a;f x Type variables provide weak generics - the verifier accepts any type for `a` without consistency checking across call sites. [Inline lambdas] Pass a function literal directly to a HOF instead of defining a one-off top-level helper: by-dist xs:L n>L n;srt (x:n>n;abs x) xs nonempty ws:L t>L t;flt (s:t>b;>(len s) 0) ws sumsq xs:L n>n;fld (a:n x:n>n;+a *x x) xs 0 Syntax: `(: ...>;)`. Same shape as a top-level function declaration, wrapped in parens, no name. 
**Phase 1 (no captures)** lifts the literal to a synthetic top-level decl and works across every engine (tree, VM, Cranelift JIT, AOT). The body's free variables must all be params, locals defined inside the lambda body, or known top-level fns. **Phase 2 (closure capture)** lets the body reference variables from the enclosing scope: f xs:L n thr:n>L n;flt (x:n>b;>x thr) xs -- captures `thr` Phase 2 runs on every engine - tree, VM, Cranelift JIT, and AOT. The VM and JIT lower captures natively via `OP_MAKE_CLOSURE`; AOT additionally embeds the `CompiledProgram` so dispatch helpers can re-enter the VM on user-fn callbacks. The ctx-arg form (`srt fn ctx xs`) remains the lightest cross-engine alternative for capturing state when you'd rather not pay for a closure-build. NAMING: Short names everywhere. 1–3 chars. `order`=`ord`=truncate `customers`=`cs`=consonants `data`=`d`=single letter `level`=`lv`=drop vowels `discount`=`dc`=initials `final`=`fin`=first 3 `items`=`its`=first 3 Function names follow the same rules. Field names in constructors and external tool names keep their full form - they define the public interface. [Identifier syntax] Identifiers are lowercase ASCII only, optionally with hyphenated segments. Formally: `[a-z][a-z0-9]*(-[a-z0-9]+)*`. Capital letters and underscores are rejected at the binding and call site. run -- OK run-d -- OK (hyphen separates segments) r2 -- OK (digit after first letter) runD -- ERROR (capital letter) RunD -- ERROR (leading capital) run_d -- ERROR (underscore not allowed in bindings) -run -- ERROR (must start with a letter) `runD` in the interactive CLI surfaces as `ILO-L003 unexpected token` with a suggestion to use `run-d` or `rund`. The constraint is intentional: a single lexical shape per identifier keeps the token stream predictable for agents and avoids style debates over camelCase vs snake_case vs kebab-case. 
The only place capital letters and underscores are accepted is **after `.` or `.?`** at field-access position, so heterogeneous JSON keys from real APIs work without rewriting. See [Field names at dot-access](#field-names-at-dot-access) for the full list of post-dot relaxations (`r.URL`, `r.AccessKey`, `r.user_name`, etc.). Binding names (`AccessKey = ...`) and function names (`AccessKey x:n>n;...`) still error. [Reserved words] The following identifiers are reserved and cannot be used as names: `if`, `return`, `let`, `fn`, `def`, `var`, `const`. Using them produces a friendly error with the ilo equivalent: -- ERROR: `if` is a reserved word. Use: ?cond{true:... false:...} -- ERROR: `return` is a reserved word. Last expression is the return value. -- ERROR: `let` is a reserved word. Use: name = expr -- ERROR: `fn`/`def` is a reserved word. Use: name param:type > rettype; body Builtin names (`flat`, `frq`, `map`, `flt`, `cat`, `len`, `srt`, `hd`, `tl`, `ord`, `fld`, `lst`, ...) are also rejected as user-function names and as local-binding LHS. Without this, calls to the user fn or use sites of the local binding silently mis-dispatch to the builtin and surface as a confusing `ILO-T006` arity mismatch. The parser intercepts at the declaration site with ILO-P011 and a rename hint: flat n:n>n;n -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a function name -- hint: rename to something like `myflat` or `flatof`. main>n;flat=cat xs " ";spl flat ". " -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a binding name -- hint: rename to something like `myflat` or `flatv`. [Reserved namespaces] Short builtin names are precious surface and ilo reserves a stable subset of them. To save agents (and their carry-forward scripts) from "what got reserved this release?" debugging cycles, the language publishes the full short-name reserve list plus a forward-compatibility rule for future builtins. 
**Currently reserved short names (1-3 characters).** Every name in this list is a builtin today and triggers `ILO-P011` if used as a binding or user-function name: 2-char at hd tl rd wr ct 3-char abs avg cap cat cel chr cos det dot env exp fft fld flr flt fmt frq get grp has inv len log lsd lst lwr map max min mod now num ord pow pst rdb rdl rev rgx rng rnd rou run sin slc spl srt str sum tan trm unq upr wrl zip `rng` is the short-form alias for the canonical `range` builtin; it is reserved with the same shadow-prevention semantics as a canonical builtin name (binding `rng=...` or declaring `rng x:...` fires `ILO-P011`). Longer builtin names (`acos`, `asin`, `atan`, `flat`, `take`, `drop`, `mget`, `mset`, `mmap`, `prnt`, `mapr`, `solve`, `clamp`, `cumsum`, `median`, `matmul`, `range`, `window`, `chunks`, `walk`, `glob`, …) are also reserved and rejected by `ILO-P011`, but the short-name namespace above is where carry-forward scripts most often collide, so it gets explicit enumeration. **Forward-compatibility rule.** Future ilo releases add new builtins under names **4 characters or longer**. A 2-character name that is not on this list today is safe to use as a binding or function name and stays safe across releases. A 3-character name that is not on this list is _highly likely_ to stay safe but is not a hard promise - the 3-char surface is already dense, and a rare ergonomic win may justify an addition, called out in the changelog. This gives agents a deterministic safe-name strategy: **2 chars**: any unreserved 2-char name is permanently fine for bindings (`ce` for "category", `ix` for index, `mn` for "mean", `pq` for "priority queue", …). Names on the reserved list above never get removed. **3 chars**: prefer unreserved 3-char names where possible. If a future release reserves one, the migration is a 1-character rename plus a changelog entry. **4+ chars**: always safe. 
New builtins land here first; any short alias is added later only if the long name is unambiguous and the short doesn't shadow a plausible user binding. When a collision does happen, `ILO-P011` surfaces it at the binding site with a rename suggestion - never silently mis-dispatches at the call site (see the `flat=cat xs " "` example above). Combined with the reserve list, that turns every name-collision incident into a single-character rename instead of a debugging spiral. [Cross-language gotchas] Common shapes reached for from other languages. The parser and lexer surface each with a friendly hint: `AND a b`, `OR a b`, `NOT a`=`&a b`, `|a b`, `!a`=`ILO-L001` `=a b`=`<=a b`, `>=a b` (single token)=`ILO-P003` `f=fn x:n>n;+x 1` (lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-P009` `\x{+x 1}` (Haskell/Rust lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-L001` `main:>n;body`=`main>n;body` (no `:` before `>`)=`ILO-P003` Multi-line body without braces=`@k xs{body}`, `cond{body}` on one line=`ILO-P003` `cond{^"err"}` braced-cond=Braceless `cond ^"err"` for early return=hint only `- -*a b *c d` (double-minus)=`- 0 +*a b *c d` (negate the sum)=`ILO-P021` `[k fmt2 v 2]` (call in list)=`[k (fmt2 v 2)]` or bind-first=`ILO-P101` Each case fires a hint pointing at the canonical form; the agent's first retry should be the right one. Identifier-shaped collisions with builtin names (`len=...`, `sin=...`) are rejected with `ILO-P011` plus a rename suggestion. The list-literal call trap (`ILO-P101`) catches the case where a variadic builtin (`fmt`, `fmt2`) appears bare inside `[...]`. Fixed-arity builtins (`str`, `at`, `map`, ...) auto-expand to a call as one element, but variadic ones can't (the parser doesn't know where their args end), so the bare form would silently fall through as multiple elements with the builtin name as an undefined Ref. Fix by wrapping the call in parens (`[k (fmt2 v 2)]`) or binding first. 
The double-minus trap (`ILO-P021`) catches the silent-miscompile shape `- - a b c d` for `` in `{+,*,/}`. Read intuitively as `-(a*b) - (c*d)` but parses as `-((a*b) - (c*d)) = -(a*b) + (c*d)` because the inner `-` greedily consumes both prefix-binop groups as binary subtract and the outer `-` falls back to unary negate. Fix by negating the sum (`- 0 +*a b *c d`) or binding first (`p=*a b;q=*c d;- 0 +p q`). Single-atom variants like `- -a b` remain accepted since they're unambiguous. COMMENTS: -- full line comment +a b -- end of line comment -- no multi-line comments; use consecutive -- lines -- like this Single-line only. `--` to end of line. No multi-line comment syntax - newlines are a human display concern, not a language concern. An entire ilo program can be one line. Use consecutive `--` lines when humans need multi-line comments. Stripped at the lexer level before parsing - comments produce no AST nodes and cost zero runtime tokens. Generating `--` costs 1 LLM token, so comments are essentially free. **Gotcha:** `--x 1` is a comment, not "negate (x minus 1)". The lexer matches `--` greedily as a comment and eats the rest of the line. To negate a subtraction, use a space or bind first: -- DON'T: --x 1 (comment, not negate-subtract) -- DO: - -x 1 (space separates the two minus operators) -- DO: r=-x 1;-r (bind first) OPERATORS: Both prefix and infix notation are supported. **Prefix is preferred** - it is the token-optimal form that eliminates parentheses and produces denser code. Infix is available for readability when needed. 
[Binary] `+a b`=`a + b`=add / concat / list concat=`n`, `t`, `L` `+=a v`=append to list (returns new list, see [Append semantics](#append-semantics-+=))=`L` `-a b`=`a - b`=subtract=`n` `*a b`=`a * b`=multiply=`n` `/a b`=`a / b`=divide=`n` `=a b`=`a == b`=equal (prefix `=` is preferred; `==a b` also accepted)=any `!=a b`=`a != b`=not equal=any `>a b`=`a > b`=greater than=`n`, `t` `=a b`=`a >= b`=greater or equal=`n`, `t` `<=a b`=`a <= b`=less or equal=`n`, `t` `&a b`=`a & b`=logical AND (short-circuit)=any (truthy) `|a b`=`a | b`=logical OR (short-circuit)=any (truthy) [Append semantics (`+=`)] `+=xs v` is **pure-shaped**, despite the imperative-looking syntax. It returns a new list with `v` appended and does **not** mutate `xs` in the caller's scope. It works in every position a value-producing expression works: -- 1. Rebind (canonical accumulator pattern) xs=[];@i 0..3{xs=+=xs i};xs -- [0, 1, 2] -- 2. Non-rebind assignment (xs preserved) xs=[1, 2, 3];ys=+=xs 99 -- xs is still [1, 2, 3]; ys is [1, 2, 3, 99] -- 3. Pipeline / argument position len +=xs 99 -- length of [xs..., 99] sum +=xs 99 -- sum of [xs..., 99] The rebind shape `xs = +=xs v` is the standard foreach-build accumulator. When the binding is RC=1 the engines mutate the underlying buffer in place (amortised O(1) per push) - but this is a behind-the-scenes optimisation. To any observer the operation is still functional: nothing outside the rebind sees the old `xs`. The non-rebind shape `ys = +=xs v` always allocates a fresh list and leaves `xs` untouched, so source aliases are safe. There is no separate `push` builtin. `+=` covers every use case and is shorter; adding an alias would mean two ways to spell the same operation, costing reasoning tokens and surface area. 
[Unary] `-x`=negate=`n` `!x`=logical NOT=any (truthy) [Special infix] `a??b`=nil-coalesce (if a is nil, return b)=any `a>>f`=pipe (desugar to `f(a)`)=any [Prefix nesting (no parens needed)] +*a b c -- (a * b) + c *a +b c -- a * (b + c) >=+x y 100 -- (x + y) >= 100 -*a b *c d -- (a * b) - (c * d) The outer prefix op binds the inner prefix subexpression as its **left** operand, regardless of operator precedence. With two same-precedence ops side by side this is easy to misread: */a b c -- (a/b) * c ← NOT (a*b)/c /*a b c -- (a*b) / c ← NOT (a/b)*c +-a b c -- (a-b) + c ← NOT (a+b)-c -+a b c -- (a+b) - c ← NOT (a-b)+c The runtime emits a `hint:` diagnostic when one of these four pairs appears at a prefix position, since the parse order disagrees with the natural left-to-right reading. To force the other grouping, swap the ops or bind the inner result first: -- Want (a*b)/c with a=6, b=2, c=3: r=*a b;/r c -- bind, then divide → 4 /*a b c -- equivalent, swapping the prefix-pair order [Infix precedence] Standard mathematical precedence (higher binds tighter): 6=`*` `/` 5=`+` `-` `+=` 4=`>` `<` `>=` `<=` 3=`=` `!=` 2=`&` 1=`|` Function application binds tighter than all infix operators: f a + b -- (f a) + b, NOT f(a + b) x * y + 1 -- (x * y) + 1 (x + y) * 2 -- parens override precedence Each nested prefix operator saves 2 tokens (no `(` `)` needed). Flat prefix like `+a b` saves 1 char vs `a + b`. Across 25 expression patterns, prefix notation saves **22% tokens** and **42% characters** vs infix. See [research/explorations/prefix-vs-infix/](research/explorations/prefix-vs-infix/) for the full benchmark. Disambiguation: `-` followed by one atom is unary negate, followed by two atoms is binary subtract. [Operands] Operator operands are **atoms** (literals, refs, field access), **nested prefix operators**, or **known-arity function calls**. 
The prefix-binop operand parser dispatches to call parsing when the ident at the cursor is a known-arity user fn or builtin AND the next token can start another operand: wh >len q 0{body} -- parses as wh > (len q) 0 { body } +f g h -- if f is 1-arity: BinOp(+, Call(f, [g]), h) -lnx 5 lnx 3 -- BinOp(-, Call(lnx, [5]), Call(lnx, [3])) dbl 5 -- Negate(Call(dbl, [5])) - unary on a call This parallels the `??` precedent: `??x default` accepts a call expression on the value side. Applies to every prefix-binop family member - `+`, `-`, `*`, `/`, comparisons, `&`, `|`, `+=` - and to unary negate when the call consumes the only operand. The same expansion also applies to the then/else slots of the prefix-ternary family (`?=cond a b`, `?>cond a b`, …) and the `?h cond a b` keyword form, so `?h =a b sev sc "NONE"` parses `sev sc` as a nested call without parens or a bind-first. Bare locals that shadow a user fn name still resolve via `Ref` rather than expanding into a zero-arg call, so `&e f{...}` where `f` is a local still parses as the bool operator with two refs. When the call expansion isn't available (the ident is a local that shadows a fn name, or the call's arity doesn't fit the remaining tokens), bind the call result first: r=fac p;*n r -- bind, then operate - always unambiguous **Negative literals vs binary minus**: the lexer greedily includes a leading `-` into number tokens. `-1`, `-7`, `-0` are all number literals at fresh-expression positions. To subtract from zero at the start of a statement, use a space: `- 0 v` (Minus token, then `0`, then `v`). f v:n>n;-0 v -- WRONG: -0 is Number(-0.0); v is a stray token f v:n>n;- 0 v -- OK: binary subtract: 0 - v = -v The lexer splits a glued negative literal back into `Minus + Number` when the previous token is one of `;`, `\n`, `=`, `{`, `(`, or `-`. 
The `-` context covers the operand slot of an outer prefix-minus, so `- -0 a b` lexes as `-, -, 0, a, b` and parses as `Subtract(Subtract(0, a), b)` = `-a - b` rather than tripping `ILO-P020`. Negative literals after an Ident, `[`, or another prefix binop (`+`, `*`, `/`) stay glued so call args (`at xs -1`), list literals (`[-2 1 3]`), and binary operands (`+a -3`) read naturally. @@ -15,6 +15,6 @@ TOOLS (EXTERNAL CALLS): tool "" > timeou IMPORTS: Split programs across files with `use`: use "path/to/file.ilo" -- import all declarations use "path/to/file.ilo" [name1 name2] -- import only named declarations All imported declarations merge into a flat shared namespace - no qualification, no `mod::fn` syntax. The verifier catches name collisions. -- math.ilo dbl n:n>n; *n 2 half n:n>n; /n 2 -- main.ilo use "math.ilo" run n:n>n; dbl! half n [Rules] Path is relative to the importing file's directory Transitive: if `a.ilo` uses `b.ilo`, `b.ilo`'s declarations are visible to `main.ilo` when it uses `a.ilo` Circular imports are an error (`ILO-P018`) Scoped import with unknown name: `ILO-P019` `use` in inline code (no file context): `ILO-P017` [Error codes] `ILO-P017`=File not found or `use` in inline mode `ILO-P018`=Circular import detected `ILO-P019`=Name in `[...]` list not declared in the imported file ERROR HANDLING: `R ok err` return type. Call then match: get-user uid;?{^e:^+"Lookup failed: "e;~d:use d} Compensate/rollback inline: charge pid amt;?{^e:release rid;^+"Payment failed: "e;~cid:continue} [Auto-Unwrap `!`] `func! args` calls `func` and auto-unwraps the Result: if `~v` (Ok), returns `v`; if `^e` (Err), immediately returns `^e` from the enclosing function. inner x:n>R n t;~x outer x:n>R n t;d=inner! x;~d Equivalent to `r=inner x;?r{~v:v;^e:^e}` but in 1 token instead of 12. 
Rules: The called function must return `R` or `O` (else verifier error ILO-T025) The enclosing function must return `R` (or `O` for Optional callees) (else verifier error ILO-T026) `!` goes after the function name, before args: `get! url` not `get url!` Zero-arg: `fetch!()` [Panic-Unwrap `!!`] `func!! args` is symmetric in shape with `!`, but on the failure path it aborts the program with a runtime diagnostic and exit code 1 instead of propagating. There is no enclosing-return-type constraint, so persona code can use it from `main>t`, `main>n`, or any non-Result / non-Optional context. main>t;rdl!! "input.txt" -- read file, abort with diagnostic if missing main>n;v=num!! "42";v -- parse number, abort on parse error main>n;m=mset mmap "k" 7;mget!! m "k" -- get value or abort if key missing On `^e` (Err) the program writes `panic-unwrap: ` to stderr and exits 1. On `O nil` the program writes `panic-unwrap: expected value, got nil`. On `~v` (Ok) or non-nil Optional, the inner value is extracted, identical to `!`. Rules: The called function must return `R` or `O` (else verifier error ILO-T025) **No constraint on the enclosing function's return type** - this is the difference from `!` `!!` goes after the function name, before args: `rdl!! path` not `rdl path!!` Zero-arg: `fetch!!()` Use `!` when the caller wants to react to the Err (compensate, retry, log). Use `!!` when the failure is a programming or environmental error the caller has no way to recover from - typical in short scripts, glue code, and main entry points. PATTERNS (FOR LLM GENERATORS): [Bind-first pattern] Always bind complex expressions to variables before using them in operators. Operators only accept atoms and nested operators as operands - not function calls. -- DON'T: *n fac -n 1 (fac is an operand of *, not a call) -- DO: r=fac -n 1;*n r (bind call result, then use in operator) [Recursion template] >;;...;;combine 1. **Guard**: base case returns early - `<=n 1 1` (or `<=n 1{1}`) 2. 
**Bind**: bind recursive call results - `r=fac -n 1` 3. **Combine**: use bound results in final expression - `*n r` [Factorial] fac n:n>n;<=n 1 1;r=fac -n 1;*n r `<=n 1 1` - braceless guard: if n <= 1, return 1 `r=fac -n 1` - recursive call with prefix subtract as argument `*n r` - multiply n by result [Fibonacci] fib n:n>n;<=n 1 n;a=fib -n 1;b=fib -n 2;+a b `<=n 1 n` - braceless guard: return n for 0 and 1 `a=fib -n 1;b=fib -n 2` - two recursive calls, each with prefix arg `+a b` - add results [Multi-statement bodies] Semicolons separate statements. Last expression is the return value. f x:n>n;a=*x 2;b=+a 1;*b b -- (x*2 + 1)^2 Bodies may also be written across multiple newline-separated lines, indented under the signature. The parser stays inside the same function body while it sees an open bracket (`[`, `(`, `{`) or a pipe operator continuation. This makes long literals and multi-line conditional pipelines readable without semicolons: f x:n>n a=*x 2 b=+a 1 *b b g>L n [10, 20, 30, 40, 50, 60, 70, 80] Statement separation reverts to standard rules once brackets close. A blank line ends the current declaration. [Multi-function files] Functions in a file are separated by **newlines**. The parser strips all newlines, so the token stream is flat. After parsing each function body, the parser uses the next newline-delimited boundary to start the next declaration. A non-last function body's **final expression must not be a bare variable reference (`Ref`) or a function call**, because the parser greedily reads following tokens as additional call arguments. 
Safe endings prevent this: Binary operator=`+n 0`, `*x 1`=✓=fixed arity - no greedy loop Index access=`xs.0`, `rec.field`=✓=returns `Expr::Index`, not `Ref` Match block=`?v{…}`=✓=ends with `}` ForEach block=`@x xs{…}`=✓=ends with `}` Parenthesised expr=`(x>>f>>g)`=✓=ends with `)` Record constructor=`point x:1 y:2`=✓=parses as `Expr::Record`, not `Ref` Text/number literal=`"ok"`, `42`=✓=literal, not `Ref` Bare variable (`Ref`)=`n`, `result`=✗=greedy loop fires Bare function call=`len xs`, `f a`=✗=greedy loop fires The **last function in a file** can end with anything - greedy parsing stops at EOF. -- Non-last functions: end with a binary expression digs n:n>n;t=str n;l=len t;+l 0 -- +l 0 = l (binary, safe) clmp n:n lo:n hi:n>n;n hi hi;+n 0 -- +n 0 = n (binary, safe; `clamp` is a builtin) -- Last function: bare call is fine sz xs:L n>n;len xs -- EOF - greedy loop stops naturally To use a pipe chain in a non-last function, wrap it in parentheses: dbl-inc x:n>n;(x>>dbl>>inc) -- parens prevent >> from consuming next function's name inc-sq x:n>n;x>>inc>>sq -- last function - no parens needed [DO / DON'T] -- DON'T: fac n:n>n;<=n 1 1;*n fac -n 1 -- ↑ *n sees fac as an atom operand, not a call -- DO: fac n:n>n;<=n 1 1;r=fac -n 1;*n r -- ↑ bind-first: call result goes into r, then *n r works -- DON'T: +fac -n 1 fac -n 2 -- ↑ + takes two operands; fac is just an atom ref -- DO: a=fac -n 1;b=fac -n 2;+a b -- ↑ bind both calls, then combine -ERROR DIAGNOSTICS: ilo verifies programs before execution and reports errors with stable codes, source context, and suggestions. [Error codes] Every error has a stable `ILO-` code. The letter is the namespace - the phase that raised the diagnostic - so agents and tools can route on prefix without parsing the message. Numeric ranges are reserved per namespace with generous gaps, so future codes slot in cleanly and the contract is forward-compatible. 
`ILO-L000-099`=L=Lexer / tokenisation=active `ILO-P100-199`=P=Parser / syntax=active `ILO-N200-299`=N=Names / resolution=reserved `ILO-I300-399`=I=Imports=reserved `ILO-T400-499`=T=Types=active `ILO-V500-599`=V=Verifier (post-type checks)=reserved `ILO-R600-699`=R=Runtime=active `ILO-D700-799`=D=Deprecation warnings=reserved `ILO-E800-899`=E=Engine-specific limitations=reserved `ILO-S900-999`=S=Skill / spec system=reserved **Historical codes.** ilo shipped with flat numbering inside each namespace - `ILO-L001`, `ILO-P001`, `ILO-T001`, `ILO-R001`, `ILO-W001`, all starting at 001. Those codes remain valid forever. The hundreds-block allocation above applies to new codes from now on, and a cross-engine regression test asserts every emitted code lives in a documented range. **Reserved namespaces.** `N`, `I`, `V`, `D`, `E`, `S` carry no codes today. They are forward declarations so the first code in each category slots into its own range without conflicting with the active namespaces. `D` is earmarked for deprecation warnings: when a feature is scheduled for removal it emits an `ILO-D7xx` warning at compile time without failing the build. Use `--explain` to see a detailed explanation: ilo --explain ILO-T004 [Source context] Errors point at the relevant source location with a caret: error[ILO-T005]: undefined function 'foo' (called with 1 args) --> 1:9 1 | f x:n>n;foo x = note: in function 'f' = suggestion: did you mean 'f'? Parser, verifier, and runtime errors all show source spans. The verifier uses the enclosing statement span as the best available location for expression-level errors. 
[Suggestions] The verifier provides context-aware hints: **Did you mean?** - Levenshtein-based suggestions for undefined variables, functions, fields, and types **Type conversion** - suggests `str` for n→t, `num` for t→n **Missing arms** - lists uncovered match patterns with types **Arity** - shows expected parameter signature [Error output formats] --ansi / -a ANSI colour (default for TTY) --text / -t Plain text (no colour) --json / -j JSON (default for piped output) --no-hints / -nh Suppress idiomatic hints NO_COLOR=1 Disable colour (same as --text) JSON error output follows a structured schema with `severity`, `code`, `message`, `labels` (with spans), `notes`, and `suggestion` fields. Runtime errors raised from the Cranelift JIT (opt-in via `--jit`) populate `labels` with the source span of the failing operation, matching tree and VM behaviour. Span coverage threads through every JIT runtime helper (unwrap, panic-unwrap, list-get, slice, index, jpth, mget, record-field strict access, builtin dispatch, dynamic call); AOT-compiled binaries inherit the same coverage. Pre-v0.11.6 builds surfaced `{"labels":[]}` for these shapes - if you see an empty labels array on a runtime error, the binary is out of date. AOT binaries also install an async-signal-safe handler in `ilo_aot_init` that catches fatal signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGABRT) and writes a single JSON line on stderr identifying the signal before the process terminates with the conventional 128+signo exit code. The diagnostic uses `ILO-R015` (AOT runtime fault). Without the handler, a hard fault inside compiled native code would leave the process with raw signal exit (e.g. 139 for SIGSEGV) and no diagnostic — agents driving ilo couldn't distinguish a clean non-zero exit from a hard fault. A SIGSEGV from an AOT binary is always a bug in ilo (codegen or runtime helper); file an issue with the source program and the JSON line. 
AOT binaries also install an async-signal-safe handler in `ilo_aot_init` that catches fatal signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGABRT) and writes a single JSON line on stderr identifying the signal before the process terminates with the conventional 128+signo exit code. The diagnostic uses `ILO-R015` (AOT runtime fault). Without the handler, a hard fault inside compiled native code would leave the process with raw signal exit (e.g. 139 for SIGSEGV) and no diagnostic — agents driving ilo couldn't distinguish a clean non-zero exit from a hard fault. A SIGSEGV from an AOT binary is always a bug in ilo (codegen or runtime helper); file an issue with the source program and the JSON line. [Top-level program output] For a program whose entry function returns a Result, the `~`/`^` wrapper is split across streams and exit codes so shell callers do not have to strip a prefix: `~v` (Ok)=`v` (bare)=-=0 `^e` (Err)=-=`^e`=1 any non-Result=`v`=-=0 In `--json` mode the value is always wrapped (`{"ok": v}` / `{"error": {...}}`) and emitted to stdout; exit codes match the plain-mode table. `Display` on `Value::Ok` / `Value::Err` still renders `~v` / `^e` in every other context (nested values, `prnt`, REPL prompts, error messages, debug output) - only the top-level program-return print path is split. The contract applies uniformly to in-process runners (`ilo prog.ilo`, `--run-tree`, `--run-vm`, `--jit`) and to AOT-compiled standalone binaries from `ilo compile`. Both strip the top-level `~`/`^` wrapper on stdout, route `^e` to stderr, and use the same exit codes - output is byte-for-byte identical across every backend. [Idiomatic hints] After successful execution, ilo scans the source for non-canonical forms and emits hints to stderr: hint: `==` → `=` saves 1 char (both mean equality in ilo) hint: `length` → `len` (canonical short form) Builtin alias hints appear at most once per program (the first long-form name found). 
In JSON mode, hints appear as `{"hints":["..."]}` on stderr. Suppress with `--no-hints` / `-nh`. [CLI invocation] ilo 'code' [args...] -- inline program; default-runs the entry function ilo program.ilo [func] [args] -- if `func` is omitted and the file declares exactly one function, that function runs automatically ilo run program.ilo [func] [a] -- verb form; same dispatch as the bare positional ilo check program.ilo [--json] -- run the verifier without executing (exit 0 = clean) ilo build program.ilo -o out -- AOT compile to a standalone binary (alias for `compile`) ilo program.ilo --ast -- print parsed AST as JSON and exit ilo --explain ILO-T004 -- print error explanation and exit ilo help ai -- compact AI spec to stdout (= contents of ai.txt) ilo serv -- long-lived JSON request/response loop **Verb-noun aliases.** `ilo run ` is an exact alias for the bare positional `ilo ` - same dispatch, same engine selection, same arg handling. `ilo build -o ` is an alias for `ilo compile -o `. Both exist to match the toolchain conventions used by `cargo`, `go`, and `zero` so agents and humans can guess the command name without consulting the help text. The bare positional forms remain fully supported for backwards compatibility; nothing has been removed. **`ilo check`.** Standalone verifier invocation: lex, parse, resolve imports, and run the type verifier without proceeding to bytecode compilation or execution. Exit code 0 means the program is well-typed and verifier-clean; exit code 1 means at least one diagnostic was emitted on stderr. The output mode follows the global flags (`--json` for NDJSON diagnostics, `--text` for plain text, `--ansi` for coloured output; auto-detected when omitted - JSON when stderr is not a TTY, ANSI otherwise). `ilo check` works on both files and inline code; on a syntactically-broken input it still reports the parse error rather than crashing, which is important for editor and agent loops that may feed in half-written programs. 
**Default-run.** Inline programs (`ilo 'code'`) and single-function files run their entry function with the remaining CLI args; no explicit function name needed. Multi-function files auto-pick a function called `main` when no positional func arg is supplied. The same heuristic applies to the explicit engine flags - `--run-tree`, `--run-vm`, and `--jit` all auto-pick `main` on multi-fn files, matching the default-engine behaviour. With no `main` declared, supply a function-name argument. **AOT entry-pick.** `ilo compile file.ilo -o out` (alias `ilo build`) follows the same entry-pick rules as the in-process engines: a single user-defined function is used directly; on multi-function files the entry is `main` if defined, otherwise the explicit positional `func` arg (`ilo compile file.ilo -o out run`); otherwise the compile fails with `ILO-E801` and exits 1 without writing a binary. AOT does not fall back to "first declared function" - that historical default produced binaries that called the wrong entry symbol and SIGSEGV'd at runtime. **Default engine.** The bytecode register VM is the default execution path. It supports every opcode (closures with Phase 2 capture, listview windows, fused len-of-filter, every modern shape), and avoids the JIT compile-and-bail cost paid by the pre-v0.11.9 Cranelift-first default whenever a program touched an opcode the JIT couldn't handle. Cranelift JIT is opt-in via `--jit`; on opt-in, the JIT runs hot numeric loops and falls back to the VM on bailout. The tree interpreter (`--run-tree`) remains the canonical-semantics reference. Phase 2 captures run natively on tree, VM, and JIT - no engine fallback needed. For long-running workloads where the JIT pays for itself, opt in explicitly; for most agent workloads the VM is the right default. 
**Subcommand dispatch.** The first positional argument is interpreted as a function name when it has the shape of an ilo identifier - `[a-z][a-z0-9]*(-[a-z0-9]+)*` - so `ilo file.ilo list-orders` routes to the `list-orders` function. Args that don't match the ident shape (file paths like `/tmp/data.json`, numbers, sigils, bracketed lists, anything with a `.` or `/`) route to `main` (or the entry function) as a positional CLI arg instead. Trailing dashes (`foo-`), doubled dashes (`foo--bar`), and negative numbers (`-1`) are not idents and pass through as data. **Unknown `--flag` guard.** Any token in the positional tail matching the clean long-flag shape `--word` or `--word-with-dashes` that isn't a recognised flag is rejected upfront with `error: unrecognised flag '--'. Use 'ilo --help' for valid flags. To pass it as a literal arg, separate with '--' first.` and exit 1. This prevents `ilo main.ilo --engine tree` from silently consuming `--engine` as a positional arg (which used to surface as misleading `ILO-R012 no functions defined` or `ILO-R004 main: expected N args, got N+1`). To pass a hyphen-prefixed token through as literal data, place the `--` separator first: `ilo main.ilo -- --foo`. Anything after the first `--` is data. Tokens with `=` (`--key=val`), trailing or doubled dashes (`--foo-`, `--foo--bar`), and negative numbers (`-1`) are not clean flag shapes and pass through unchanged. **Text-typed params.** When the entry function declares a parameter of type `t`, the CLI passes the raw arg through without numeric coercion. `ilo 'f x:t>t;x' 42` returns the string `"42"`, not the number 42. **Exit codes.** A program returning `Value::Err` (or `^reason` from the entry function) exits with code 1 and prints the err payload on stderr. `~v` (Ok) and any non-Result return value exit 0. Verifier and parser errors exit 2. **List args from the CLI.** Comma-separated args become `L n` or `L t` automatically: `ilo 'f xs:L n>n;sum xs' 1,2,3`. 
+ERROR DIAGNOSTICS: ilo verifies programs before execution and reports errors with stable codes, source context, and suggestions. [Error codes] Every error has a stable `ILO-` code. The letter is the namespace - the phase that raised the diagnostic - so agents and tools can route on prefix without parsing the message. Numeric ranges are reserved per namespace with generous gaps, so future codes slot in cleanly and the contract is forward-compatible. `ILO-L000-099`=L=Lexer / tokenisation=active `ILO-P100-199`=P=Parser / syntax=active `ILO-N200-299`=N=Names / resolution=reserved `ILO-I300-399`=I=Imports=reserved `ILO-T400-499`=T=Types=active `ILO-V500-599`=V=Verifier (post-type checks)=reserved `ILO-R600-699`=R=Runtime=active `ILO-D700-799`=D=Deprecation warnings=reserved `ILO-E800-899`=E=Engine-specific limitations=reserved `ILO-S900-999`=S=Skill / spec system=reserved **Historical codes.** ilo shipped with flat numbering inside each namespace - `ILO-L001`, `ILO-P001`, `ILO-T001`, `ILO-R001`, `ILO-W001`, all starting at 001. Those codes remain valid forever. The hundreds-block allocation above applies to new codes from now on, and a cross-engine regression test asserts every emitted code lives in a documented range. **Reserved namespaces.** `N`, `I`, `V`, `D`, `E`, `S` carry no codes today. They are forward declarations so the first code in each category slots into its own range without conflicting with the active namespaces. `D` is earmarked for deprecation warnings: when a feature is scheduled for removal it emits an `ILO-D7xx` warning at compile time without failing the build. Use `--explain` to see a detailed explanation: ilo --explain ILO-T004 [Source context] Errors point at the relevant source location with a caret: error[ILO-T005]: undefined function 'foo' (called with 1 args) --> 1:9 1 | f x:n>n;foo x = note: in function 'f' = suggestion: did you mean 'f'? Parser, verifier, and runtime errors all show source spans. 
The verifier uses the enclosing statement span as the best available location for expression-level errors. [Suggestions] The verifier provides context-aware hints: **Did you mean?** - Levenshtein-based suggestions for undefined variables, functions, fields, and types **Type conversion** - suggests `str` for n→t, `num` for t→n **Missing arms** - lists uncovered match patterns with types **Arity** - shows expected parameter signature [Error output formats] --ansi / -a ANSI colour (default for TTY) --text / -t Plain text (no colour) --json / -j JSON (default for piped output) --no-hints / -nh Suppress idiomatic hints NO_COLOR=1 Disable colour (same as --text) JSON error output follows a structured schema with `severity`, `code`, `message`, `labels` (with spans), `notes`, and `suggestion` fields. Runtime errors raised from the Cranelift JIT (opt-in via `--jit`) populate `labels` with the source span of the failing operation, matching tree and VM behaviour. Span coverage threads through every JIT runtime helper (unwrap, panic-unwrap, list-get, slice, index, jpth, mget, record-field strict access, builtin dispatch, dynamic call); AOT-compiled binaries inherit the same coverage. Pre-v0.11.6 builds surfaced `{"labels":[]}` for these shapes - if you see an empty labels array on a runtime error, the binary is out of date. AOT binaries also install an async-signal-safe handler in `ilo_aot_init` that catches fatal signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGABRT) and writes a single JSON line on stderr identifying the signal before the process terminates with the conventional 128+signo exit code. The diagnostic uses `ILO-R015` (AOT runtime fault). Without the handler, a hard fault inside compiled native code would leave the process with raw signal exit (e.g. 139 for SIGSEGV) and no diagnostic — agents driving ilo couldn't distinguish a clean non-zero exit from a hard fault. 
A SIGSEGV from an AOT binary is always a bug in ilo (codegen or runtime helper); file an issue with the source program and the JSON line. AOT binaries also install an async-signal-safe handler in `ilo_aot_init` that catches fatal signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGABRT) and writes a single JSON line on stderr identifying the signal before the process terminates with the conventional 128+signo exit code. The diagnostic uses `ILO-R015` (AOT runtime fault). Without the handler, a hard fault inside compiled native code would leave the process with raw signal exit (e.g. 139 for SIGSEGV) and no diagnostic — agents driving ilo couldn't distinguish a clean non-zero exit from a hard fault. A SIGSEGV from an AOT binary is always a bug in ilo (codegen or runtime helper); file an issue with the source program and the JSON line. [Top-level program output] For a program whose entry function returns a Result, the `~`/`^` wrapper is split across streams and exit codes so shell callers do not have to strip a prefix: `~v` (Ok)=`v` (bare)=-=0 `^e` (Err)=-=`^e`=1 any non-Result=`v`=-=0 In `--json` mode the value is always wrapped (`{"ok": v}` / `{"error": {...}}`) and emitted to stdout; exit codes match the plain-mode table. `Display` on `Value::Ok` / `Value::Err` still renders `~v` / `^e` in every other context (nested values, `prnt`, REPL prompts, error messages, debug output) - only the top-level program-return print path is split. The contract applies uniformly to in-process runners (`ilo prog.ilo`, `--run-tree`, `--run-vm`, `--jit`) and to AOT-compiled standalone binaries from `ilo compile`. Both strip the top-level `~`/`^` wrapper on stdout, route `^e` to stderr, and use the same exit codes - output is byte-for-byte identical across every backend. 
[Idiomatic hints] After successful execution, ilo scans the source for non-canonical forms and emits hints to stderr: hint: `==` → `=` saves 1 char (both mean equality in ilo) hint: `length` → `len` (canonical short form) Builtin alias hints appear at most once per program (the first long-form name found). In JSON mode, hints appear as `{"hints":["..."]}` on stderr. Suppress with `--no-hints` / `-nh`. [CLI invocation] ilo 'code' [args...] -- inline program; default-runs the entry function ilo program.ilo [func] [args] -- if `func` is omitted and the file declares exactly one function, that function runs automatically ilo run program.ilo [func] [a] -- verb form; same dispatch as the bare positional ilo check program.ilo [--json] -- run the verifier without executing (exit 0 = clean) ilo build program.ilo -o out -- AOT compile to a standalone binary (alias for `compile`) ilo program.ilo --ast -- print parsed AST as JSON and exit ilo --explain ILO-T004 -- print error explanation and exit ilo help ai -- compact AI spec to stdout (= contents of ai.txt) ilo serv -- long-lived JSON request/response loop **Verb-noun aliases.** `ilo run ` is an exact alias for the bare positional `ilo ` - same dispatch, same engine selection, same arg handling. `ilo build -o ` is an alias for `ilo compile -o `. Both exist to match the toolchain conventions used by `cargo`, `go`, and `zero` so agents and humans can guess the command name without consulting the help text. The bare positional forms remain fully supported for backwards compatibility; nothing has been removed. **`ilo check`.** Standalone verifier invocation: lex, parse, resolve imports, and run the type verifier without proceeding to bytecode compilation or execution. Exit code 0 means the program is well-typed and verifier-clean; exit code 1 means at least one diagnostic was emitted on stderr. 
The output mode follows the global flags (`--json` for NDJSON diagnostics, `--text` for plain text, `--ansi` for coloured output; auto-detected when omitted - JSON when stderr is not a TTY, ANSI otherwise). `ilo check` works on both files and inline code; on a syntactically-broken input it still reports the parse error rather than crashing, which is important for editor and agent loops that may feed in half-written programs. **Default-run.** Inline programs (`ilo 'code'`) and single-function files run their entry function with the remaining CLI args; no explicit function name needed. Multi-function files auto-pick a function called `main` when no positional func arg is supplied. The same heuristic applies to the explicit engine flags - `--run-tree`, `--run-vm`, and `--jit` all auto-pick `main` on multi-fn files, matching the default-engine behaviour. With no `main` declared, supply a function-name argument. **AOT entry-pick.** `ilo compile file.ilo -o out` (alias `ilo build`) follows the same entry-pick rules as the in-process engines: a single user-defined function is used directly; on multi-function files the entry is `main` if defined, otherwise the explicit positional `func` arg (`ilo compile file.ilo -o out run`); otherwise the compile fails with `ILO-E801` and exits 1 without writing a binary. AOT does not fall back to "first declared function" - that historical default produced binaries that called the wrong entry symbol and SIGSEGV'd at runtime. **Default engine.** The bytecode register VM is the default execution path. It supports every opcode (closures with Phase 2 capture, listview windows, fused len-of-filter, every modern shape), and avoids the JIT compile-and-bail cost paid by the pre-v0.11.9 Cranelift-first default whenever a program touched an opcode the JIT couldn't handle. Cranelift JIT is opt-in via `--jit`; on opt-in, the JIT runs hot numeric loops and falls back to the VM on bailout. 
The tree interpreter (`--run-tree`) remains the canonical-semantics reference. Phase 2 captures run natively on every engine - tree, VM, JIT, and AOT (`ilo compile`); AOT embeds the `CompiledProgram` blob into the binary's `.rodata` so dispatch helpers can re-enter the VM on user-fn callbacks the same way the in-process runners do. For long-running workloads where the JIT pays for itself, opt in explicitly; for most agent workloads the VM is the right default. **Subcommand dispatch.** The first positional argument is interpreted as a function name when it has the shape of an ilo identifier - `[a-z][a-z0-9]*(-[a-z0-9]+)*` - so `ilo file.ilo list-orders` routes to the `list-orders` function. Args that don't match the ident shape (file paths like `/tmp/data.json`, numbers, sigils, bracketed lists, anything with a `.` or `/`) route to `main` (or the entry function) as a positional CLI arg instead. Trailing dashes (`foo-`), doubled dashes (`foo--bar`), and negative numbers (`-1`) are not idents and pass through as data. **Unknown `--flag` guard.** Any token in the positional tail matching the clean long-flag shape `--word` or `--word-with-dashes` that isn't a recognised flag is rejected upfront with `error: unrecognised flag '--'. Use 'ilo --help' for valid flags. To pass it as a literal arg, separate with '--' first.` and exit 1. This prevents `ilo main.ilo --engine tree` from silently consuming `--engine` as a positional arg (which used to surface as misleading `ILO-R012 no functions defined` or `ILO-R004 main: expected N args, got N+1`). To pass a hyphen-prefixed token through as literal data, place the `--` separator first: `ilo main.ilo -- --foo`. Anything after the first `--` is data. Tokens with `=` (`--key=val`), trailing or doubled dashes (`--foo-`, `--foo--bar`), and negative numbers (`-1`) are not clean flag shapes and pass through unchanged. 
**Text-typed params.** When the entry function declares a parameter of type `t`, the CLI passes the raw arg through without numeric coercion. `ilo 'f x:t>t;x' 42` returns the string `"42"`, not the number 42. **Exit codes.** A program returning `Value::Err` (or `^reason` from the entry function) exits with code 1 and prints the err payload on stderr. `~v` (Ok) and any non-Result return value exit 0. Verifier and parser errors exit 2. **List args from the CLI.** Comma-separated args become `L n` or `L t` automatically: `ilo 'f xs:L n>n;sum xs' 1,2,3`. FORMATTER: Dense output is the default - newlines are for humans, not agents. No flag needed for dense format: ilo 'code' Dense wire format (default) ilo 'code' --dense / -d Same, explicit ilo 'code' --expanded / -e Expanded human format (for code review) [Dense format] Single line per declaration, minimal whitespace. Operators glue to first operand: cls sp:n>t;>=sp 1000{"gold"};>=sp 500{"silver"};"bronze" [Expanded format] Multi-line with 2-space indentation. Operators spaced from operands: cls sp:n > t >= sp 1000 { "gold" } >= sp 500 { "silver" } "bronze" Dense format is canonical - `dense(parse(dense(parse(src)))) == dense(parse(src))`. COMPLETE EXAMPLE: tool get-user"Retrieve user by ID" uid:t>R profile t timeout:5,retry:2 tool send-email"Send an email" to:t subject:t body:t>R _ t timeout:10,retry:1 type profile{id:t;name:t;email:t;verified:b} ntf uid:t msg:t>R _ t;get-user uid;?{^e:^+"Lookup failed: "e;~d:!d.verified{^"Email not verified"};send-email d.email "Notification" msg;?{^e:^+"Send failed: "e;~_:~_}} [Recursive Example] Factorial and Fibonacci as standalone functions: fac n:n>n;<=n 1 1;r=fac -n 1;*n r fib n:n>n;<=n 1 n;a=fib -n 1;b=fib -n 2;+a b diff --git a/skills/ilo/ilo-engines.md b/skills/ilo/ilo-engines.md index 62bddb75..5fa5576e 100644 --- a/skills/ilo/ilo-engines.md +++ b/skills/ilo/ilo-engines.md @@ -11,10 +11,10 @@ Four backends. 
Default (`ilo file.ilo`) is the register VM; covers ~all programs | Engine | Flag | Speed | Notes | |--|--|--|--| -| Tree-walk | `--run-tree` | 1x | Feature-complete. Required for capturing lambdas. | -| VM | `--run-vm` | 10-100x | Default. Captures auto fall-back. | -| Cranelift JIT | `--jit` | 100-1000x | Opt-in for hot numeric loops; bails to VM on unsupported. | -| Cranelift AOT | `ilo compile` | 100-1000x | Standalone native (~9 MB). | +| Tree-walk | `--run-tree` | 1x | Canonical-semantics reference. | +| VM | `--run-vm` | 10-100x | Default. Native closures. | +| Cranelift JIT | `--jit` | 100-1000x | Opt-in for hot numeric loops; VM fallback on bailout. | +| Cranelift AOT | `ilo compile` | 100-1000x | Standalone native (~9.7 MB). Native closures via embedded `CompiledProgram`. | | LLVM JIT | `--run-llvm` | ~Cranelift | Behind `llvm` feature. Rarely needed. | `--run` aliases `--run-tree`. @@ -28,7 +28,7 @@ Four backends. Default (`ilo file.ilo`) is the register VM; covers ~all programs ## Feature matrix -All four support core ops, lists/maps/records/sums, HOFs, lambdas (with or without captures), Results, HTTP, JSON, file I/O, MCP and HTTP tools. AOT miscompiles HOFs taking function values; use `--run-vm` for that case. +All four support core ops, lists/maps/records/sums, HOFs, lambdas (with or without captures), Results, HTTP, JSON, file I/O, MCP and HTTP tools. ## Benchmarking From 06322cdcf79b62772347b91c7f0626367f913449 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 18:00:10 +0100 Subject: [PATCH 6/8] docs: regenerate ai.txt from SPEC.md after rebase The post-rebase SPEC.md text is slightly different from the version that produced the dcf9b98 ai.txt - build.rs regenerates ai.txt from SPEC.md on every cargo build, and CI fails if ai.txt drifts. Re-run of cargo build produced this diff. 
--- ai.txt | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/ai.txt b/ai.txt index 5675ffb1..0027d4a4 100644 --- a/ai.txt +++ b/ai.txt @@ -1,6 +1,6 @@ INTRO: ilo is a token-optimised programming language for AI agents. Every design choice is evaluated against total token cost: generation + retries + context loading. FUNCTIONS: <name> <param>:<type> ...><return-type>;<body> No parens around params - `>` separates params from return type `;` separates statements - no newlines required Last expression is the return value (no `return` keyword) Zero-arg call: `make-id()` tot p:n q:n r:n>n;s=*p q;t=*s r;+s t -TYPES: `n`=number (f64) `t`=text (string) `b`=bool `_`=any/unknown (wildcard type) `L n`=list of number `R n t`=result: ok=number, err=text `O n`=optional number (nil or n) `M t n`=map from text keys to numbers `S red green blue`=sum type - one of named text variants `F n t`=function type: takes n, returns t (used in HOF params) `order`=named type `a`=type variable - any single lowercase letter except n, t, b [Optional (`O T`)] `O T` accepts either `nil` or a value of type `T`. f x:O n>n;??x 0 -- unwrap optional or default to 0 g>O n;nil -- returns nil (valid O n) h>O n;42 -- returns 42 (valid O n) `??x default` - nil-coalesce: returns `x` if non-nil, else `default`. Unwraps `O T` to `T`. [Sum types (`S a b c`)] Closed set of named text variants. Verifier-enforced; runtime value is always `t`. color x:S red green blue > t ?x{red:"ff0000";green:"00ff00";blue:"0000ff"} Sum types are compatible with `t` - a sum value can be passed to any `t` parameter. [Map type (`M k v`)] Dynamic key-value collection. Keys are typed: text (`t`) or integer (`n`). `Int(1)` and `Text("1")` are distinct keys. 
mmap -- empty map mset m k v -- return new map with key k set to v mget m k -- value at key k, or nil mhas m k -- b: true if key exists mkeys m -- L t: sorted list of keys mvals m -- L v: values sorted by key mdel m k -- return new map with key k removed len m -- number of entries Numeric keys work directly - no `str` conversion needed. Float keys floor to `i64` at the builtin boundary (matching `at xs i`); NaN/Infinity raise at runtime. idx=mmap idx=mset idx 7 "seven" -- M n t, integer key mget idx 7 -- "seven" mhas idx 7 -- true mhas idx "7" -- false (Int and Text are distinct) `jdmp` stringifies numeric keys for JSON output (JSON object keys are always strings). The round-trip via `jpar` is lossy - numeric keys come back as text. Example: scores>M t n m=mmap m=mset m "alice" 99 m=mset m "bob" 87 mget m "alice" -- 99 [Type variables] A single lowercase letter (other than `n`, `t`, `b`) in type position is a type variable, treated as `unknown` during verification. Used for higher-order function signatures: identity x:a>a;x apply f:F a a x:a>a;f x Type variables provide weak generics - the verifier accepts any type for `a` without consistency checking across call sites. [Inline lambdas] Pass a function literal directly to a HOF instead of defining a one-off top-level helper: by-dist xs:L n>L n;srt (x:n>n;abs x) xs nonempty ws:L t>L t;flt (s:t>b;>(len s) 0) ws sumsq xs:L n>n;fld (a:n x:n>n;+a *x x) xs 0 Syntax: `(: ...>;)`. Same shape as a top-level function declaration, wrapped in parens, no name. **Phase 1 (no captures)** lifts the literal to a synthetic top-level decl and works across every engine (tree, VM, Cranelift JIT, AOT). The body's free variables must all be params, locals defined inside the lambda body, or known top-level fns. **Phase 2 (closure capture)** lets the body reference variables from the enclosing scope: f xs:L n thr:n>L n;flt (x:n>b;>x thr) xs -- captures `thr` Phase 2 runs on every engine - tree, VM, Cranelift JIT, and AOT. 
The VM and JIT lower captures natively via `OP_MAKE_CLOSURE`; AOT additionally embeds the `CompiledProgram` so dispatch helpers can re-enter the VM on user-fn callbacks. The ctx-arg form (`srt fn ctx xs`) remains the lightest cross-engine alternative for capturing state when you'd rather not pay for a closure-build. +TYPES: `n`=number (f64) `t`=text (string) `b`=bool `_`=any/unknown (wildcard type) `L n`=list of number `R n t`=result: ok=number, err=text `O n`=optional number (nil or n) `M t n`=map from text keys to numbers `S red green blue`=sum type - one of named text variants `F n t`=function type: takes n, returns t (used in HOF params) `order`=named type `a`=type variable - any single lowercase letter except n, t, b [Optional (`O T`)] `O T` accepts either `nil` or a value of type `T`. f x:O n>n;??x 0 -- unwrap optional or default to 0 g>O n;nil -- returns nil (valid O n) h>O n;42 -- returns 42 (valid O n) `??x default` - nil-coalesce: returns `x` if non-nil, else `default`. Unwraps `O T` to `T`. [Sum types (`S a b c`)] Closed set of named text variants. Verifier-enforced; runtime value is always `t`. color x:S red green blue > t ?x{red:"ff0000";green:"00ff00";blue:"0000ff"} Sum types are compatible with `t` - a sum value can be passed to any `t` parameter. [Map type (`M k v`)] Dynamic key-value collection. Keys are typed: text (`t`) or integer (`n`). `Int(1)` and `Text("1")` are distinct keys. mmap -- empty map mset m k v -- return new map with key k set to v mget m k -- value at key k, or nil mhas m k -- b: true if key exists mkeys m -- L t: sorted list of keys mvals m -- L v: values sorted by key mdel m k -- return new map with key k removed len m -- number of entries Numeric keys work directly - no `str` conversion needed. Float keys floor to `i64` at the builtin boundary (matching `at xs i`); NaN/Infinity raise at runtime. 
idx=mmap idx=mset idx 7 "seven" -- M n t, integer key mget idx 7 -- "seven" mhas idx 7 -- true mhas idx "7" -- false (Int and Text are distinct) `jdmp` stringifies numeric keys for JSON output (JSON object keys are always strings). The round-trip via `jpar` is lossy - numeric keys come back as text. Example: scores>M t n m=mmap m=mset m "alice" 99 m=mset m "bob" 87 mget m "alice" -- 99 [Type variables] A single lowercase letter (other than `n`, `t`, `b`) in type position is a type variable, treated as `unknown` during verification. Used for higher-order function signatures: identity x:a>a;x apply f:F a a x:a>a;f x Type variables provide weak generics - the verifier accepts any type for `a` without consistency checking across call sites. [Inline lambdas] Pass a function literal directly to a HOF instead of defining a one-off top-level helper: by-dist xs:L n>L n;srt (x:n>n;abs x) xs nonempty ws:L t>L t;flt (s:t>b;>(len s) 0) ws sumsq xs:L n>n;fld (a:n x:n>n;+a *x x) xs 0 Syntax: `(: ...>;)`. Same shape as a top-level function declaration, wrapped in parens, no name. **Phase 1 (no captures)** lifts the literal to a synthetic top-level decl and works across every engine (tree, VM, Cranelift JIT, AOT). The body's free variables must all be params, locals defined inside the lambda body, or known top-level fns. **Phase 2 (closure capture)** lets the body reference variables from the enclosing scope: f xs:L n thr:n>L n;flt (x:n>b;>x thr) xs -- captures `thr` Phase 2 captures run natively on every engine: the tree interpreter, the register VM, the Cranelift JIT, and the Cranelift AOT backend. Each free variable is snapshot by value at the call site (`Expr::MakeClosure`) and appended to the call frame's arg slice on dispatch. The AOT backend additionally embeds the postcard-serialised `CompiledProgram` into the binary's `.rodata` and publishes TLS pointers on startup, so dispatch helpers can re-enter the VM on user-fn callbacks. 
The ctx-arg form (`srt fn ctx xs`) remains the cross-engine alternative when you want explicit state without forming a closure. NAMING: Short names everywhere. 1–3 chars. `order`=`ord`=truncate `customers`=`cs`=consonants `data`=`d`=single letter `level`=`lv`=drop vowels `discount`=`dc`=initials `final`=`fin`=first 3 `items`=`its`=first 3 Function names follow the same rules. Field names in constructors and external tool names keep their full form - they define the public interface. [Identifier syntax] Identifiers are lowercase ASCII only, optionally with hyphenated segments. Formally: `[a-z][a-z0-9]*(-[a-z0-9]+)*`. Capital letters and underscores are rejected at the binding and call site. run -- OK run-d -- OK (hyphen separates segments) r2 -- OK (digit after first letter) runD -- ERROR (capital letter) RunD -- ERROR (leading capital) run_d -- ERROR (underscore not allowed in bindings) -run -- ERROR (must start with a letter) `runD` in the interactive CLI surfaces as `ILO-L003 unexpected token` with a suggestion to use `run-d` or `rund`. The constraint is intentional: a single lexical shape per identifier keeps the token stream predictable for agents and avoids style debates over camelCase vs snake_case vs kebab-case. The only place capital letters and underscores are accepted is **after `.` or `.?`** at field-access position, so heterogeneous JSON keys from real APIs work without rewriting. See [Field names at dot-access](#field-names-at-dot-access) for the full list of post-dot relaxations (`r.URL`, `r.AccessKey`, `r.user_name`, etc.). Binding names (`AccessKey = ...`) and function names (`AccessKey x:n>n;...`) still error. [Reserved words] The following identifiers are reserved and cannot be used as names: `if`, `return`, `let`, `fn`, `def`, `var`, `const`. Using them produces a friendly error with the ilo equivalent: -- ERROR: `if` is a reserved word. Use: ?cond{true:... false:...} -- ERROR: `return` is a reserved word. Last expression is the return value. 
-- ERROR: `let` is a reserved word. Use: name = expr -- ERROR: `fn`/`def` is a reserved word. Use: name param:type > rettype; body Builtin names (`flat`, `frq`, `map`, `flt`, `cat`, `len`, `srt`, `hd`, `tl`, `ord`, `fld`, `lst`, ...) are also rejected as user-function names and as local-binding LHS. Without this, calls to the user fn or use sites of the local binding silently mis-dispatch to the builtin and surface as a confusing `ILO-T006` arity mismatch. The parser intercepts at the declaration site with ILO-P011 and a rename hint: flat n:n>n;n -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a function name -- hint: rename to something like `myflat` or `flatof`. main>n;flat=cat xs " ";spl flat ". " -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a binding name -- hint: rename to something like `myflat` or `flatv`. [Reserved namespaces] Short builtin names are precious surface and ilo reserves a stable subset of them. To save agents (and their carry-forward scripts) from "what got reserved this release?" debugging cycles, the language publishes the full short-name reserve list plus a forward-compatibility rule for future builtins. **Currently reserved short names (1-3 characters).** Every name in this list is a builtin today and triggers `ILO-P011` if used as a binding or user-function name: 2-char at hd tl rd wr ct 3-char abs avg cap cat cel chr cos det dot env exp fft fld flr flt fmt frq get grp has inv len log lsd lst lwr map max min mod now num ord pow pst rdb rdl rev rgx rng rnd rou run sin slc spl srt str sum tan trm unq upr wrl zip `rng` is the short-form alias for the canonical `range` builtin; it is reserved with the same shadow-prevention semantics as a canonical builtin name (binding `rng=...` or declaring `rng x:...` fires `ILO-P011`). 
Longer builtin names (`acos`, `asin`, `atan`, `flat`, `take`, `drop`, `mget`, `mset`, `mmap`, `prnt`, `mapr`, `solve`, `clamp`, `cumsum`, `median`, `matmul`, `range`, `window`, `chunks`, `walk`, `glob`, …) are also reserved and rejected by `ILO-P011`, but the short-name namespace above is where carry-forward scripts most often collide, so it gets explicit enumeration. **Forward-compatibility rule.** Future ilo releases add new builtins under names **4 characters or longer**. A 2-character name that is not on this list today is safe to use as a binding or function name and stays safe across releases. A 3-character name that is not on this list is _highly likely_ to stay safe but is not a hard promise - the 3-char surface is already dense, and a rare ergonomic win may justify an addition, called out in the changelog. This gives agents a deterministic safe-name strategy: **2 chars**: any unreserved 2-char name is permanently fine for bindings (`ce` for "category", `ix` for index, `mn` for "mean", `pq` for "priority queue", …). Names on the reserved list above never get removed. **3 chars**: prefer unreserved 3-char names where possible. If a future release reserves one, the migration is a 1-character rename plus a changelog entry. **4+ chars**: always safe. New builtins land here first; any short alias is added later only if the long name is unambiguous and the short doesn't shadow a plausible user binding. When a collision does happen, `ILO-P011` surfaces it at the binding site with a rename suggestion - never silently mis-dispatches at the call site (see the `flat=cat xs " "` example above). Combined with the reserve list, that turns every name-collision incident into a single-character rename instead of a debugging spiral. [Cross-language gotchas] Common shapes reached for from other languages. 
The parser and lexer surface each with a friendly hint: `AND a b`, `OR a b`, `NOT a`=`&a b`, `|a b`, `!a`=`ILO-L001` `=<a b`, `=>a b`=`<=a b`, `>=a b` (single token)=`ILO-P003` `f=fn x:n>n;+x 1` (lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-P009` `\x{+x 1}` (Haskell/Rust lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-L001` `main:>n;body`=`main>n;body` (no `:` before `>`)=`ILO-P003` Multi-line body without braces=`@k xs{body}`, `cond{body}` on one line=`ILO-P003` `cond{^"err"}` braced-cond=Braceless `cond ^"err"` for early return=hint only `- -*a b *c d` (double-minus)=`- 0 +*a b *c d` (negate the sum)=`ILO-P021` `[k fmt2 v 2]` (call in list)=`[k (fmt2 v 2)]` or bind-first=`ILO-P101` Each case fires a hint pointing at the canonical form; the agent's first retry should be the right one. Identifier-shaped collisions with builtin names (`len=...`, `sin=...`) are rejected with `ILO-P011` plus a rename suggestion. The list-literal call trap (`ILO-P101`) catches the case where a variadic builtin (`fmt`, `fmt2`) appears bare inside `[...]`. Fixed-arity builtins (`str`, `at`, `map`, ...) auto-expand to a call as one element, but variadic ones can't (the parser doesn't know where their args end), so the bare form would silently fall through as multiple elements with the builtin name as an undefined Ref. Fix by wrapping the call in parens (`[k (fmt2 v 2)]`) or binding first. The double-minus trap (`ILO-P021`) catches the silent-miscompile shape `- -<op>a b <op>c d` for `<op>` in `{+,*,/}`. Read intuitively as `-(a*b) - (c*d)` but parses as `-((a*b) - (c*d)) = -(a*b) + (c*d)` because the inner `-` greedily consumes both prefix-binop groups as binary subtract and the outer `-` falls back to unary negate. Fix by negating the sum (`- 0 +*a b *c d`) or binding first (`p=*a b;q=*c d;- 0 +p q`). Single-atom variants like `- -a b` remain accepted since they're unambiguous.
COMMENTS: -- full line comment +a b -- end of line comment -- no multi-line comments; use consecutive -- lines -- like this Single-line only. `--` to end of line. No multi-line comment syntax - newlines are a human display concern, not a language concern. An entire ilo program can be one line. Use consecutive `--` lines when humans need multi-line comments. Stripped at the lexer level before parsing - comments produce no AST nodes and cost zero runtime tokens. Generating `--` costs 1 LLM token, so comments are essentially free. **Gotcha:** `--x 1` is a comment, not "negate (x minus 1)". The lexer matches `--` greedily as a comment and eats the rest of the line. To negate a subtraction, use a space or bind first: -- DON'T: --x 1 (comment, not negate-subtract) -- DO: - -x 1 (space separates the two minus operators) -- DO: r=-x 1;-r (bind first) OPERATORS: Both prefix and infix notation are supported. **Prefix is preferred** - it is the token-optimal form that eliminates parentheses and produces denser code. Infix is available for readability when needed. [Binary] `+a b`=`a + b`=add / concat / list concat=`n`, `t`, `L` `+=a v`=append to list (returns new list, see [Append semantics](#append-semantics-+=))=`L` `-a b`=`a - b`=subtract=`n` `*a b`=`a * b`=multiply=`n` `/a b`=`a / b`=divide=`n` `=a b`=`a == b`=equal (prefix `=` is preferred; `==a b` also accepted)=any `!=a b`=`a != b`=not equal=any `>a b`=`a > b`=greater than=`n`, `t` `<a b`=`a < b`=less than=`n`, `t` `>=a b`=`a >= b`=greater or equal=`n`, `t` `<=a b`=`a <= b`=less or equal=`n`, `t` `&a b`=`a & b`=logical AND (short-circuit)=any (truthy) `|a b`=`a | b`=logical OR (short-circuit)=any (truthy) [Append semantics (`+=`)] `+=xs v` is **pure-shaped**, despite the imperative-looking syntax. It returns a new list with `v` appended and does **not** mutate `xs` in the caller's scope. It works in every position a value-producing expression works: -- 1. Rebind (canonical accumulator pattern) xs=[];@i 0..3{xs=+=xs i};xs -- [0, 1, 2] -- 2.
Non-rebind assignment (xs preserved) xs=[1, 2, 3];ys=+=xs 99 -- xs is still [1, 2, 3]; ys is [1, 2, 3, 99] -- 3. Pipeline / argument position len +=xs 99 -- length of [xs..., 99] sum +=xs 99 -- sum of [xs..., 99] The rebind shape `xs = +=xs v` is the standard foreach-build accumulator. When the binding is RC=1 the engines mutate the underlying buffer in place (amortised O(1) per push) - but this is a behind-the-scenes optimisation. To any observer the operation is still functional: nothing outside the rebind sees the old `xs`. The non-rebind shape `ys = +=xs v` always allocates a fresh list and leaves `xs` untouched, so source aliases are safe. There is no separate `push` builtin. `+=` covers every use case and is shorter; adding an alias would mean two ways to spell the same operation, costing reasoning tokens and surface area. [Unary] `-x`=negate=`n` `!x`=logical NOT=any (truthy) [Special infix] `a??b`=nil-coalesce (if a is nil, return b)=any `a>>f`=pipe (desugar to `f(a)`)=any [Prefix nesting (no parens needed)] +*a b c -- (a * b) + c *a +b c -- a * (b + c) >=+x y 100 -- (x + y) >= 100 -*a b *c d -- (a * b) - (c * d) The outer prefix op binds the inner prefix subexpression as its **left** operand, regardless of operator precedence. With two same-precedence ops side by side this is easy to misread: */a b c -- (a/b) * c ← NOT (a*b)/c /*a b c -- (a*b) / c ← NOT (a/b)*c +-a b c -- (a-b) + c ← NOT (a+b)-c -+a b c -- (a+b) - c ← NOT (a-b)+c The runtime emits a `hint:` diagnostic when one of these four pairs appears at a prefix position, since the parse order disagrees with the natural left-to-right reading. 
To force the other grouping, swap the ops or bind the inner result first: -- Want (a*b)/c with a=6, b=2, c=3: r=*a b;/r c -- bind, then divide → 4 /*a b c -- equivalent, swapping the prefix-pair order [Infix precedence] Standard mathematical precedence (higher binds tighter): 6=`*` `/` 5=`+` `-` `+=` 4=`>` `<` `>=` `<=` 3=`=` `!=` 2=`&` 1=`|` Function application binds tighter than all infix operators: f a + b -- (f a) + b, NOT f(a + b) x * y + 1 -- (x * y) + 1 (x + y) * 2 -- parens override precedence Each nested prefix operator saves 2 tokens (no `(` `)` needed). Flat prefix like `+a b` saves 1 char vs `a + b`. Across 25 expression patterns, prefix notation saves **22% tokens** and **42% characters** vs infix. See [research/explorations/prefix-vs-infix/](research/explorations/prefix-vs-infix/) for the full benchmark. Disambiguation: `-` followed by one atom is unary negate, followed by two atoms is binary subtract. [Operands] Operator operands are **atoms** (literals, refs, field access), **nested prefix operators**, or **known-arity function calls**. The prefix-binop operand parser dispatches to call parsing when the ident at the cursor is a known-arity user fn or builtin AND the next token can start another operand: wh >len q 0{body} -- parses as wh > (len q) 0 { body } +f g h -- if f is 1-arity: BinOp(+, Call(f, [g]), h) -lnx 5 lnx 3 -- BinOp(-, Call(lnx, [5]), Call(lnx, [3])) dbl 5 -- Negate(Call(dbl, [5])) - unary on a call This parallels the `??` precedent: `??x default` accepts a call expression on the value side. Applies to every prefix-binop family member - `+`, `-`, `*`, `/`, comparisons, `&`, `|`, `+=` - and to unary negate when the call consumes the only operand. The same expansion also applies to the then/else slots of the prefix-ternary family (`?=cond a b`, `?>cond a b`, …) and the `?h cond a b` keyword form, so `?h =a b sev sc "NONE"` parses `sev sc` as a nested call without parens or a bind-first. 
Bare locals that shadow a user fn name still resolve via `Ref` rather than expanding into a zero-arg call, so `&e f{...}` where `f` is a local still parses as the bool operator with two refs. When the call expansion isn't available (the ident is a local that shadows a fn name, or the call's arity doesn't fit the remaining tokens), bind the call result first: r=fac p;*n r -- bind, then operate - always unambiguous **Negative literals vs binary minus**: the lexer greedily includes a leading `-` into number tokens. `-1`, `-7`, `-0` are all number literals at fresh-expression positions. To subtract from zero at the start of a statement, use a space: `- 0 v` (Minus token, then `0`, then `v`). f v:n>n;-0 v -- WRONG: -0 is Number(-0.0); v is a stray token f v:n>n;- 0 v -- OK: binary subtract: 0 - v = -v The lexer splits a glued negative literal back into `Minus + Number` when the previous token is one of `;`, `\n`, `=`, `{`, `(`, or `-`. The `-` context covers the operand slot of an outer prefix-minus, so `- -0 a b` lexes as `-, -, 0, a, b` and parses as `Subtract(Subtract(0, a), b)` = `-a - b` rather than tripping `ILO-P020`. Negative literals after an Ident, `[`, or another prefix binop (`+`, `*`, `/`) stay glued so call args (`at xs -1`), list literals (`[-2 1 3]`), and binary operands (`+a -3`) read naturally. @@ -15,6 +15,6 @@ TOOLS (EXTERNAL CALLS): tool "" > timeou IMPORTS: Split programs across files with `use`: use "path/to/file.ilo" -- import all declarations use "path/to/file.ilo" [name1 name2] -- import only named declarations All imported declarations merge into a flat shared namespace - no qualification, no `mod::fn` syntax. The verifier catches name collisions. -- math.ilo dbl n:n>n; *n 2 half n:n>n; /n 2 -- main.ilo use "math.ilo" run n:n>n; dbl! 
half n [Rules] Path is relative to the importing file's directory Transitive: if `a.ilo` uses `b.ilo`, `b.ilo`'s declarations are visible to `main.ilo` when it uses `a.ilo` Circular imports are an error (`ILO-P018`) Scoped import with unknown name: `ILO-P019` `use` in inline code (no file context): `ILO-P017` [Error codes] `ILO-P017`=File not found or `use` in inline mode `ILO-P018`=Circular import detected `ILO-P019`=Name in `[...]` list not declared in the imported file ERROR HANDLING: `R ok err` return type. Call then match: get-user uid;?{^e:^+"Lookup failed: "e;~d:use d} Compensate/rollback inline: charge pid amt;?{^e:release rid;^+"Payment failed: "e;~cid:continue} [Auto-Unwrap `!`] `func! args` calls `func` and auto-unwraps the Result: if `~v` (Ok), returns `v`; if `^e` (Err), immediately returns `^e` from the enclosing function. inner x:n>R n t;~x outer x:n>R n t;d=inner! x;~d Equivalent to `r=inner x;?r{~v:v;^e:^e}` but in 1 token instead of 12. Rules: The called function must return `R` or `O` (else verifier error ILO-T025) The enclosing function must return `R` (or `O` for Optional callees) (else verifier error ILO-T026) `!` goes after the function name, before args: `get! url` not `get url!` Zero-arg: `fetch!()` [Panic-Unwrap `!!`] `func!! args` is symmetric in shape with `!`, but on the failure path it aborts the program with a runtime diagnostic and exit code 1 instead of propagating. There is no enclosing-return-type constraint, so persona code can use it from `main>t`, `main>n`, or any non-Result / non-Optional context. main>t;rdl!! "input.txt" -- read file, abort with diagnostic if missing main>n;v=num!! "42";v -- parse number, abort on parse error main>n;m=mset mmap "k" 7;mget!! m "k" -- get value or abort if key missing On `^e` (Err) the program writes `panic-unwrap: ` to stderr and exits 1. On `O nil` the program writes `panic-unwrap: expected value, got nil`. On `~v` (Ok) or non-nil Optional, the inner value is extracted, identical to `!`. 
Rules: The called function must return `R` or `O` (else verifier error ILO-T025) **No constraint on the enclosing function's return type** - this is the difference from `!` `!!` goes after the function name, before args: `rdl!! path` not `rdl path!!` Zero-arg: `fetch!!()` Use `!` when the caller wants to react to the Err (compensate, retry, log). Use `!!` when the failure is a programming or environmental error the caller has no way to recover from - typical in short scripts, glue code, and main entry points. PATTERNS (FOR LLM GENERATORS): [Bind-first pattern] Always bind complex expressions to variables before using them in operators. Operators only accept atoms and nested operators as operands - not function calls. -- DON'T: *n fac -n 1 (fac is an operand of *, not a call) -- DO: r=fac -n 1;*n r (bind call result, then use in operator) [Recursion template] >;;...;;combine 1. **Guard**: base case returns early - `<=n 1 1` (or `<=n 1{1}`) 2. **Bind**: bind recursive call results - `r=fac -n 1` 3. **Combine**: use bound results in final expression - `*n r` [Factorial] fac n:n>n;<=n 1 1;r=fac -n 1;*n r `<=n 1 1` - braceless guard: if n <= 1, return 1 `r=fac -n 1` - recursive call with prefix subtract as argument `*n r` - multiply n by result [Fibonacci] fib n:n>n;<=n 1 n;a=fib -n 1;b=fib -n 2;+a b `<=n 1 n` - braceless guard: return n for 0 and 1 `a=fib -n 1;b=fib -n 2` - two recursive calls, each with prefix arg `+a b` - add results [Multi-statement bodies] Semicolons separate statements. Last expression is the return value. f x:n>n;a=*x 2;b=+a 1;*b b -- (x*2 + 1)^2 Bodies may also be written across multiple newline-separated lines, indented under the signature. The parser stays inside the same function body while it sees an open bracket (`[`, `(`, `{`) or a pipe operator continuation. 
This makes long literals and multi-line conditional pipelines readable without semicolons: f x:n>n a=*x 2 b=+a 1 *b b g>L n [10, 20, 30, 40, 50, 60, 70, 80] Statement separation reverts to standard rules once brackets close. A blank line ends the current declaration. [Multi-function files] Functions in a file are separated by **newlines**. The parser strips all newlines, so the token stream is flat. After parsing each function body, the parser uses the next newline-delimited boundary to start the next declaration. A non-last function body's **final expression must not be a bare variable reference (`Ref`) or a function call**, because the parser greedily reads following tokens as additional call arguments. Safe endings prevent this: Binary operator=`+n 0`, `*x 1`=✓=fixed arity - no greedy loop Index access=`xs.0`, `rec.field`=✓=returns `Expr::Index`, not `Ref` Match block=`?v{…}`=✓=ends with `}` ForEach block=`@x xs{…}`=✓=ends with `}` Parenthesised expr=`(x>>f>>g)`=✓=ends with `)` Record constructor=`point x:1 y:2`=✓=parses as `Expr::Record`, not `Ref` Text/number literal=`"ok"`, `42`=✓=literal, not `Ref` Bare variable (`Ref`)=`n`, `result`=✗=greedy loop fires Bare function call=`len xs`, `f a`=✗=greedy loop fires The **last function in a file** can end with anything - greedy parsing stops at EOF. 
-- Non-last functions: end with a binary expression digs n:n>n;t=str n;l=len t;+l 0 -- +l 0 = l (binary, safe) clmp n:n lo:n hi:n>n;<n lo lo;>n hi hi;+n 0 -- +n 0 = n (binary, safe; `clamp` is a builtin) -- Last function: bare call is fine sz xs:L n>n;len xs -- EOF - greedy loop stops naturally To use a pipe chain in a non-last function, wrap it in parentheses: dbl-inc x:n>n;(x>>dbl>>inc) -- parens prevent >> from consuming next function's name inc-sq x:n>n;x>>inc>>sq -- last function - no parens needed [DO / DON'T] -- DON'T: fac n:n>n;<=n 1 1;*n fac -n 1 -- ↑ *n sees fac as an atom operand, not a call -- DO: fac n:n>n;<=n 1 1;r=fac -n 1;*n r -- ↑ bind-first: call result goes into r, then *n r works -- DON'T: +fac -n 1 fac -n 2 -- ↑ + takes two operands; fac is just an atom ref -- DO: a=fac -n 1;b=fac -n 2;+a b -- ↑ bind both calls, then combine -ERROR DIAGNOSTICS: ilo verifies programs before execution and reports errors with stable codes, source context, and suggestions. [Error codes] Every error has a stable `ILO-` code. The letter is the namespace - the phase that raised the diagnostic - so agents and tools can route on prefix without parsing the message. Numeric ranges are reserved per namespace with generous gaps, so future codes slot in cleanly and the contract is forward-compatible. `ILO-L000-099`=L=Lexer / tokenisation=active `ILO-P100-199`=P=Parser / syntax=active `ILO-N200-299`=N=Names / resolution=reserved `ILO-I300-399`=I=Imports=reserved `ILO-T400-499`=T=Types=active `ILO-V500-599`=V=Verifier (post-type checks)=reserved `ILO-R600-699`=R=Runtime=active `ILO-D700-799`=D=Deprecation warnings=reserved `ILO-E800-899`=E=Engine-specific limitations=reserved `ILO-S900-999`=S=Skill / spec system=reserved **Historical codes.** ilo shipped with flat numbering inside each namespace - `ILO-L001`, `ILO-P001`, `ILO-T001`, `ILO-R001`, `ILO-W001`, all starting at 001. Those codes remain valid forever.
The hundreds-block allocation above applies to new codes from now on, and a cross-engine regression test asserts every emitted code lives in a documented range. **Reserved namespaces.** `N`, `I`, `V`, `D`, `E`, `S` carry no codes today. They are forward declarations so the first code in each category slots into its own range without conflicting with the active namespaces. `D` is earmarked for deprecation warnings: when a feature is scheduled for removal it emits an `ILO-D7xx` warning at compile time without failing the build. Use `--explain` to see a detailed explanation: ilo --explain ILO-T004 [Source context] Errors point at the relevant source location with a caret: error[ILO-T005]: undefined function 'foo' (called with 1 args) --> 1:9 1 | f x:n>n;foo x = note: in function 'f' = suggestion: did you mean 'f'? Parser, verifier, and runtime errors all show source spans. The verifier uses the enclosing statement span as the best available location for expression-level errors. [Suggestions] The verifier provides context-aware hints: **Did you mean?** - Levenshtein-based suggestions for undefined variables, functions, fields, and types **Type conversion** - suggests `str` for n→t, `num` for t→n **Missing arms** - lists uncovered match patterns with types **Arity** - shows expected parameter signature [Error output formats] --ansi / -a ANSI colour (default for TTY) --text / -t Plain text (no colour) --json / -j JSON (default for piped output) --no-hints / -nh Suppress idiomatic hints NO_COLOR=1 Disable colour (same as --text) JSON error output follows a structured schema with `severity`, `code`, `message`, `labels` (with spans), `notes`, and `suggestion` fields. Runtime errors raised from the Cranelift JIT (opt-in via `--jit`) populate `labels` with the source span of the failing operation, matching tree and VM behaviour. 
Span coverage threads through every JIT runtime helper (unwrap, panic-unwrap, list-get, slice, index, jpth, mget, record-field strict access, builtin dispatch, dynamic call); AOT-compiled binaries inherit the same coverage. Pre-v0.11.6 builds surfaced `{"labels":[]}` for these shapes - if you see an empty labels array on a runtime error, the binary is out of date. AOT binaries also install an async-signal-safe handler in `ilo_aot_init` that catches fatal signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGABRT) and writes a single JSON line on stderr identifying the signal before the process terminates with the conventional 128+signo exit code. The diagnostic uses `ILO-R015` (AOT runtime fault). Without the handler, a hard fault inside compiled native code would leave the process with raw signal exit (e.g. 139 for SIGSEGV) and no diagnostic — agents driving ilo couldn't distinguish a clean non-zero exit from a hard fault. A SIGSEGV from an AOT binary is always a bug in ilo (codegen or runtime helper); file an issue with the source program and the JSON line.
[Top-level program output] For a program whose entry function returns a Result, the `~`/`^` wrapper is split across streams and exit codes so shell callers do not have to strip a prefix: `~v` (Ok)=`v` (bare)=-=0 `^e` (Err)=-=`^e`=1 any non-Result=`v`=-=0 In `--json` mode the value is always wrapped (`{"ok": v}` / `{"error": {...}}`) and emitted to stdout; exit codes match the plain-mode table. `Display` on `Value::Ok` / `Value::Err` still renders `~v` / `^e` in every other context (nested values, `prnt`, REPL prompts, error messages, debug output) - only the top-level program-return print path is split. The contract applies uniformly to in-process runners (`ilo prog.ilo`, `--run-tree`, `--run-vm`, `--jit`) and to AOT-compiled standalone binaries from `ilo compile`. Both strip the top-level `~`/`^` wrapper on stdout, route `^e` to stderr, and use the same exit codes - output is byte-for-byte identical across every backend. [Idiomatic hints] After successful execution, ilo scans the source for non-canonical forms and emits hints to stderr: hint: `==` → `=` saves 1 char (both mean equality in ilo) hint: `length` → `len` (canonical short form) Builtin alias hints appear at most once per program (the first long-form name found). In JSON mode, hints appear as `{"hints":["..."]}` on stderr. Suppress with `--no-hints` / `-nh`. [CLI invocation] ilo 'code' [args...] 
-- inline program; default-runs the entry function ilo program.ilo [func] [args] -- if `func` is omitted and the file declares exactly one function, that function runs automatically ilo run program.ilo [func] [a] -- verb form; same dispatch as the bare positional ilo check program.ilo [--json] -- run the verifier without executing (exit 0 = clean) ilo build program.ilo -o out -- AOT compile to a standalone binary (alias for `compile`) ilo program.ilo --ast -- print parsed AST as JSON and exit ilo --explain ILO-T004 -- print error explanation and exit ilo help ai -- compact AI spec to stdout (= contents of ai.txt) ilo serv -- long-lived JSON request/response loop **Verb-noun aliases.** `ilo run ` is an exact alias for the bare positional `ilo ` - same dispatch, same engine selection, same arg handling. `ilo build -o ` is an alias for `ilo compile -o `. Both exist to match the toolchain conventions used by `cargo`, `go`, and `zero` so agents and humans can guess the command name without consulting the help text. The bare positional forms remain fully supported for backwards compatibility; nothing has been removed. **`ilo check`.** Standalone verifier invocation: lex, parse, resolve imports, and run the type verifier without proceeding to bytecode compilation or execution. Exit code 0 means the program is well-typed and verifier-clean; exit code 1 means at least one diagnostic was emitted on stderr. The output mode follows the global flags (`--json` for NDJSON diagnostics, `--text` for plain text, `--ansi` for coloured output; auto-detected when omitted - JSON when stderr is not a TTY, ANSI otherwise). `ilo check` works on both files and inline code; on a syntactically-broken input it still reports the parse error rather than crashing, which is important for editor and agent loops that may feed in half-written programs. 
**Default-run.** Inline programs (`ilo 'code'`) and single-function files run their entry function with the remaining CLI args; no explicit function name needed. Multi-function files auto-pick a function called `main` when no positional func arg is supplied. The same heuristic applies to the explicit engine flags - `--run-tree`, `--run-vm`, and `--jit` all auto-pick `main` on multi-fn files, matching the default-engine behaviour. With no `main` declared, supply a function-name argument. **AOT entry-pick.** `ilo compile file.ilo -o out` (alias `ilo build`) follows the same entry-pick rules as the in-process engines: a single user-defined function is used directly; on multi-function files the entry is `main` if defined, otherwise the explicit positional `func` arg (`ilo compile file.ilo -o out run`); otherwise the compile fails with `ILO-E801` and exits 1 without writing a binary. AOT does not fall back to "first declared function" - that historical default produced binaries that called the wrong entry symbol and SIGSEGV'd at runtime. **Default engine.** The bytecode register VM is the default execution path. It supports every opcode (closures with Phase 2 capture, listview windows, fused len-of-filter, every modern shape), and avoids the JIT compile-and-bail cost paid by the pre-v0.11.9 Cranelift-first default whenever a program touched an opcode the JIT couldn't handle. Cranelift JIT is opt-in via `--jit`; on opt-in, the JIT runs hot numeric loops and falls back to the VM on bailout. The tree interpreter (`--run-tree`) remains the canonical-semantics reference. Phase 2 captures run natively on every engine - tree, VM, JIT, and AOT (`ilo compile`); AOT embeds the `CompiledProgram` blob into the binary's `.rodata` so dispatch helpers can re-enter the VM on user-fn callbacks the same way the in-process runners do. For long-running workloads where the JIT pays for itself, opt in explicitly; for most agent workloads the VM is the right default. 
**Subcommand dispatch.** The first positional argument is interpreted as a function name when it has the shape of an ilo identifier - `[a-z][a-z0-9]*(-[a-z0-9]+)*` - so `ilo file.ilo list-orders` routes to the `list-orders` function. Args that don't match the ident shape (file paths like `/tmp/data.json`, numbers, sigils, bracketed lists, anything with a `.` or `/`) route to `main` (or the entry function) as a positional CLI arg instead. Trailing dashes (`foo-`), doubled dashes (`foo--bar`), and negative numbers (`-1`) are not idents and pass through as data. **Unknown `--flag` guard.** Any token in the positional tail matching the clean long-flag shape `--word` or `--word-with-dashes` that isn't a recognised flag is rejected upfront with `error: unrecognised flag '--'. Use 'ilo --help' for valid flags. To pass it as a literal arg, separate with '--' first.` and exit 1. This prevents `ilo main.ilo --engine tree` from silently consuming `--engine` as a positional arg (which used to surface as misleading `ILO-R012 no functions defined` or `ILO-R004 main: expected N args, got N+1`). To pass a hyphen-prefixed token through as literal data, place the `--` separator first: `ilo main.ilo -- --foo`. Anything after the first `--` is data. Tokens with `=` (`--key=val`), trailing or doubled dashes (`--foo-`, `--foo--bar`), and negative numbers (`-1`) are not clean flag shapes and pass through unchanged. **Text-typed params.** When the entry function declares a parameter of type `t`, the CLI passes the raw arg through without numeric coercion. `ilo 'f x:t>t;x' 42` returns the string `"42"`, not the number 42. **Exit codes.** A program returning `Value::Err` (or `^reason` from the entry function) exits with code 1 and prints the err payload on stderr. `~v` (Ok) and any non-Result return value exit 0. Verifier and parser errors exit 2. **List args from the CLI.** Comma-separated args become `L n` or `L t` automatically: `ilo 'f xs:L n>n;sum xs' 1,2,3`. 
+ERROR DIAGNOSTICS: ilo verifies programs before execution and reports errors with stable codes, source context, and suggestions. [Error codes] Every error has a stable `ILO-` code. The letter is the namespace - the phase that raised the diagnostic - so agents and tools can route on prefix without parsing the message. Numeric ranges are reserved per namespace with generous gaps, so future codes slot in cleanly and the contract is forward-compatible. `ILO-L000-099`=L=Lexer / tokenisation=active `ILO-P100-199`=P=Parser / syntax=active `ILO-N200-299`=N=Names / resolution=reserved `ILO-I300-399`=I=Imports=reserved `ILO-T400-499`=T=Types=active `ILO-V500-599`=V=Verifier (post-type checks)=reserved `ILO-R600-699`=R=Runtime=active `ILO-D700-799`=D=Deprecation warnings=reserved `ILO-E800-899`=E=Engine-specific limitations=active `ILO-S900-999`=S=Skill / spec system=reserved **Historical codes.** ilo shipped with flat numbering inside each namespace - `ILO-L001`, `ILO-P001`, `ILO-T001`, `ILO-R001`, `ILO-W001`, all starting at 001. Those codes remain valid forever. The hundreds-block allocation above applies to new codes from now on, and a cross-engine regression test asserts every emitted code lives in a documented range. **Reserved namespaces.** `N`, `I`, `V`, `D`, `S` carry no codes today. They are forward declarations so the first code in each category slots into its own range without conflicting with the active namespaces. `D` is earmarked for deprecation warnings: when a feature is scheduled for removal it emits an `ILO-D7xx` warning at compile time without failing the build. Use `--explain` to see a detailed explanation: ilo --explain ILO-T004 [Source context] Errors point at the relevant source location with a caret: error[ILO-T005]: undefined function 'foo' (called with 1 args) --> 1:9 1 | f x:n>n;foo x = note: in function 'f' = suggestion: did you mean 'f'? Parser, verifier, and runtime errors all show source spans.
The verifier uses the enclosing statement span as the best available location for expression-level errors. [Suggestions] The verifier provides context-aware hints: **Did you mean?** - Levenshtein-based suggestions for undefined variables, functions, fields, and types **Type conversion** - suggests `str` for n→t, `num` for t→n **Missing arms** - lists uncovered match patterns with types **Arity** - shows expected parameter signature [Error output formats] --ansi / -a ANSI colour (default for TTY) --text / -t Plain text (no colour) --json / -j JSON (default for piped output) --no-hints / -nh Suppress idiomatic hints NO_COLOR=1 Disable colour (same as --text) JSON error output follows a structured schema with `severity`, `code`, `message`, `labels` (with spans), `notes`, and `suggestion` fields. Runtime errors raised from the Cranelift JIT (opt-in via `--jit`) populate `labels` with the source span of the failing operation, matching tree and VM behaviour. Span coverage threads through every JIT runtime helper (unwrap, panic-unwrap, list-get, slice, index, jpth, mget, record-field strict access, builtin dispatch, dynamic call); AOT-compiled binaries inherit the same coverage. Pre-v0.11.6 builds surfaced `{"labels":[]}` for these shapes - if you see an empty labels array on a runtime error, the binary is out of date. AOT binaries also install an async-signal-safe handler in `ilo_aot_init` that catches fatal signals (SIGSEGV, SIGBUS, SIGFPE, SIGILL, SIGABRT) and writes a single JSON line on stderr identifying the signal before the process terminates with the conventional 128+signo exit code. The diagnostic uses `ILO-R015` (AOT runtime fault). Without the handler, a hard fault inside compiled native code would leave the process with raw signal exit (e.g. 139 for SIGSEGV) and no diagnostic — agents driving ilo couldn't distinguish a clean non-zero exit from a hard fault. 
A SIGSEGV from an AOT binary is always a bug in ilo (codegen or runtime helper); file an issue with the source program and the JSON line. [Top-level program output] For a program whose entry function returns a Result, the `~`/`^` wrapper is split across streams and exit codes so shell callers do not have to strip a prefix: `~v` (Ok)=`v` (bare)=-=0 `^e` (Err)=-=`^e`=1 any non-Result=`v`=-=0 In `--json` mode the value is always wrapped (`{"ok": v}` / `{"error": {...}}`) and emitted to stdout; exit codes match the plain-mode table. `Display` on `Value::Ok` / `Value::Err` still renders `~v` / `^e` in every other context (nested values, `prnt`, REPL prompts, error messages, debug output) - only the top-level program-return print path is split. The contract applies uniformly to in-process runners (`ilo prog.ilo`, `--run-tree`, `--run-vm`, `--jit`) and to AOT-compiled standalone binaries from `ilo compile`. Both strip the top-level `~`/`^` wrapper on stdout, route `^e` to stderr, and use the same exit codes - output is byte-for-byte identical across every backend.
[Idiomatic hints] After successful execution, ilo scans the source for non-canonical forms and emits hints to stderr: hint: `==` → `=` saves 1 char (both mean equality in ilo) hint: `length` → `len` (canonical short form) Builtin alias hints appear at most once per program (the first long-form name found). In JSON mode, hints appear as `{"hints":["..."]}` on stderr. Suppress with `--no-hints` / `-nh`. [CLI invocation] ilo 'code' [args...] -- inline program; default-runs the entry function ilo program.ilo [func] [args] -- if `func` is omitted and the file declares exactly one function, that function runs automatically ilo run program.ilo [func] [a] -- verb form; same dispatch as the bare positional ilo check program.ilo [--json] -- run the verifier without executing (exit 0 = clean) ilo build program.ilo -o out -- AOT compile to a standalone binary (alias for `compile`) ilo program.ilo --ast -- print parsed AST as JSON and exit ilo --explain ILO-T004 -- print error explanation and exit ilo help ai -- compact AI spec to stdout (= contents of ai.txt) ilo serv -- long-lived JSON request/response loop **Verb-noun aliases.** `ilo run program.ilo` is an exact alias for the bare positional `ilo program.ilo` - same dispatch, same engine selection, same arg handling. `ilo build program.ilo -o out` is an alias for `ilo compile program.ilo -o out`. Both exist to match the toolchain conventions used by `cargo`, `go`, and `zero` so agents and humans can guess the command name without consulting the help text. The bare positional forms remain fully supported for backwards compatibility; nothing has been removed. **`ilo check`.** Standalone verifier invocation: lex, parse, resolve imports, and run the type verifier without proceeding to bytecode compilation or execution. Exit code 0 means the program is well-typed and verifier-clean; exit code 1 means at least one diagnostic was emitted on stderr.
The output mode follows the global flags (`--json` for NDJSON diagnostics, `--text` for plain text, `--ansi` for coloured output; auto-detected when omitted - JSON when stderr is not a TTY, ANSI otherwise). `ilo check` works on both files and inline code; on a syntactically-broken input it still reports the parse error rather than crashing, which is important for editor and agent loops that may feed in half-written programs. **Default-run.** Inline programs (`ilo 'code'`) and single-function files run their entry function with the remaining CLI args; no explicit function name needed. Multi-function files auto-pick a function called `main` when no positional func arg is supplied. The same heuristic applies to the explicit engine flags - `--run-tree`, `--run-vm`, and `--jit` all auto-pick `main` on multi-fn files, matching the default-engine behaviour. With no `main` declared, supply a function-name argument. **AOT entry-pick.** `ilo compile file.ilo -o out` (alias `ilo build`) follows the same entry-pick rules as the in-process engines: a single user-defined function is used directly; on multi-function files the entry is `main` if defined, otherwise the explicit positional `func` arg (`ilo compile file.ilo -o out run`); otherwise the compile fails with `ILO-E801` and exits 1 without writing a binary. AOT does not fall back to "first declared function" - that historical default produced binaries that called the wrong entry symbol and SIGSEGV'd at runtime. **Default engine.** The bytecode register VM is the default execution path. It supports every opcode (closures with Phase 2 capture, listview windows, fused len-of-filter, every modern shape), and avoids the JIT compile-and-bail cost paid by the pre-v0.11.9 Cranelift-first default whenever a program touched an opcode the JIT couldn't handle. Cranelift JIT is opt-in via `--jit`; on opt-in, the JIT runs hot numeric loops and falls back to the VM on bailout. 
The tree interpreter (`--run-tree`) remains the canonical-semantics reference. Phase 2 captures run natively on every engine - tree, VM, JIT, and AOT (`ilo compile`); AOT embeds the postcard `CompiledProgram` blob into the binary's `.rodata` so dispatch helpers can re-enter the VM on user-fn callbacks the same way the in-process runners do. For long-running workloads where the JIT pays for itself, opt in explicitly; for most agent workloads the VM is the right default. **Subcommand dispatch.** The first positional argument is interpreted as a function name when it has the shape of an ilo identifier - `[a-z][a-z0-9]*(-[a-z0-9]+)*` - so `ilo file.ilo list-orders` routes to the `list-orders` function. Args that don't match the ident shape (file paths like `/tmp/data.json`, numbers, sigils, bracketed lists, anything with a `.` or `/`) route to `main` (or the entry function) as a positional CLI arg instead. Trailing dashes (`foo-`), doubled dashes (`foo--bar`), and negative numbers (`-1`) are not idents and pass through as data. **Unknown `--flag` guard.** Any token in the positional tail matching the clean long-flag shape `--word` or `--word-with-dashes` that isn't a recognised flag is rejected upfront with `error: unrecognised flag '--NAME'. Use 'ilo --help' for valid flags. To pass it as a literal arg, separate with '--' first.` (where `--NAME` stands for the offending token) and exit 1. This prevents `ilo main.ilo --engine tree` from silently consuming `--engine` as a positional arg (which used to surface as misleading `ILO-R012 no functions defined` or `ILO-R004 main: expected N args, got N+1`). To pass a hyphen-prefixed token through as literal data, place the `--` separator first: `ilo main.ilo -- --foo`. Anything after the first `--` is data. Tokens with `=` (`--key=val`), trailing or doubled dashes (`--foo-`, `--foo--bar`), and negative numbers (`-1`) are not clean flag shapes and pass through unchanged.
**Text-typed params.** When the entry function declares a parameter of type `t`, the CLI passes the raw arg through without numeric coercion. `ilo 'f x:t>t;x' 42` returns the string `"42"`, not the number 42. **Exit codes.** A program returning `Value::Err` (or `^reason` from the entry function) exits with code 1 and prints the err payload on stderr. `~v` (Ok) and any non-Result return value exit 0. Verifier and parser errors exit 2. **List args from the CLI.** Comma-separated args become `L n` or `L t` automatically: `ilo 'f xs:L n>n;sum xs' 1,2,3`. FORMATTER: Dense output is the default - newlines are for humans, not agents. No flag needed for dense format: ilo 'code' Dense wire format (default) ilo 'code' --dense / -d Same, explicit ilo 'code' --expanded / -e Expanded human format (for code review) [Dense format] Single line per declaration, minimal whitespace. Operators glue to first operand: cls sp:n>t;>=sp 1000{"gold"};>=sp 500{"silver"};"bronze" [Expanded format] Multi-line with 2-space indentation. Operators spaced from operands: cls sp:n > t >= sp 1000 { "gold" } >= sp 500 { "silver" } "bronze" Dense format is canonical - `dense(parse(dense(parse(src)))) == dense(parse(src))`. 
COMPLETE EXAMPLE: tool get-user"Retrieve user by ID" uid:t>R profile t timeout:5,retry:2 tool send-email"Send an email" to:t subject:t body:t>R _ t timeout:10,retry:1 type profile{id:t;name:t;email:t;verified:b} ntf uid:t msg:t>R _ t;get-user uid;?{^e:^+"Lookup failed: "e;~d:!d.verified{^"Email not verified"};send-email d.email "Notification" msg;?{^e:^+"Send failed: "e;~_:~_}} [Recursive Example] Factorial and Fibonacci as standalone functions: fac n:n>n;<=n 1 1;r=fac -n 1;*n r fib n:n>n;<=n 1 n;a=fib -n 1;b=fib -n 2;+a b From daf8d5748b4bfc1c2d0c1b0920a13df0db6ec014 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 18:00:17 +0100 Subject: [PATCH 7/8] tests: fix grp/uniqby test sources to use ternary syntax The original source 'parity n:n>t;=mod n 2 0{"even"};"odd"' parses as a sequence of two statements - '=mod n 2 0{"even"}' (a comparison followed by a list literal, discarded) and '"odd"' (the return). The function therefore always returned "odd" regardless of input, so grp and uniqby produced singleton outputs across every engine - not just AOT. The fix is to use the standard ternary form '?(=mod n 2 0){"even"}{"odd"}' so the function actually branches on parity. 
--- tests/regression_aot_closures.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/regression_aot_closures.rs b/tests/regression_aot_closures.rs index e9d3d20c..7ff58c15 100644 --- a/tests/regression_aot_closures.rs +++ b/tests/regression_aot_closures.rs @@ -187,7 +187,7 @@ fn aot_fld_user_fn() { fn aot_grp_by_user_fn() { assert_cross_engine( "grp-user-fn", - "parity n:n>t;=mod n 2 0{\"even\"};\"odd\"\nmain>n;g=grp parity [1,2,3,4];len mkeys g\n", + "parity n:n>t;?(=mod n 2 0){\"even\"}{\"odd\"}\nmain>n;g=grp parity [1,2,3,4];len mkeys g\n", b"2\n", ); } @@ -199,7 +199,7 @@ fn aot_grp_by_user_fn() { fn aot_uniqby_user_fn() { assert_cross_engine( "uniqby-user-fn", - "parity n:n>t;=mod n 2 0{\"even\"};\"odd\"\nmain>n;u=uniqby parity [1,2,3,4];len u\n", + "parity n:n>t;?(=mod n 2 0){\"even\"}{\"odd\"}\nmain>n;u=uniqby parity [1,2,3,4];len u\n", b"2\n", ); } From 69a18990cbcd3c9ee9fc96d2bde432c0cc35c9f7 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Tue, 19 May 2026 18:00:25 +0100 Subject: [PATCH 8/8] tests: direct FFI coverage for ilo_aot_publish_program The cross-engine AOT regression tests exercise publish/fini end-to-end via a compiled binary, but the parent test process's coverage instrumentation doesn't track lines executed in the child. Add three in-process unit tests that call ilo_aot_publish_program and ilo_aot_fini directly so cargo llvm-cov picks up the publish path, the TLS slot wiring, and the schema-version constant. Codecov was failing patch coverage at 86.7% because the entire ilo_aot_publish_program body (20 lines in vm/mod.rs) was reported as uncovered. These tests cover the happy path and the fini-clears-TLS path; the malformed-blob branch exits the process so remains uncovered by design (matches how Rust's coverage tooling treats other process::exit branches in the codebase). 
--- src/vm/mod.rs | 98 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) diff --git a/src/vm/mod.rs b/src/vm/mod.rs index eea81ca5..55579227 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -34482,3 +34482,101 @@ main>n } } } + +#[cfg(all(test, feature = "cranelift"))] +mod aot_publish_tests { + //! Direct FFI tests for `ilo_aot_publish_program` and `ilo_aot_fini`. + //! + //! The cross-engine AOT regression tests in `tests/regression_aot_closures.rs` + //! cover the happy path end-to-end by compiling a real binary, but they + //! run the publish/fini code inside a child process, so the parent's + //! `cargo llvm-cov` instrumentation misses the lines. These tests call + //! the `extern "C"` helpers directly from Rust so coverage picks them up, + //! and exercise the TLS publish + fini cycle without exec-ing a binary. + use super::*; + use crate::vm::aot_blob::{BLOB_SCHEMA_VERSION, serialize_program}; + + fn compile_simple(src: &str) -> CompiledProgram { + let tokens = crate::lexer::lex(src).expect("lex"); + let token_spans: Vec<_> = tokens + .into_iter() + .map(|(t, r)| { + ( + t, + crate::ast::Span { + start: r.start, + end: r.end, + }, + ) + }) + .collect(); + let (prog, errors) = crate::parser::parse(token_spans); + assert!(errors.is_empty(), "parse errors: {:?}", errors); + compile(&prog).expect("compile") + } + + #[test] + fn publish_program_populates_tls_slots() { + let prog = compile_simple("add a:n b:n>n;+a b\nmain>n;add 2 3"); + let bytes = serialize_program(&prog).expect("serialize"); + + // Sanity: TLS slots may be set by a previous test; clear so we observe + // the publish actually writes them. 
+ ilo_aot_fini(); + ACTIVE_PROGRAM.with(|r| assert!(r.get().is_null())); + ACTIVE_FUNC_NAMES.with(|r| assert!(r.get().is_null())); + ACTIVE_AST_PROGRAM.with(|r| assert!(r.get().is_null())); + + let rc = ilo_aot_publish_program(bytes.as_ptr() as u64, bytes.len() as u64); + assert_eq!(rc, 0, "publish should return 0 on success"); + + ACTIVE_PROGRAM.with(|r| assert!(!r.get().is_null(), "ACTIVE_PROGRAM not published")); + ACTIVE_FUNC_NAMES.with(|r| assert!(!r.get().is_null(), "ACTIVE_FUNC_NAMES not published")); + ACTIVE_AST_PROGRAM + .with(|r| assert!(!r.get().is_null(), "ACTIVE_AST_PROGRAM not published")); + + let names_ptr = ACTIVE_FUNC_NAMES.with(|r| r.get()); + let names = unsafe { &*names_ptr }; + assert!(names.contains(&"add".to_string())); + assert!(names.contains(&"main".to_string())); + + // Cleanup so we don't leak state into other tests in the same process. + ilo_aot_fini(); + ACTIVE_PROGRAM.with(|r| assert!(r.get().is_null(), "fini should null ACTIVE_PROGRAM")); + ACTIVE_FUNC_NAMES + .with(|r| assert!(r.get().is_null(), "fini should null ACTIVE_FUNC_NAMES")); + ACTIVE_AST_PROGRAM + .with(|r| assert!(r.get().is_null(), "fini should null ACTIVE_AST_PROGRAM")); + } + + #[test] + fn publish_program_round_trips_chunks_and_registry() { + // Confirms the published CompiledProgram is structurally equivalent + // to the source: same chunk count, same func_names, same TLS + // pointers reachable. 
+ let src = "sq x:n>n;*x x\nmain>L n;map (n:n>n;sq n) [1,2,3]"; + let prog = compile_simple(src); + let original_chunk_count = prog.chunks.len(); + let original_names = prog.func_names.clone(); + let bytes = serialize_program(&prog).expect("serialize"); + + ilo_aot_fini(); + let rc = ilo_aot_publish_program(bytes.as_ptr() as u64, bytes.len() as u64); + assert_eq!(rc, 0); + + let prog_ptr = ACTIVE_PROGRAM.with(|r| r.get()); + let pubd = unsafe { &*prog_ptr }; + assert_eq!(pubd.chunks.len(), original_chunk_count); + assert_eq!(pubd.func_names, original_names); + + ilo_aot_fini(); + } + + #[test] + fn blob_schema_version_is_stable() { + // Lock the schema version. Bumping it must be intentional - any + // change here must come with a corresponding deserialise-compat + // story (or an explicit "we only support v_n" cut-over). + assert_eq!(BLOB_SCHEMA_VERSION, 1); + } +}