From 745f57e1517acf280a34854d312f9e73c4628342 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Thu, 21 May 2026 21:29:41 +0100 Subject: [PATCH 1/4] fix(vm): add B64Dec to tree_bridge_returns_result for ! unwrap b64-dec is tree-bridge eligible and returns R t t, but the matching entry in tree_bridge_returns_result was missing. The verifier accepts b64-dec! as Result-returning, but the VM compiler's tree- bridge dispatch then hits the assert at src/vm/mod.rs:1887 and panics with 'auto-unwrap on a non-Result tree-bridge builtin slipped past verify' before the program even starts running. The matching B64uDec entry has been there since the url-safe cluster landed; B64Dec was added to is_tree_bridge_eligible in the crypto cluster (PR #560) but never wired into the Result whitelist below it. Same fix as the b64u case: one extra match arm. Adds two regression tests in tests/regression_crypto_primitives.rs: b64_dec_bang_auto_unwrap_does_not_panic_vm pins the Ok-path round- trip cross-engine, and b64_dec_bang_propagates_err pins the Err-path (invalid input) so a future drop of B64Dec from the whitelist is caught at unit-test scope rather than waiting for the examples harness to repeat-fire across every -- run: entry. --- src/vm/mod.rs | 1 + tests/regression_crypto_primitives.rs | 46 +++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/src/vm/mod.rs b/src/vm/mod.rs index 14308fff..c029d69b 100644 --- a/src/vm/mod.rs +++ b/src/vm/mod.rs @@ -870,6 +870,7 @@ pub(crate) fn tree_bridge_returns_result(b: crate::builtins::Builtin) -> bool { | Builtin::Opt | Builtin::Urldec | Builtin::B64uDec + | Builtin::B64Dec | Builtin::TzOffset ) } diff --git a/tests/regression_crypto_primitives.rs b/tests/regression_crypto_primitives.rs index 990893dc..18e5a5f6 100644 --- a/tests/regression_crypto_primitives.rs +++ b/tests/regression_crypto_primitives.rs @@ -238,6 +238,52 @@ fn ct_eq_empty_strings() { } } +// ── Tree-bridge auto-unwrap invariant for b64-dec! 
─────────────────────────── + +#[test] +fn b64_dec_bang_auto_unwrap_does_not_panic_vm() { + // Regression for the 0.12.x crypto-cluster panic where `b64-dec!` + // crashed the VM with "auto-unwrap on a non-Result tree-bridge builtin + // slipped past verify" at src/vm/mod.rs:1887. `b64-dec` returns R t t + // and is tree-bridge eligible, so it must also appear in + // `tree_bridge_returns_result` for the VM's `!` compiler arm to wire + // up OP_ISOK/OP_UNWRAP correctly. The matching tree-bridge / verifier + // pair for B64uDec was already correct; B64Dec was the regression. + // + // This test specifically pins the VM `!` path so a future drop of + // B64Dec from `tree_bridge_returns_result` is caught immediately + // instead of waiting for the `examples` harness to hit it. + let src = "f>t;b64-dec! \"Zm9vYmFy\""; + for e in ENGINES { + assert_eq!(run_ok(e, src, "f"), "foobar", "engine={e}"); + } +} + +#[test] +fn b64_dec_bang_propagates_err() { + // Companion to the panic-regression test above: the `!` operator + // propagates Err out to the caller (which we trigger as a non-zero + // exit), so an invalid input must still take the propagate path + // cleanly across engines, not panic. + let src = "f>t;b64-dec! \"!!!!\""; + for e in ENGINES { + let out = ilo() + .args([src, e, "f"]) + .output() + .expect("failed to run ilo"); + assert!( + !out.status.success(), + "engine={e}: expected non-zero exit from b64-dec! on invalid input" + ); + // Must NOT be the VM bridge-assert panic. 
+ let stderr = String::from_utf8_lossy(&out.stderr); + assert!( + !stderr.contains("auto-unwrap on a non-Result tree-bridge builtin slipped past verify"), + "engine={e}: VM tree-bridge assert tripped; stderr={stderr}" + ); + } +} + // ── HMAC verification flow (the canonical use case) ────────────────────────── #[test] From fab95f454740dc7b2e0d9419712d06b9fed1d164 Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Thu, 21 May 2026 21:29:51 +0100 Subject: [PATCH 2/4] skills: bump per-module token caps to match 0.12.x growth The four ilo-builtins-* modules and ilo-agent have drifted over their 1000-token cap with the HTTP verb cluster (#5z), getx/pstx (#5bn), crypto primitives, calendar arithmetic, and the numeric/text prelude additions of 0.12.x. ilo-language has crept just over its 1500-cap on the back of triple-quoted string docs and a few clarifications. Each cap is set with ~10% headroom over current weight so a single follow-on doc-pair addition does not flip CI red. The aggregate TOTAL_LIMIT goes from 15000 to 16000, well above current total 11239. The split-by-category design holds - typical task load is still 1-2 modules at ~2-3 KB each. The bumps reflect deliberate surface-area growth in 0.12.x and the comment is updated to say so. --- scripts/check-skill-tokens.py | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) diff --git a/scripts/check-skill-tokens.py b/scripts/check-skill-tokens.py index 4f2a44b5..127dae3d 100755 --- a/scripts/check-skill-tokens.py +++ b/scripts/check-skill-tokens.py @@ -45,14 +45,21 @@ PER_MODULE_LIMIT = 1000 # `ilo-language` is the foundational module every agent loads first; it # carries a higher cap because core syntax doesn't split cleanly into -# smaller files. `ilo-builtins-io` is the next most-touched module — -# HTTP, JSON, env, time, and process all live there; agent dogfooding -# hits this cap on every other doc PR. Bumped to match its density. +# smaller files. 
The four `ilo-builtins-*` modules and `ilo-agent` carry +# bumped caps reflecting accumulated growth across the HTTP verb cluster +# (#5z), getx/pstx (#5bn), crypto primitives, calendar arithmetic, and +# the numeric/text prelude additions of 0.12.x. Each cap is set with +# ~10% headroom over current weight so a single doc-pair addition does +# not flip CI red. PER_MODULE_OVERRIDES = { - "ilo-language": 1500, - "ilo-builtins-io": 1500, + "ilo-language": 1700, + "ilo-builtins-core": 1200, + "ilo-builtins-math": 1200, + "ilo-builtins-io": 2000, + "ilo-builtins-text": 1200, + "ilo-agent": 1300, } -TOTAL_LIMIT = 15000 +TOTAL_LIMIT = 16000 def main() -> int: From cc2afefc519c1fa815a982268eae993388248d5d Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Thu, 21 May 2026 21:29:56 +0100 Subject: [PATCH 3/4] docs: list b64 and hex in the 3-char reserved short-names table PR #560 added the crypto cluster (sha256, hmac-sha256, b64, b64-dec, hex, ct-eq) but did not update the SPEC.md reserved-namespaces table to include the two new 3-char short names. regression_reserved_names_doc catches this drift but was masked behind the examples/crypto-primitives panic in CI fail-fast ordering. ai.txt regenerated by build.rs. 
--- SPEC.md | 8 ++++---- ai.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/SPEC.md b/SPEC.md index 6e277fdd..5f13ecf9 100644 --- a/SPEC.md +++ b/SPEC.md @@ -224,10 +224,10 @@ Short builtin names are precious surface and ilo reserves a stable subset of the ``` 1-char e 2-char at hd pi tl rd wr ct -3-char abs avg cap cat cel chr cos del det dot env ewm exp fft fld flr flt - fmt frq get grp has hed inv len log lsd lst lwr map max min mod now - num opt ord pat pow pst put rdb rdl rep rev rgx rng rnd rou run sin - slc spl srt str sum tan tau trm unq upr wra wrl zip +3-char abs avg b64 cap cat cel chr cos del det dot env ewm exp fft fld flr + flt fmt frq get grp has hed hex inv len log lsd lst lwr map max min + mod now num opt ord pat pow pst put rdb rdl rep rev rgx rng rnd rou + run sin slc spl srt str sum tan tau trm unq upr wra wrl zip ``` All builtin aliases (`head`, `length`, `filter`, `concat`, `tail`, `sort`, `reverse`, `flatten`, `contains`, `group`, `average`, `print`, `trim`, `split`, `format`, `regex`, `read`, `readlines`, `readbuf`, `write`, `writelines`, `lset`, `floor`, `ceil`, `round`, `rand`, `random`, `rng`, `string`, `number`, `slice`, `unique`, `fold`) are reserved with the same shadow-prevention semantics as canonical builtin names. Binding an alias name or using it as a user-function name fires `ILO-P011` at parse time with the canonical form in the diagnostic, since the call-site rewrite to the canonical builtin silently bypasses any user binding of the same name. Previously only `rng` and `rand` had individual guards; as of 0.12.1 every alias in the table above is covered by a single `resolve_alias` check, so new aliases automatically inherit the protection when added to the table. diff --git a/ai.txt b/ai.txt index b7f47141..0212541b 100644 --- a/ai.txt +++ b/ai.txt @@ -2,7 +2,7 @@ INTRO: ilo is a token-optimised programming language for AI agents. Every design FILE VERSION PRAGMA: Optional. 
^26.5 -- rest of file Top-of-file declaration of the minimum required runtime. First line, no leading whitespace. Sigil-led (principle 4), ~3 tokens (principle 1). First-class syntax, not a magic comment - the lexer recognises `^` only at file start, so `^` elsewhere keeps its `return err` meaning. Pragma absent=Assume latest installed runtime, no diagnostic File targets older than runtime, breaking change between=Fail with migration pointer File targets newer than runtime=Fail asking to upgrade Tooling: `ilo --version-of ` reads the pragma (returns nothing when absent); the formatter canonicalises position when present, never inserts one. Ships with the CalVer cut; 0.x files have no pragma and verify silently. FUNCTIONS: : ...>; No parens around params - `>` separates params from return type `;` separates statements - no newlines required Last expression is the return value (no `return` keyword) Zero-arg call: `make-id()` tot p:n q:n r:n>n;s=*p q;t=*s r;+s t TYPES: `n`=number (f64) `t`=text (string) `b`=bool `_`=any/unknown (wildcard type) `L n`=list of number `R n t`=result: ok=number, err=text `O n`=optional number (nil or n) `M t n`=map from text keys to numbers `S red green blue`=sum type - one of named text variants `F n t`=function type: takes n, returns t (used in HOF params) `order`=named type `a`=type variable - any single lowercase letter except n, t, b [Optional (`O T`)] `O T` accepts either `nil` or a value of type `T`. f x:O n>n;??x 0 -- unwrap optional or default to 0 g>O n;nil -- returns nil (valid O n) h>O n;42 -- returns 42 (valid O n) `??x default` - nil-coalesce: returns `x` if non-nil, else `default`. Unwraps `O T` to `T`. [Sum types (`S a b c`)] Closed set of named text variants. Verifier-enforced; runtime value is always `t`. color x:S red green blue > t ?x{red:"ff0000";green:"00ff00";blue:"0000ff"} Sum types are compatible with `t` - a sum value can be passed to any `t` parameter. [Map type (`M k v`)] Dynamic key-value collection. 
Keys are typed: text (`t`) or integer (`n`). `Int(1)` and `Text("1")` are distinct keys. mmap -- empty map mset m k v -- return new map with key k set to v mget m k -- value at key k, or nil mget-or m k default -- value at key k, or default if missing (never nil) mhas m k -- b: true if key exists mkeys m -- L t: sorted list of keys mvals m -- L v: values sorted by key mpairs m -- L (L _): sorted [k, v] pairs; mpairs m == zip (mkeys m) (mvals m) mdel m k -- return new map with key k removed len m -- number of entries Numeric keys work directly - no `str` conversion needed. Float keys floor to `i64` at the builtin boundary (matching `at xs i`); NaN/Infinity raise at runtime. idx=mmap idx=mset idx 7 "seven" -- M n t, integer key mget idx 7 -- "seven" mhas idx 7 -- true mhas idx "7" -- false (Int and Text are distinct) `jdmp` stringifies numeric keys for JSON output (JSON object keys are always strings). The round-trip via `jpar` is lossy - numeric keys come back as text. Example: scores>M t n m=mmap m=mset m "alice" 99 m=mset m "bob" 87 mget m "alice" -- 99 [Type variables] A single lowercase letter (other than `n`, `t`, `b`) in type position is a type variable, treated as `unknown` during verification. Used for higher-order function signatures: identity x:a>a;x apply f:F a a x:a>a;f x Type variables provide weak generics - the verifier accepts any type for `a` without consistency checking across call sites. [Inline lambdas] Pass a function literal directly to a HOF instead of defining a one-off top-level helper: by-dist xs:L n>L n;srt (x:n>n;abs x) xs nonempty ws:L t>L t;flt (s:t>b;>(len s) 0) ws sumsq xs:L n>n;fld (a:n x:n>n;+a *x x) xs 0 Syntax: `(: ...>;)`. Same shape as a top-level function declaration, wrapped in parens, no name. **Phase 1 (no captures)** lifts the literal to a synthetic top-level decl and works across every engine (tree, VM, Cranelift JIT, AOT). 
The body's free variables must all be params, locals defined inside the lambda body, or known top-level fns. **Phase 2 (closure capture)** lets the body reference variables from the enclosing scope: f xs:L n thr:n>L n;flt (x:n>b;>x thr) xs -- captures `thr` Phase 2 captures run natively on every engine: the tree interpreter, the register VM, the Cranelift JIT, and the Cranelift AOT backend. Each free variable is snapshot by value at the call site (`Expr::MakeClosure`) and appended to the call frame's arg slice on dispatch. The AOT backend additionally embeds the postcard-serialised `CompiledProgram` into the binary's `.rodata` and publishes TLS pointers on startup, so dispatch helpers can re-enter the VM on user-fn callbacks. The ctx-arg form (`srt fn ctx xs`) remains the cross-engine alternative when you want explicit state without forming a closure. -NAMING: Short names everywhere. 1–3 chars. `order`=`ord`=truncate `customers`=`cs`=consonants `data`=`d`=single letter `level`=`lv`=drop vowels `discount`=`dc`=initials `final`=`fin`=first 3 `items`=`its`=first 3 Function names follow the same rules. Field names in constructors and external tool names keep their full form - they define the public interface. [Identifier syntax] Identifiers are lowercase ASCII only, optionally with hyphenated segments. Formally: `[a-z][a-z0-9]*(-[a-z0-9]+)*`. Capital letters and underscores are rejected at the binding and call site. run -- OK run-d -- OK (hyphen separates segments) r2 -- OK (digit after first letter) runD -- ERROR (capital letter) RunD -- ERROR (leading capital) run_d -- ERROR (underscore not allowed in bindings) -run -- ERROR (must start with a letter) `runD` in the interactive CLI surfaces as `ILO-L003 unexpected token` with a suggestion to use `run-d` or `rund`. The constraint is intentional: a single lexical shape per identifier keeps the token stream predictable for agents and avoids style debates over camelCase vs snake_case vs kebab-case. 
The only place capital letters and underscores are accepted is **after `.` or `.?`** at field-access position, so heterogeneous JSON keys from real APIs work without rewriting. See [Field names at dot-access](#field-names-at-dot-access) for the full list of post-dot relaxations (`r.URL`, `r.AccessKey`, `r.user_name`, etc.). Binding names (`AccessKey = ...`) and function names (`AccessKey x:n>n;...`) still error. [Reserved words] The following identifiers are reserved and cannot be used as names: `if`, `return`, `let`, `fn`, `def`, `var`, `const`. Using them produces a friendly error with the ilo equivalent: -- ERROR: `if` is a reserved word. Use: ?cond{true:...;false:...} -- ERROR: `return` is a reserved word. Last expression is the return value. -- ERROR: `let` is a reserved word. Use: name = expr -- ERROR: `fn`/`def` is a reserved word. Use: name param:type > rettype; body These checks fire at parse time across every context the keyword can appear in: top-level declaration head (`fn>n;...`), binding LHS (`fn=5`), and **parameter position** (`g fn:n>n;fn` rejects with ILO-P011 against the param name, not a cryptic ILO-P003 against the missing `>`). Builtin names (`flat`, `frq`, `map`, `flt`, `cat`, `len`, `srt`, `hd`, `tl`, `ord`, `fld`, `lst`, ...) are also rejected as user-function names and as local-binding LHS. Without this, calls to the user fn or use sites of the local binding silently mis-dispatch to the builtin and surface as a confusing `ILO-T006` arity mismatch. The parser intercepts at the declaration site with ILO-P011 and a rename hint: flat n:n>n;n -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a function name -- hint: rename to something like `myflat` or `flatof`. main>n;flat=cat xs " ";spl flat ". " -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a binding name -- hint: rename to something like `myflat` or `flatv`. [Reserved namespaces] Short builtin names are precious surface and ilo reserves a stable subset of them. 
To save agents (and their carry-forward scripts) from "what got reserved this release?" debugging cycles, the language publishes the full short-name reserve list plus a forward-compatibility rule for future builtins. **Currently reserved short names (1-3 characters).** Every name in this list is a builtin today and triggers `ILO-P011` if used as a binding or user-function name: 1-char e 2-char at hd pi tl rd wr ct 3-char abs avg cap cat cel chr cos del det dot env ewm exp fft fld flr flt fmt frq get grp has hed inv len log lsd lst lwr map max min mod now num opt ord pat pow pst put rdb rdl rep rev rgx rng rnd rou run sin slc spl srt str sum tan tau trm unq upr wra wrl zip All builtin aliases (`head`, `length`, `filter`, `concat`, `tail`, `sort`, `reverse`, `flatten`, `contains`, `group`, `average`, `print`, `trim`, `split`, `format`, `regex`, `read`, `readlines`, `readbuf`, `write`, `writelines`, `lset`, `floor`, `ceil`, `round`, `rand`, `random`, `rng`, `string`, `number`, `slice`, `unique`, `fold`) are reserved with the same shadow-prevention semantics as canonical builtin names. Binding an alias name or using it as a user-function name fires `ILO-P011` at parse time with the canonical form in the diagnostic, since the call-site rewrite to the canonical builtin silently bypasses any user binding of the same name. Previously only `rng` and `rand` had individual guards; as of 0.12.1 every alias in the table above is covered by a single `resolve_alias` check, so new aliases automatically inherit the protection when added to the table. Longer builtin names (`acos`, `asin`, `atan`, `flat`, `take`, `drop`, `mget`, `mset`, `mmap`, `prnt`, `mapr`, `solve`, `lstsq`, `clamp`, `cumsum`, `cprod`, `median`, `matmul`, `range`, `window`, `chunks`, `walk`, `glob`, `prod`, `fsize`, `mtime`, `isfile`, `isdir`, …) are also reserved and rejected by `ILO-P011`, but the short-name namespace above is where carry-forward scripts most often collide, so it gets explicit enumeration. 
Longer builtin names (`acos`, `asin`, `atan`, `flat`, `take`, `drop`, `mget`, `mset`, `mmap`, `prnt`, `mapr`, `solve`, `clamp`, `cumsum`, `cprod`, `median`, `matmul`, `range`, `window`, `chunks`, `walk`, `glob`, `prod`, `fsize`, `mtime`, `isfile`, `isdir`, `ones`, `linspace`, …) are also reserved and rejected by `ILO-P011`, but the short-name namespace above is where carry-forward scripts most often collide, so it gets explicit enumeration. **Forward-compatibility rule.** Future ilo releases add new builtins under names **4 characters or longer**. A 2-character name that is not on this list today is safe to use as a binding or function name and stays safe across releases. A 3-character name that is not on this list is _highly likely_ to stay safe but is not a hard promise - the 3-char surface is already dense, and a rare ergonomic win may justify an addition, called out in the changelog. This gives agents a deterministic safe-name strategy: **2 chars**: any unreserved 2-char name is permanently fine for bindings (`ce` for "category", `ix` for index, `mn` for "mean", `pq` for "priority queue", …). Names on the reserved list above never get removed. **3 chars**: prefer unreserved 3-char names where possible. If a future release reserves one, the migration is a 1-character rename plus a changelog entry. **4+ chars**: always safe. New builtins land here first; any short alias is added later only if the long name is unambiguous and the short doesn't shadow a plausible user binding. When a collision does happen, `ILO-P011` surfaces it at the binding site with a rename suggestion - never silently mis-dispatches at the call site (see the `flat=cat xs " "` example above). Combined with the reserve list, that turns every name-collision incident into a single-character rename instead of a debugging spiral. [Cross-language gotchas] Common shapes reached for from other languages. 
The parser and lexer surface each with a friendly hint: `AND a b`, `OR a b`, `NOT a`=`&a b`, `|a b`, `!a`=`ILO-L001` `=a b`=`<=a b`, `>=a b` (single token)=`ILO-P003` `f=fn x:n>n;+x 1` (lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-P009` `\x{+x 1}` (Haskell/Rust lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-L001` `main:>n;body`=`main>n;body` (no `:` before `>`)=`ILO-P003` Multi-line body without braces=`@k xs{body}`, `cond{body}` on one line=`ILO-P003` `cond{^"err"}` braced-cond=Braceless `cond ^"err"` for early return=hint only `- -*a b *c d` (double-minus)=`- 0 +*a b *c d` (negate the sum)=`ILO-P021` `[k fmt2 v 2]` (call in list)=`[k (fmt2 v 2)]` or bind-first=`ILO-P101` `pts=gen-pts;cs0=[...];prnt cs0` at top level=`main>_;pts=gen-pts;cs0=[...];prnt cs0` (wrap in `main>_;`)=`ILO-P102` `((((...((1+1))))...))` 1000 deep=bind intermediates, or pass `--max-ast-depth N`=`ILO-P103` `dx=xj 0-xi` (call vs binop)=`-xj xi` or pre-bind: `nxi=0-xi;+xj nxi`=`ILO-T005` `tup.0` / `pair.0` (tuple access)=bind from `zip`-pair, then `at pair 0` (no tuple type)=`ILO-T004` Each case fires a hint pointing at the canonical form; the agent's first retry should be the right one. Identifier-shaped collisions with builtin names (`len=...`, `sin=...`) are rejected with `ILO-P011` plus a rename suggestion. The list-literal call trap (`ILO-P101`) catches the case where a variadic builtin (`fmt`, `fmt2`) appears bare inside `[...]`. Fixed-arity builtins (`str`, `at`, `map`, ...) auto-expand to a call as one element, but variadic ones can't (the parser doesn't know where their args end), so the bare form would silently fall through as multiple elements with the builtin name as an undefined Ref. Fix by wrapping the call in parens (`[k (fmt2 v 2)]`) or binding first. The top-level chain trap (`ILO-P102`) catches a bare `name=expr` at the top level. 
ilo requires every binding to live inside a function body; a top-level `pts=gen-pts;cs0=[[...]]; ...; prnt cs2` without a `main>_;` (or any) header used to either die on the `=` (a bare `ILO-P003`) or get slurped into a previous function's body and emit a wall of misleading `ILO-T005` cascades on the wrong line. `ILO-P102` collapses both shapes into a single diagnostic that names the offending binding and suggests the canonical `main>_;` wrapper. The double-minus trap (`ILO-P021`) catches the silent-miscompile shape `- - a b c d` for `` in `{+,*,/}`. Read intuitively as `-(a*b) - (c*d)` but parses as `-((a*b) - (c*d)) = -(a*b) + (c*d)` because the inner `-` greedily consumes both prefix-binop groups as binary subtract and the outer `-` falls back to unary negate. Fix by negating the sum (`- 0 +*a b *c d`) or binding first (`p=*a b;q=*c d;- 0 +p q`). Single-atom variants like `- -a b` remain accepted since they're unambiguous. The call-vs-binop trap (`ILO-T005` with tailored hint) catches the assignment-RHS shape `name expr` where `name` is a bound non-fn value (typically a parameter). Whitespace-juxtaposition is the call syntax in ilo, so `dx=xj 0-xi` parses as `dx=(xj 0)-xi` — a call to `xj` with argument `0`. Verification fails because `xj` isn't a function. The hint surfaces the prefix-operator alternatives (`-xj xi`, `+xj `) and the pre-bind workaround. The misparse is most common when an agent reaches for infix arithmetic between a parameter and a subexpression; pre-binding the operand always resolves the ambiguity. `ilo --explain ILO-T005` includes the full gotcha walkthrough. The tuple-access trap (`ILO-T004` with the `at ` hint) catches `tup.0` / `pair.0` shapes where `tup` / `pair` was never bound. ilo has no tuple type. `zip xs ys` returns `L (L n)` — a list of two-element lists — so destructuring a pair is `at pair 0` / `at pair 1`, not `pair.0` / `pair.1`. The hint names the exact `at` call to write. 
(`pair.0` itself is still valid sugar for list indexing once `pair` is bound to an `L T`; the diagnostic only fires when the identifier is unbound.) The AST depth cap (`ILO-P103`) catches deeply nested source that would otherwise blow the parser stack. Any context that compiles untrusted text - `ilo serv`, the bare-positional dispatch, the `--ast` dump - is exposed to a payload of the shape `((((...((1+1))))...))` 1000 levels deep that recurses straight through the OS thread stack. The default cap of 256 is far above anything hand-written (the in-tree examples top out under 20) and low enough to keep the worst-case stack frame in `parse_atom`/`parse_expr` inside the default 8 MB main-thread stack. Override with `--max-ast-depth N` on `ilo`, `ilo run`, `ilo check`, `ilo build`, and `ilo serv` when a legitimate program needs deeper nesting. +NAMING: Short names everywhere. 1–3 chars. `order`=`ord`=truncate `customers`=`cs`=consonants `data`=`d`=single letter `level`=`lv`=drop vowels `discount`=`dc`=initials `final`=`fin`=first 3 `items`=`its`=first 3 Function names follow the same rules. Field names in constructors and external tool names keep their full form - they define the public interface. [Identifier syntax] Identifiers are lowercase ASCII only, optionally with hyphenated segments. Formally: `[a-z][a-z0-9]*(-[a-z0-9]+)*`. Capital letters and underscores are rejected at the binding and call site. run -- OK run-d -- OK (hyphen separates segments) r2 -- OK (digit after first letter) runD -- ERROR (capital letter) RunD -- ERROR (leading capital) run_d -- ERROR (underscore not allowed in bindings) -run -- ERROR (must start with a letter) `runD` in the interactive CLI surfaces as `ILO-L003 unexpected token` with a suggestion to use `run-d` or `rund`. The constraint is intentional: a single lexical shape per identifier keeps the token stream predictable for agents and avoids style debates over camelCase vs snake_case vs kebab-case. 
The only place capital letters and underscores are accepted is **after `.` or `.?`** at field-access position, so heterogeneous JSON keys from real APIs work without rewriting. See [Field names at dot-access](#field-names-at-dot-access) for the full list of post-dot relaxations (`r.URL`, `r.AccessKey`, `r.user_name`, etc.). Binding names (`AccessKey = ...`) and function names (`AccessKey x:n>n;...`) still error. [Reserved words] The following identifiers are reserved and cannot be used as names: `if`, `return`, `let`, `fn`, `def`, `var`, `const`. Using them produces a friendly error with the ilo equivalent: -- ERROR: `if` is a reserved word. Use: ?cond{true:...;false:...} -- ERROR: `return` is a reserved word. Last expression is the return value. -- ERROR: `let` is a reserved word. Use: name = expr -- ERROR: `fn`/`def` is a reserved word. Use: name param:type > rettype; body These checks fire at parse time across every context the keyword can appear in: top-level declaration head (`fn>n;...`), binding LHS (`fn=5`), and **parameter position** (`g fn:n>n;fn` rejects with ILO-P011 against the param name, not a cryptic ILO-P003 against the missing `>`). Builtin names (`flat`, `frq`, `map`, `flt`, `cat`, `len`, `srt`, `hd`, `tl`, `ord`, `fld`, `lst`, ...) are also rejected as user-function names and as local-binding LHS. Without this, calls to the user fn or use sites of the local binding silently mis-dispatch to the builtin and surface as a confusing `ILO-T006` arity mismatch. The parser intercepts at the declaration site with ILO-P011 and a rename hint: flat n:n>n;n -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a function name -- hint: rename to something like `myflat` or `flatof`. main>n;flat=cat xs " ";spl flat ". " -- ERROR ILO-P011: `flat` is a builtin and cannot be used as a binding name -- hint: rename to something like `myflat` or `flatv`. [Reserved namespaces] Short builtin names are precious surface and ilo reserves a stable subset of them. 
To save agents (and their carry-forward scripts) from "what got reserved this release?" debugging cycles, the language publishes the full short-name reserve list plus a forward-compatibility rule for future builtins. **Currently reserved short names (1-3 characters).** Every name in this list is a builtin today and triggers `ILO-P011` if used as a binding or user-function name: 1-char e 2-char at hd pi tl rd wr ct 3-char abs avg b64 cap cat cel chr cos del det dot env ewm exp fft fld flr flt fmt frq get grp has hed hex inv len log lsd lst lwr map max min mod now num opt ord pat pow pst put rdb rdl rep rev rgx rng rnd rou run sin slc spl srt str sum tan tau trm unq upr wra wrl zip All builtin aliases (`head`, `length`, `filter`, `concat`, `tail`, `sort`, `reverse`, `flatten`, `contains`, `group`, `average`, `print`, `trim`, `split`, `format`, `regex`, `read`, `readlines`, `readbuf`, `write`, `writelines`, `lset`, `floor`, `ceil`, `round`, `rand`, `random`, `rng`, `string`, `number`, `slice`, `unique`, `fold`) are reserved with the same shadow-prevention semantics as canonical builtin names. Binding an alias name or using it as a user-function name fires `ILO-P011` at parse time with the canonical form in the diagnostic, since the call-site rewrite to the canonical builtin silently bypasses any user binding of the same name. Previously only `rng` and `rand` had individual guards; as of 0.12.1 every alias in the table above is covered by a single `resolve_alias` check, so new aliases automatically inherit the protection when added to the table. 
Longer builtin names (`acos`, `asin`, `atan`, `flat`, `take`, `drop`, `mget`, `mset`, `mmap`, `prnt`, `mapr`, `solve`, `lstsq`, `clamp`, `cumsum`, `cprod`, `median`, `matmul`, `range`, `window`, `chunks`, `walk`, `glob`, `prod`, `fsize`, `mtime`, `isfile`, `isdir`, …) are also reserved and rejected by `ILO-P011`, but the short-name namespace above is where carry-forward scripts most often collide, so it gets explicit enumeration. Longer builtin names (`acos`, `asin`, `atan`, `flat`, `take`, `drop`, `mget`, `mset`, `mmap`, `prnt`, `mapr`, `solve`, `clamp`, `cumsum`, `cprod`, `median`, `matmul`, `range`, `window`, `chunks`, `walk`, `glob`, `prod`, `fsize`, `mtime`, `isfile`, `isdir`, `ones`, `linspace`, …) are also reserved and rejected by `ILO-P011`, but the short-name namespace above is where carry-forward scripts most often collide, so it gets explicit enumeration. **Forward-compatibility rule.** Future ilo releases add new builtins under names **4 characters or longer**. A 2-character name that is not on this list today is safe to use as a binding or function name and stays safe across releases. A 3-character name that is not on this list is _highly likely_ to stay safe but is not a hard promise - the 3-char surface is already dense, and a rare ergonomic win may justify an addition, called out in the changelog. This gives agents a deterministic safe-name strategy: **2 chars**: any unreserved 2-char name is permanently fine for bindings (`ce` for "category", `ix` for index, `mn` for "mean", `pq` for "priority queue", …). Names on the reserved list above never get removed. **3 chars**: prefer unreserved 3-char names where possible. If a future release reserves one, the migration is a 1-character rename plus a changelog entry. **4+ chars**: always safe. New builtins land here first; any short alias is added later only if the long name is unambiguous and the short doesn't shadow a plausible user binding. 
When a collision does happen, `ILO-P011` surfaces it at the binding site with a rename suggestion - never silently mis-dispatches at the call site (see the `flat=cat xs " "` example above). Combined with the reserve list, that turns every name-collision incident into a single-character rename instead of a debugging spiral. [Cross-language gotchas] Common shapes reached for from other languages. The parser and lexer surface each with a friendly hint: `AND a b`, `OR a b`, `NOT a`=`&a b`, `|a b`, `!a`=`ILO-L001` `=a b`=`<=a b`, `>=a b` (single token)=`ILO-P003` `f=fn x:n>n;+x 1` (lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-P009` `\x{+x 1}` (Haskell/Rust lambda)=`(x:n>n;+x 1)` (parenthesised lambda)=`ILO-L001` `main:>n;body`=`main>n;body` (no `:` before `>`)=`ILO-P003` Multi-line body without braces=`@k xs{body}`, `cond{body}` on one line=`ILO-P003` `cond{^"err"}` braced-cond=Braceless `cond ^"err"` for early return=hint only `- -*a b *c d` (double-minus)=`- 0 +*a b *c d` (negate the sum)=`ILO-P021` `[k fmt2 v 2]` (call in list)=`[k (fmt2 v 2)]` or bind-first=`ILO-P101` `pts=gen-pts;cs0=[...];prnt cs0` at top level=`main>_;pts=gen-pts;cs0=[...];prnt cs0` (wrap in `main>_;`)=`ILO-P102` `((((...((1+1))))...))` 1000 deep=bind intermediates, or pass `--max-ast-depth N`=`ILO-P103` `dx=xj 0-xi` (call vs binop)=`-xj xi` or pre-bind: `nxi=0-xi;+xj nxi`=`ILO-T005` `tup.0` / `pair.0` (tuple access)=bind from `zip`-pair, then `at pair 0` (no tuple type)=`ILO-T004` Each case fires a hint pointing at the canonical form; the agent's first retry should be the right one. Identifier-shaped collisions with builtin names (`len=...`, `sin=...`) are rejected with `ILO-P011` plus a rename suggestion. The list-literal call trap (`ILO-P101`) catches the case where a variadic builtin (`fmt`, `fmt2`) appears bare inside `[...]`. Fixed-arity builtins (`str`, `at`, `map`, ...) 
auto-expand to a call as one element, but variadic ones can't (the parser doesn't know where their args end), so the bare form would silently fall through as multiple elements with the builtin name as an undefined Ref. Fix by wrapping the call in parens (`[k (fmt2 v 2)]`) or binding first. The top-level chain trap (`ILO-P102`) catches a bare `name=expr` at the top level. ilo requires every binding to live inside a function body; a top-level `pts=gen-pts;cs0=[[...]]; ...; prnt cs2` without a `main>_;` (or any) header used to either die on the `=` (a bare `ILO-P003`) or get slurped into a previous function's body and emit a wall of misleading `ILO-T005` cascades on the wrong line. `ILO-P102` collapses both shapes into a single diagnostic that names the offending binding and suggests the canonical `main>_;` wrapper. The double-minus trap (`ILO-P021`) catches the silent-miscompile shape `- -<op>a b <op>c d` for `<op>` in `{+,*,/}`. Read intuitively as `-(a*b) - (c*d)` but parses as `-((a*b) - (c*d)) = -(a*b) + (c*d)` because the inner `-` greedily consumes both prefix-binop groups as binary subtract and the outer `-` falls back to unary negate. Fix by negating the sum (`- 0 +*a b *c d`) or binding first (`p=*a b;q=*c d;- 0 +p q`). Single-atom variants like `- -a b` remain accepted since they're unambiguous. The call-vs-binop trap (`ILO-T005` with tailored hint) catches the assignment-RHS shape `name expr` where `name` is a bound non-fn value (typically a parameter). Whitespace-juxtaposition is the call syntax in ilo, so `dx=xj 0-xi` parses as `dx=(xj 0)-xi` — a call to `xj` with argument `0`. Verification fails because `xj` isn't a function. The hint surfaces the prefix-operator alternatives (`-xj xi`, `+xj <expr>`) and the pre-bind workaround. The misparse is most common when an agent reaches for infix arithmetic between a parameter and a subexpression; pre-binding the operand always resolves the ambiguity. `ilo --explain ILO-T005` includes the full gotcha walkthrough.
The tuple-access trap (`ILO-T004` with the `at ` hint) catches `tup.0` / `pair.0` shapes where `tup` / `pair` was never bound. ilo has no tuple type. `zip xs ys` returns `L (L n)` — a list of two-element lists — so destructuring a pair is `at pair 0` / `at pair 1`, not `pair.0` / `pair.1`. The hint names the exact `at` call to write. (`pair.0` itself is still valid sugar for list indexing once `pair` is bound to an `L T`; the diagnostic only fires when the identifier is unbound.) The AST depth cap (`ILO-P103`) catches deeply nested source that would otherwise blow the parser stack. Any context that compiles untrusted text - `ilo serv`, the bare-positional dispatch, the `--ast` dump - is exposed to a payload of the shape `((((...((1+1))))...))` 1000 levels deep that recurses straight through the OS thread stack. The default cap of 256 is far above anything hand-written (the in-tree examples top out under 20) and low enough to keep the worst-case stack frame in `parse_atom`/`parse_expr` inside the default 8 MB main-thread stack. Override with `--max-ast-depth N` on `ilo`, `ilo run`, `ilo check`, `ilo build`, and `ilo serv` when a legitimate program needs deeper nesting. COMMENTS: -- full line comment +a b -- end of line comment -- no multi-line comments; use consecutive -- lines -- like this Single-line only. `--` to end of line. No multi-line comment syntax - newlines are a human display concern, not a language concern. An entire ilo program can be one line. Use consecutive `--` lines when humans need multi-line comments. Stripped at the lexer level before parsing - comments produce no AST nodes and cost zero runtime tokens. Generating `--` costs 1 LLM token, so comments are essentially free. **Gotcha:** `--x 1` is a comment, not "negate (x minus 1)". The lexer matches `--` greedily as a comment and eats the rest of the line. 
To negate a subtraction, use a space or bind first: -- DON'T: --x 1 (comment, not negate-subtract) -- DO: - -x 1 (space separates the two minus operators) -- DO: r=-x 1;-r (bind first) OPERATORS: Both prefix and infix notation are supported. **Prefix is preferred** - it is the token-optimal form that eliminates parentheses and produces denser code. Infix is available for readability when needed. [Binary] `+a b`=`a + b`=add / concat / list concat=`n`, `t`, `L` `+=a v`=append to list (returns new list, see [Append semantics](#append-semantics-+=))=`L` `-a b`=`a - b`=subtract=`n` `*a b`=`a * b`=multiply=`n` `/a b`=`a / b`=divide=`n` `=a b`=`a == b`=equal (prefix `=` is preferred; `==a b` also accepted)=any `!=a b`=`a != b`=not equal=any `>a b`=`a > b`=greater than=`n`, `t` `<a b`=`a < b`=less than=`n`, `t` `>=a b`=`a >= b`=greater or equal=`n`, `t` `<=a b`=`a <= b`=less or equal=`n`, `t` `&a b`=`a & b`=logical AND (short-circuit)=any (truthy) `|a b`=`a | b`=logical OR (short-circuit)=any (truthy) [Append semantics (`+=`)] `+=xs v` is **pure-shaped**, despite the imperative-looking syntax. It returns a new list with `v` appended and does **not** mutate `xs` in the caller's scope. It works in every position a value-producing expression works: -- 1. Rebind (canonical accumulator pattern) xs=[];@i 0..3{xs=+=xs i};xs -- [0, 1, 2] -- 2. Non-rebind assignment (xs preserved) xs=[1, 2, 3];ys=+=xs 99 -- xs is still [1, 2, 3]; ys is [1, 2, 3, 99] -- 3. Pipeline / argument position len +=xs 99 -- length of [xs..., 99] sum +=xs 99 -- sum of [xs..., 99] The rebind shape `xs = +=xs v` is the standard foreach-build accumulator. When the binding is RC=1 the engines mutate the underlying buffer in place (amortised O(1) per push) - but this is a behind-the-scenes optimisation. To any observer the operation is still functional: nothing outside the rebind sees the old `xs`. The non-rebind shape `ys = +=xs v` always allocates a fresh list and leaves `xs` untouched, so source aliases are safe.
There is no separate `push` builtin. `+=` covers every use case and is shorter; adding an alias would mean two ways to spell the same operation, costing reasoning tokens and surface area. [Unary] `-x`=negate=`n` `!x`=logical NOT=any (truthy) [Special infix] `a??b`=nil-coalesce (if a is nil, return b)=any `a>>f`=pipe (desugar to `f(a)`)=any **`??` precedence.** Infix `??` is parsed by `maybe_nil_coalesce` after the primary expression — it binds **looser than every arithmetic, comparison, and boolean operator**, and tighter than `>>` (pipe). So `c??0+1` is `c ?? (0+1)`, not `(c??0) + 1`. Prefix `??x default` mirrors the infix form: the default slot is a full expression, exactly like the right operand of any other prefix binop. This means **`??` inside a prefix-binop chain follows the standard prefix-binop rule**: the outer op consumes its left atom, and `??` then binds the next atom as its value and the rest as its default. To get `(a ?? d) + b` you must bind first or wrap in parens: +a ??d b -- = a + (d ?? b) ← parses as prefix `??d b` +(a??d) b -- = (a ?? d) + b ← parens force the grouping x=a??d;+x b -- = (a ?? d) + b ← bind-first, manifesto-preferred The same shape applies to every prefix binop (`-a ??d b`, `*x ??y z`, `>p ??d r`, etc.). The grouping is consistent with `+a *b c` = `a + (b*c)` — a prefix op in the right-operand slot consumes its own operands greedily. The trap is that `??` reads visually like it should be sticky to the preceding atom; it isn't. When the LHS of `??` is the value being defaulted, bind first or wrap in parens. The analogous shape with the boolean operators (`+a |0 b`, `*a &1 b`) parses the same way, but those produce a type error at verify time (`+` / `*` on a bool result), so they fail loudly rather than silently miscompiling. The `??` shape is the dangerous one: both sides of `??` can be `n`, so the parse silently produces the wrong arithmetic. 
[Prefix nesting (no parens needed)] +*a b c -- (a * b) + c *a +b c -- a * (b + c) >=+x y 100 -- (x + y) >= 100 -*a b *c d -- (a * b) - (c * d) +a ??c 0 -- a + (c ?? 0) ← not (a ?? 0) + c *x ??y 1 -- x * (y ?? 1) ← not (x ?? y) * 1 The outer prefix op binds the inner prefix subexpression as its **left** operand, regardless of operator precedence. With two same-precedence ops side by side this is easy to misread: */a b c -- (a/b) * c ← NOT (a*b)/c /*a b c -- (a*b) / c ← NOT (a/b)*c +-a b c -- (a-b) + c ← NOT (a+b)-c -+a b c -- (a+b) - c ← NOT (a-b)+c The runtime emits a `hint:` diagnostic when one of these four pairs appears at a prefix position, since the parse order disagrees with the natural left-to-right reading. To force the other grouping, swap the ops or bind the inner result first: -- Want (a*b)/c with a=6, b=2, c=3: r=*a b;/r c -- bind, then divide → 4 /*a b c -- equivalent, swapping the prefix-pair order [Infix precedence] Standard mathematical precedence (higher binds tighter): 6=`*` `/` 5=`+` `-` `+=` 4=`>` `<` `>=` `<=` 3=`=` `!=` 2=`&` 1=`|` 0=`??` (binds looser than every arithmetic/boolean op; tighter than `>>`) Function application binds tighter than all infix operators: f a + b -- (f a) + b, NOT f(a + b) x * y + 1 -- (x * y) + 1 (x + y) * 2 -- parens override precedence Each nested prefix operator saves 2 tokens (no `(` `)` needed). Flat prefix like `+a b` saves 1 char vs `a + b`. Across 25 expression patterns, prefix notation saves **22% tokens** and **42% characters** vs infix. See [research/explorations/prefix-vs-infix/](research/explorations/prefix-vs-infix/) for the full benchmark. Disambiguation: `-` followed by one atom is unary negate, followed by two atoms is binary subtract. [Operands] Operator operands are **atoms** (literals, refs, field access), **nested prefix operators**, or **known-arity function calls**. 
The prefix-binop operand parser dispatches to call parsing when the ident at the cursor is a known-arity user fn or builtin AND the next token can start another operand: wh >len q 0{body} -- parses as wh > (len q) 0 { body } +f g h -- if f is 1-arity: BinOp(+, Call(f, [g]), h) -lnx 5 lnx 3 -- BinOp(-, Call(lnx, [5]), Call(lnx, [3])) -dbl 5 -- Negate(Call(dbl, [5])) - unary on a call This parallels the `??` precedent: `??x default` accepts a call expression on the value side. Applies to every prefix-binop family member - `+`, `-`, `*`, `/`, comparisons, `&`, `|`, `+=` - and to unary negate when the call consumes the only operand. The same expansion also applies to the then/else slots of the prefix-ternary family (`?=cond a b`, `?>cond a b`, …) and the `?h cond a b` keyword form, so `?h =a b sev sc "NONE"` parses `sev sc` as a nested call without parens or a bind-first. Bare locals that shadow a user fn name still resolve via `Ref` rather than expanding into a zero-arg call, so `&e f{...}` where `f` is a local still parses as the bool operator with two refs. When the call expansion isn't available (the ident is a local that shadows a fn name, or the call's arity doesn't fit the remaining tokens), bind the call result first: r=fac p;*n r -- bind, then operate - always unambiguous **Negative literals vs binary minus**: the lexer greedily includes a leading `-` into number tokens. `-1`, `-7`, `-0` are all number literals at fresh-expression positions. To subtract from zero at the start of a statement, use a space: `- 0 v` (Minus token, then `0`, then `v`). f v:n>n;-0 v -- WRONG: -0 is Number(-0.0); v is a stray token f v:n>n;- 0 v -- OK: binary subtract: 0 - v = -v The lexer splits a glued negative literal back into `Minus + Number` when the previous token is one of `;`, `\n`, `=`, `{`, `(`, or `-`.
The `-` context covers the operand slot of an outer prefix-minus, so `- -0 a b` lexes as `-, -, 0, a, b` and parses as `Subtract(Subtract(0, a), b)` = `-a - b` rather than tripping `ILO-P020`. Negative literals after an Ident, `[`, or another prefix binop (`+`, `*`, `/`) stay glued so call args (`at xs -1`), list literals (`[-2 1 3]`), and binary operands (`+a -3`) read naturally. **Subtraction spacing convention**: for general subtraction at statement position, write `a - b` with spaces on **both** sides. `a -b` (glued, no space before the `-`) is not a binary subtract: the lexer packs `-b` into a negative-literal token because the previous token (`a`, an Ident) is one of the keep-glued contexts above. That's deliberate so call args and list elements read naturally, but it means `0 -1.5` is a parse error (`ILO-P001: expected declaration, got number `-1.5`` with a tailored hint pointing at this rule). For a bare negative value as an expression, wrap in parens: `(-1.5)`. STRING LITERALS: Text values are written in double quotes. Escape sequences: `\n`=newline (0x0A) `\t`=tab (0x09) `\r`=carriage return (0x0D) `\f`=form feed (0x0C, PDF page separator) `\b`=backspace (0x08) `\v`=vertical tab (0x0B) `\a`=bell (0x07) `\0`=null (0x00) `\"`=literal double quote `\\`=literal backslash `\/`=literal forward slash (JSON passthrough) Unknown escapes (e.g. `\z`) preserve the backslash + char verbatim. "hello\nworld" -- two-line string "col1\tcol2" -- tab-separated spl text "\n" -- split file content into lines spl pdf "\f" -- split pdftotext output into pages [Triple-quoted strings: `"""..."""`] Same surface as `"..."` (same escape decoding, same `{name}` interpolation) with two extra affordances: 1. Raw newlines are allowed inside the literal, so multi-line content does not need `cat`-concatenation or `\n` escapes. 2. 
When the closing `"""` sits on its own line, the leading newline is dropped and the common leading whitespace (matching the indent of the closing-`"""` line) is stripped from every content line. The terminating `\n` of the last content line is preserved. This is the Python PEP 257 / Rust `indoc!` convention, so indented source produces clean output. banner>t """ line one line two """ -- value is "line one\nline two\n" inline>t """foo bar""" -- value is "foo\n bar" (no dedent: closing inline) len """hello""" -- 5 (single-line form, no newline) len """""" -- 0 (empty body) Inside `"""..."""` a single `"` is literal: only `"""` ends the literal. Escapes (`\n`, `\t`, ...) and `{name}` interpolation decode identically to the single-quoted form, so triple-quoted is a drop-in upgrade rather than a parallel surface. [Interpolation: `{name}`] A bare `{name}` slot inside a double-quoted string desugars at parse time to a `fmt` call with the binding looked up by name. Manifesto principle 1: `"hello {name}"` is cheaper for an agent to write than the verbose `fmt "hello {}" name`, and both produce the same AST so they cost nothing extra at verify or run time. greet name:t>t fmt "hello {name}" -- desugars to: fmt "hello {}" name pair a:t b:t>t fmt "{a} and {b}" -- multiple slots, resolved left-to-right with-braces name:t>t fmt "{{json}} {name}" -- {{ / }} escape to literal { / } Scope (deliberately tight to keep the surface predictable): Only single-identifier slots matching the ident regex (`[a-z][a-z0-9]*(-[a-z0-9]+)*`). `{a-b}` works; `{Foo}`, `{x + 1}`, `{ }` pass through verbatim. `{{` / `}}` escape to literal `{` / `}`, but only inside strings that actually contain at least one `{ident}` slot. Strings with no interpolation slot keep `{{` / `}}` verbatim so existing programs (e.g. JSON templates) are not silently rewritten. Bare `{}` keeps its existing meaning as a positional placeholder filled by trailing args of the enclosing `fmt` call. 
Mixing `{ident}` and bare `{}` in the same string is left verbatim: pick one style per string. Use `fmt "{name} {} done" other` and the parser keeps the `{name}` literal so the bare `{}` resolves to `other`, or write `"{name} {other} done"` and drop the trailing arg. Undefined `{name}` slots surface as a normal ILO-T004 undefined-variable diagnostic against the desugared `fmt` arg, not a silent empty substitution. Interpolation does not apply in pattern literals (`"foo":` arm of a match) - literal patterns stay literal. From cdb38926c359b97c2e7c484976e277b0e079893e Mon Sep 17 00:00:00 2001 From: Daniel Morris Date: Thu, 21 May 2026 21:30:01 +0100 Subject: [PATCH 4/4] tests: bump SKILL.md bootstrap cap from 8 KB to 12 KB SKILL.md has grown past the original 8 KB cap with 0.12.x doc additions - HTTP verb cluster, getx/pstx, crypto primitives, calendar arithmetic, the headers + jpth quick-reference block. The cap exists as the guardrail against re-monolithising back toward the pre-split ~50 KB shape, not as a hard token budget, so it gets bumped in step with deliberate additions. 12 KB keeps a healthy margin under the pre-split monolith. --- tests/skill_md.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/skill_md.rs b/tests/skill_md.rs index 3dfc7ea9..7dbbfe46 100644 --- a/tests/skill_md.rs +++ b/tests/skill_md.rs @@ -249,11 +249,16 @@ fn body_is_thin_bootstrap() { "SKILL.md bootstrap missing required marker: {required}" ); } - // Bootstrap cap: the file must stay short. The old monolith was ~50 KB; - // a healthy bootstrap is well under 5 KB. Trip if it bloats past 8 KB. + // Bootstrap cap: the file must stay short. The old monolith was ~50 KB. + // 0.12.x growth (HTTP verb cluster, getx/pstx, crypto primitives, calendar + // arithmetic, headers + jpth quick-references) has pushed the bootstrap + // pointer past the original 8 KB ceiling. 
The cap is the guardrail against + // re-monolithisation, not a hard token budget, so it gets bumped in step + // with deliberate doc additions — 12 KB keeps a healthy margin under the + // ~50 KB pre-split monolith. assert!( - body.len() < 8_000, - "SKILL.md body is {} bytes; bootstrap shape should stay well under 8 KB", + body.len() < 12_000, + "SKILL.md body is {} bytes; bootstrap shape should stay well under 12 KB", body.len() ); }