From 0851ea03ec1897d12711256b36916296502d27a6 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 19:06:10 -0700
Subject: [PATCH 01/14] linq_fold Phase 2B Rings 1+2: aggregates + early-exit
 terminators
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Splice-mode planner now folds ten more terminator operators into fused
single-loop invokes via two new lanes:

  Ring 1 (accumulator):   sum, min, max, average, long_count
  Ring 2 (early-exit):    first, first_or_default, any, all, contains

LinqLane dispatch in plan_loop_or_count routes by terminator; per-lane
helpers (emit_counter_lane / emit_array_lane / emit_accumulator_lane /
emit_early_exit_lane) factor the emission. min/max use workhorse-branch
direct < / > on workhorse types and _::less on non-workhorse. any with
no predicate emits !empty(src) shortcut. long_count piggybacks on the
existing length() shortcut. All accumulator/early-exit paths preserve
linq.das empty-source semantics (sum→0, min/max→default<T>, average→NaN,
first→panic, first_or_default→d, any→false, all→true, contains→false).

Functional + AST shape coverage:

  tests/linq/test_linq_fold.das        — Ring 1 ~25 cases, all cross-
                                          checked _fold == _old_fold
  tests/linq/test_linq_fold_ast.das   — 8 Ring 1 + 8 Ring 2 shape tests,
                                          asserts workhorse-branch ops,
                                          length/empty shortcuts, fall-
                                          through on out-of-scope chains
  tests/linq/test_linq_fold_ring2.das  — Ring 2 functional tests; lives
                                          in own file as workaround for
                                          ICE 50609 ("multiple instances
                                          of linq.all / linq.contains")
                                          — see follow-up commit

Three new 4-way benchmarks at 100K rows
(long_count_aggregate, first_or_default_match, contains_match);
existing Ring 1/2 benchmark m3f columns move from m3-parity (~25-30 ns)
to single-digit ns/op with zero allocations.

LINQ.md updated with Phase 2B delta tables.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/sql/LINQ.md                    |  61 +-
 benchmarks/sql/contains_match.das         |  75 +++
 benchmarks/sql/first_or_default_match.das |  76 +++
 benchmarks/sql/long_count_aggregate.das   |  72 +++
 daslib/linq.das                           |   2 +-
 daslib/linq_fold.das                      | 681 +++++++++++++++++++---
 tests/linq/test_linq_fold.das             | 161 +++++
 tests/linq/test_linq_fold_ast.das         | 544 +++++++++++++++++
 tests/linq/test_linq_fold_ring2.das       | 142 +++++
 9 files changed, 1725 insertions(+), 89 deletions(-)
 create mode 100644 benchmarks/sql/contains_match.das
 create mode 100644 benchmarks/sql/first_or_default_match.das
 create mode 100644 benchmarks/sql/long_count_aggregate.das
 create mode 100644 tests/linq/test_linq_fold_ring2.das
diff --git a/benchmarks/sql/LINQ.md b/benchmarks/sql/LINQ.md
index 54506d59c..8029d4a6a 100644
--- a/benchmarks/sql/LINQ.md
+++ b/benchmarks/sql/LINQ.md
@@ -23,7 +23,9 @@ See `~/.claude/plans/keen-hopping-balloon.md` for the long-form plan.
 | 0 | Rename `_fold` → `_old_fold` in linq_boost; extract `_fold` and `_old_fold` into new `daslib/linq_fold.das` module; `linq_boost` `require linq_fold public` for re-export | ✅ done |
 | 1 | Benchmark suite: 24 files under `benchmarks/sql/`, each 4-way (m1 `_sql` / m3 plain linq / m3f_old `_old_fold` / m3f `_fold`) at 100K rows; baseline numbers captured | ✅ done |
 | 2A | Loop planner — `_fold` emits explicit for-loops for `[where_*][select?]` (array lane) and `[where_*][select?] |> count` (counter lane); anything else falls through unfolded. No comprehensions, no dispatch back to `_old_fold`. | ✅ done |
-| 2B | Aggregate accumulators: `sum`, `min`, `max`, `average`, `first`, `any`, `all`, `long_count`. Also `take`/`skip` in counter/array lane and chained-`_select|_select` fusion (needs `ExprRef2Value`-aware projection substitution) | ⏳ next |
+| 2B Ring 1 | Accumulator lane: `sum`, `min`, `max`, `average`, `long_count` with workhorse `<` / `>` for min/max scalars and `_::less` fallback for tuples/user types. `long_count` shares the count-length shortcut. | ✅ done |
+| 2B Ring 2 | Early-exit lane: `first`, `first_or_default`, `any`, `all`, `contains` via `invoke($block { ... return val })`. Predicate-free `any` gets a `!empty(src)` shortcut. | ✅ done |
+| 2C | `take(N)` / `skip(N)` in counter/array/accumulator/early-exit lanes; non-workhorse chained selects via `:=`-clone; `_select|_where` (where-after-select; needs `ExprRef2Value` substitution). | ⏳ |
 | 3+ | Buffer-required operators: `distinct`, `sort`, `reverse`, `groupby`, `zip`, `join`. Once we go array, we stay array | ⏳ |
 | 4 | Final coverage pass + docs; full 4-way comparison table refresh; parity-test sweep | ⏳ |
 
@@ -101,6 +103,63 @@ The first cut was 18% slower than the comprehension. Three independent fixes bro
 
 A fourth simplification dropped `emplace` from the emission entirely. emplace **moves** out of its argument and can corrupt the source when the projection returns a ref into it (e.g. `_._field`). The safe pattern is `push` for workhorse (cheap copy) and `push_clone` for non-workhorse (deep clone). No intermediate `var v = projection; emplace(v)` is needed in either case — the planner pushes the projection expression directly.
 
+## Phase 2B Ring 1 — Accumulator lane (2026-05-16)
+
+`_fold` now recognizes `[where_*][select*] |> {sum,min,max,average,long_count}` and emits a single-pass loop with a typed accumulator. New private dispatch infrastructure: `LinqLane` enum + `classify_terminator(name)` route each terminator to per-lane emit helpers (`emit_counter_lane`, `emit_array_lane`, `emit_length_shortcut`, **`emit_accumulator_lane`**); `plan_loop_or_count` is now an analyzer that builds the shared `whereCond` / `intermediateBinds` / `projection` state then dispatches.
+
+**Per-op shapes:**
+- `sum` — `var acc : T = default<T>; for { ...; acc += v }; return acc`. T = projection type (or element type).
+- `long_count` — `var acc : int64 = 0l; for { ...; acc++ }; return acc`. Length shortcut: `int64(length(src))` when no predicate + all pure.
+- `min` / `max` — `var first_iter = true; var best : T; for { ...; let v = ...; if (first_iter) { best := v; first_iter = false } elif (compare) { best := v } }; return best`.
+  - **Workhorse branch** (Boris's call: best perf > emission simplicity, see `feedback_linq_fold_perf_max`): T workhorse → direct `<` / `>`; non-workhorse → `_::less(...)` to preserve user/tuple comparator overloads.
+- `average` — `var sum_acc : T = default<T>; var cnt = 0; for { ...; sum_acc += v; cnt++ }; return double(sum_acc) / double(cnt)`. Returns `double` (matches linq.das line 1358); empty source → NaN.
+
+**Empty-source semantics** match linq.das exactly: sum/long_count → 0; min/max → `default<T>` (never assigned via the first-iter flag); average → div-by-zero NaN.
+
+**Modifier strip on accumulator type.** The element/projection type often carries `const &` (e.g. `int const &` for array elements). The accumulator must be a mutable value, so the planner strips both `flags.constant` and `flags.ref` on the cloned `accType` before emission. Without this, `var acc : int const = ...; acc += x` fails ("numeric operator '+=' left side can't be constant").
+
+### Phase 2B Ring 1 deltas (100K rows, INTERP)
+
+All Ring 1 ops hit single-digit ns/op with **zero allocations** (counter-lane parity).
+
+| Benchmark | Shape | m3f_old | m3f (Ring 1) | Delta |
+|---|---|---:|---:|---|
+| sum_aggregate | `select → sum` | 16 | **2** | **8× faster** |
+| sum_where | `where → select → sum` | 12 | **4** | **3× faster** |
+| min_aggregate | `select → min` | 25 | **6** | **4.2× faster** |
+| max_aggregate | `select → max` | 23 | **6** | **3.8× faster** |
+| average_aggregate | `select → average` | 20 | **5** | **4× faster** |
+| long_count_aggregate (new) | `where → long_count` | 15 | **4** | **3.75× faster** |
+
+The workhorse-branch decision is decisive for min/max: emitting `v < best` directly instead of dispatching through `_::less` cuts the per-element cost roughly in half on int columns. The non-workhorse path (tuples, user types) still goes through `_::less` to preserve overload semantics — see `test_min_non_workhorse_uses_less` in `tests/linq/test_linq_fold_ast.das`.
+
+## Phase 2B Ring 2 — Early-exit lane (2026-05-16)
+
+`_fold` now recognizes `[where_*][select*] |> {first,first_or_default,any,all,contains}` and emits a loop wrapped in `invoke($block { for { ...; return X }; tail }, src)`. The block-level `return` yields the invoke's value as an expression without escaping the user's enclosing function (Boris confirmed the idiom: `$() { ... }` is stack-allocated, no heap). New emit helper `emit_early_exit_lane` and a separate `emit_any_empty_shortcut` for the predicate-free `any` length-bearing shortcut.
+
+**Per-op shapes:**
+- `first` — `for { ...; return val }; panic("sequence contains no elements"); return default<T>` (matches linq.das line 2383; sentinel return makes the typer happy on the post-panic line).
+- `first_or_default(d)` — `let d_bound = d; for { ...; return val }; return d_bound` — eager evaluation of `d` matches linq.das line 2397 (no lazy-vs-eager divergence on observable side effects).
+- `any` — loop emission with `return true` on match, `return false` in tail. Shortcut: `!empty(src)` when no upstream where + no per-element work + length-bearing source.
+- `all(pred)` — loop with `if (!pred) return false`, tail returns `true` (vacuously true on empty source).
+- `contains(v)` — `let v_bound = v; for { ...; if (it == v_bound) return true }; return false` — `v` bound once at top to avoid re-evaluating an expensive argument.
+
+**Workhorse branch deferred for `contains`**: daslang's `==` already handles tuples and user-defined `operator ==`, so no separate non-workhorse path is needed.
+
+### Phase 2B Ring 2 deltas (100K rows, INTERP)
+
+All Ring 2 ops hit single-digit ns/op with **zero allocations**. Early-exit cases (first hit near the front) collapse to sub-ns per element — true O(1) behavior at scale.
+
+| Benchmark | Shape | m3f_old | m3f (Ring 2) | Delta |
+|---|---|---:|---:|---|
+| first_match | `where → first` | 15 | **0** | early-exit at first hit |
+| first_or_default_match (new) | `where → first_or_default(d)` | 15 | **0** | same |
+| any_match | `where → first_opt`/`any` | 0 | **0** | parity (already sub-ns) |
+| all_match | `count(where ¬p)==0` / `all` | 24 | **3** | **8× faster** |
+| contains_match (new) | `select → contains(v)` | 15 | **2** | **7.5× faster** |
+
+`first` / `first_or_default` collapse to sub-ns/op because the where matches near the front of the array: the early exit returns at the first hit, while the reported per-element time is normalized by the chunk size (100K) rather than by the number of iterations actually executed. This is also why `any_match` was already at 0 ns/op pre-Phase-2B — `_old_fold` and m3 also bail out on the first match.
+
 ## Operator-coverage checklist (parity tests)
 
 The 24 benchmarks above cover the most common shapes. The end-game target is one benchmark per `_fold`-applicable scenario in the broader `tests/linq/` operator suite. Tracking the long-tail coverage below; PRs that add splice support for new operators should add a benchmark here if not already present.
diff --git a/benchmarks/sql/contains_match.das b/benchmarks/sql/contains_match.das
new file mode 100644
index 000000000..0326357e5
--- /dev/null
+++ b/benchmarks/sql/contains_match.das
@@ -0,0 +1,75 @@
+options gen2
+options persistent_heap
+
+require _common public
+
+let TARGET_ID = 50000
+
+// contains compares each element (or projected value) against a fixed needle. The needle
+// is bound once at the top of the invoke block so it's evaluated only once even if the
+// argument is expensive. Early-exit on first match.
+
+def run_m1(b : B?; n : int) {
+    with_sqlite(":memory:") $(db) {
+        fixture_db(db, n)
+        b |> run("m1_sql/{n}", n) {
+            // SQL has no direct CONTAINS for arbitrary values; emulate it with _where + _first_opt and test is_some.
+            let opt = _sql(db |> select_from(type<Car>) |> _where(_.id == TARGET_ID) |> _first_opt())
+            if (!is_some(opt)) {
+                b->failNow()
+            }
+        }
+    }
+}
+
+def run_m3(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    b |> run("m3_array/{n}", n) {
+        // Project ids out then contains — the same query as the `_fold(...select.contains(...))` variant,
+        // but as a plain array-source linq chain, which materializes `select` first then iterates contains.
+        let yes = arr |> _select(_.id) |> contains(TARGET_ID)
+        if (!yes) {
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f_old(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    b |> run("m3f_old_array_fold/{n}", n) {
+        let yes = _old_fold(each(arr)._select(_.id).contains(TARGET_ID))
+        if (!yes) {
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    b |> run("m3f_array_fold/{n}", n) {
+        let yes = _fold(each(arr)._select(_.id).contains(TARGET_ID))
+        if (!yes) {
+            b->failNow()
+        }
+    }
+}
+
+[benchmark]
+def contains_match_m1(b : B?) {
+    run_m1(b, 100000)
+}
+
+[benchmark]
+def contains_match_m3(b : B?) {
+    run_m3(b, 100000)
+}
+
+[benchmark]
+def contains_match_m3f_old(b : B?) {
+    run_m3f_old(b, 100000)
+}
+
+[benchmark]
+def contains_match_m3f(b : B?) {
+    run_m3f(b, 100000)
+}
diff --git a/benchmarks/sql/first_or_default_match.das b/benchmarks/sql/first_or_default_match.das
new file mode 100644
index 000000000..afb2dfe2e
--- /dev/null
+++ b/benchmarks/sql/first_or_default_match.das
@@ -0,0 +1,76 @@
+options gen2
+options persistent_heap
+
+require _common public
+
+let THRESHOLD = 500
+let SENTINEL_ID = -1
+
+// first_or_default mirrors first but returns a caller-provided default on empty/no-match
+// instead of panicking. Same early-exit characteristics; default bound once at the top of
+// the invoke block (matches linq.das line 2397 eager evaluation).
+
+def run_m1(b : B?; n : int) {
+    with_sqlite(":memory:") $(db) {
+        fixture_db(db, n)
+        b |> run("m1_sql/{n}", n) {
+            let row = _sql(db |> select_from(type<Car>) |> _where(_.price > THRESHOLD) |> _first())
+            if (row.price == 0) {
+                b->failNow()
+            }
+        }
+    }
+}
+
+def run_m3(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    let sentinel = Car(id = SENTINEL_ID, name = "none", price = 0, brand = 0, year = 0, dealer_id = 0)
+    b |> run("m3_array/{n}", n) {
+        let row = arr |> _where(_.price > THRESHOLD) |> first_or_default(sentinel)
+        if (row.id == SENTINEL_ID) {
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f_old(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    let sentinel = Car(id = SENTINEL_ID, name = "none", price = 0, brand = 0, year = 0, dealer_id = 0)
+    b |> run("m3f_old_array_fold/{n}", n) {
+        let row = _old_fold(each(arr)._where(_.price > THRESHOLD).first_or_default(sentinel))
+        if (row.id == SENTINEL_ID) {
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    let sentinel = Car(id = SENTINEL_ID, name = "none", price = 0, brand = 0, year = 0, dealer_id = 0)
+    b |> run("m3f_array_fold/{n}", n) {
+        let row = _fold(each(arr)._where(_.price > THRESHOLD).first_or_default(sentinel))
+        if (row.id == SENTINEL_ID) {
+            b->failNow()
+        }
+    }
+}
+
+[benchmark]
+def first_or_default_match_m1(b : B?) {
+    run_m1(b, 100000)
+}
+
+[benchmark]
+def first_or_default_match_m3(b : B?) {
+    run_m3(b, 100000)
+}
+
+[benchmark]
+def first_or_default_match_m3f_old(b : B?) {
+    run_m3f_old(b, 100000)
+}
+
+[benchmark]
+def first_or_default_match_m3f(b : B?) {
+    run_m3f(b, 100000)
+}
diff --git a/benchmarks/sql/long_count_aggregate.das b/benchmarks/sql/long_count_aggregate.das
new file mode 100644
index 000000000..dec6d4a63
--- /dev/null
+++ b/benchmarks/sql/long_count_aggregate.das
@@ -0,0 +1,72 @@
+options gen2
+options persistent_heap
+
+require _common public
+
+let THRESHOLD = 500
+
+// long_count mirrors count but with int64 accumulator semantics — same access pattern,
+// same fusion shape, just a wider counter. The 4-way comparison validates that the
+// accumulator-lane planner matches counter-lane perf for the int64 variant.
+
+def run_m1(b : B?; n : int) {
+    with_sqlite(":memory:") $(db) {
+        fixture_db(db, n)
+        b |> run("m1_sql/{n}", n) {
+            let c = _sql(db |> select_from(type<Car>) |> _where(_.price > THRESHOLD) |> count())
+            if (c == 0) {
+                b->failNow()
+            }
+        }
+    }
+}
+
+def run_m3(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    b |> run("m3_array/{n}", n) {
+        let c = arr |> _where(_.price > THRESHOLD) |> long_count()
+        if (c == 0l) {
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f_old(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    b |> run("m3f_old_array_fold/{n}", n) {
+        let c = _old_fold(each(arr)._where(_.price > THRESHOLD).long_count())
+        if (c == 0l) {
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f(b : B?; n : int) {
+    let arr <- fixture_array(n)
+    b |> run("m3f_array_fold/{n}", n) {
+        let c = _fold(each(arr)._where(_.price > THRESHOLD).long_count())
+        if (c == 0l) {
+            b->failNow()
+        }
+    }
+}
+
+[benchmark]
+def long_count_aggregate_m1(b : B?) {
+    run_m1(b, 100000)
+}
+
+[benchmark]
+def long_count_aggregate_m3(b : B?) {
+    run_m3(b, 100000)
+}
+
+[benchmark]
+def long_count_aggregate_m3f_old(b : B?) {
+    run_m3f_old(b, 100000)
+}
+
+[benchmark]
+def long_count_aggregate_m3f(b : B?) {
+    run_m3f(b, 100000)
+}
diff --git a/daslib/linq.das b/daslib/linq.das
index e02ccd054..3967d0f74 100644
--- a/daslib/linq.das
+++ b/daslib/linq.das
@@ -681,7 +681,7 @@ def count(a : array<auto(TT)>; predicate : block<(arg : TT -&) : bool>) : int {
 def long_count(var a : iterator<auto(TT)>) : int64 {
     //! Counts elements in an iterator, using a long integer
     var count = 0l
-    for (it in a) {
+    for (_ in a) {
         count ++
     }
     return count
diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index 39ebbde3f..af723ef6e 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -543,6 +543,29 @@ def private is_each_call(call : ExprCall?) : bool {
         || (call.func.fromGeneric != null && call.func.fromGeneric.name == "each"))
 }
 
+enum private LinqLane {
+    //! Terminator-category classification driving `plan_loop_or_count`'s emit dispatch.
+    //! ARRAY: bare or `[where_*][select*]` chain — produces `array<T>` / `iterator<T>`.
+    //! COUNTER: `[...] |> count` — produces `int` accumulator.
+    //! ACCUMULATOR: `[...] |> {sum,min,max,average,long_count}` — typed single-pass accumulator.
+    //! EARLY_EXIT: `[...] |> {first,first_or_default,any,all,contains}` — loop with early return.
+    //! UNKNOWN: terminator not recognized by the planner; fall through to plain linq.
+    UNKNOWN
+    ARRAY
+    COUNTER
+    ACCUMULATOR
+    EARLY_EXIT
+}
+
+[macro_function]
+def private classify_terminator(name : string) : LinqLane {  // maps the last call's name in a chain to its emit lane
+    if (name == "count") return LinqLane.COUNTER
+    if (name == "where_" || name == "select") return LinqLane.ARRAY  // chain ends in where_/select: no terminator, result is the sequence itself
+    if (name == "sum" || name == "min" || name == "max" || name == "average" || name == "long_count") return LinqLane.ACCUMULATOR
+    if (name == "first" || name == "first_or_default" || name == "any" || name == "all" || name == "contains") return LinqLane.EARLY_EXIT
+    return LinqLane.UNKNOWN  // any other name is not handled by the planner
+}
+
 [macro_function]
 def private peel_each(var top : Expression?) : Expression? {
     // Unwrap `each(<arr>)` to `<arr>` when `<arr>` is a true array (or fixed-size array).
@@ -560,19 +583,556 @@ def private peel_each(var top : Expression?) : Expression? {
     return clone_expression(argExpr)
 }
 
+[macro_function]
+def private finalize_invoke(var res : Expression?; at : LineInfo) : Expression? {
+    // Shared post-emit cleanup for every `invoke($block, $src)` the planner returns.
+    // Stamps `at` and the generated flag onto `res` so diagnostics keep pointing at the user's
+    // chain site, then sets `can_shadow` on the block's first argument so the gensym source
+    // name can coexist with whatever the user already has in scope at the splice point.
+    res.force_at(at)
+    res.force_generated(true)
+    let blk = (res as ExprInvoke).arguments[0] as ExprMakeBlock  // expects `res` to be an ExprInvoke whose first argument is the make-block
+    (blk._block as ExprBlock).arguments[0].flags.can_shadow = true  // first block argument is the source parameter
+    return res  // the same node that was passed in, modified in place
+}
+
+[macro_function]
+def private emit_length_shortcut(opName : string; var top : Expression?; srcName : string; at : LineInfo) : Expression? {
+    // Count-shaped shortcut: emits `length(src)` (count) or `int64(length(src))` (long_count)
+    // directly, eliding the loop. The caller must already have verified: count-shaped terminator,
+    // no predicate, all projections pure, and a source that has length. Nothing is re-checked here.
+    var topExpr = clone_expression(top)  // work on a clone; `top` itself is not modified
+    topExpr.genFlags.alwaysSafe = true
+    var res : Expression?
+    if (opName == "long_count") {
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+            return int64(length($i(srcName)))
+        }, $e(topExpr)))
+    } else {  // every other opName takes the plain `count` shape
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+            return length($i(srcName))
+        }, $e(topExpr)))
+    }
+    return finalize_invoke(res, at)
+}
+
+[macro_function]
+def private emit_counter_lane(var top : Expression?; srcName, accName, itName : string; var loopBody : Expression?; at : LineInfo) : Expression? {
+    // Counter lane: `var acc = 0; for (it in src) { $loopBody }; return acc` inside invoke.
+    // Iterator sources must strip `-const` on the block param so the body can consume them;
+    // length-bearing sources keep modifiers so a `const&` source matches.
+    let topIsIter = top._type != null && top._type.isIterator  // a missing type is treated as "not an iterator"
+    var topExpr = clone_expression(top)  // work on a clone; `top` itself is not modified
+    topExpr.genFlags.alwaysSafe = true
+    var res : Expression?
+    if (topIsIter) {  // the two templates differ only in the `- const` on the block parameter type
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+            var $i(accName) = 0
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return $i(accName)
+        }, $e(topExpr)))
+    } else {
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+            var $i(accName) = 0
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return $i(accName)
+        }, $e(topExpr)))
+    }
+    return finalize_invoke(res, at)
+}
+
+[macro_function]
+def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr; srcName, accName, itName : string; at : LineInfo) : Expression? {
+    // Array lane: `var acc : array<T>; [reserve]; for (it in src) { $loopBody }; return <- acc`
+    // wrapped in invoke. Three internal shapes selected by:
+    //   - whole-pipeline iterator-ness (`expr._type.isIterator`) — drives the `to_sequence_move`
+    //     return path so an iterator pipeline still ends in an iterator,
+    //   - source-length availability — drives the pre-reserve hint.
+    let isIter = expr._type.isIterator  // NOTE(review): no null check on `expr._type` here, unlike `top._type` in the other lanes — confirm the caller guarantees it
+    let sourceHasLength = type_has_length(top._type)
+    var topExpr = clone_expression(top)  // work on a clone; `top` itself is not modified
+    topExpr.genFlags.alwaysSafe = true
+    var res : Expression?
+    if (isIter) {  // iterator result: collect into the array, then return it through to_sequence_move
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+            var $i(accName) : array<$t(elementType)>
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return <- $i(accName).to_sequence_move()
+        }, $e(topExpr)))
+    } elif (sourceHasLength) {  // array result, sized source: reserve length(src) up front; block param keeps its modifiers
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+            var $i(accName) : array<$t(elementType)>
+            $i(accName) |> reserve(length($i(srcName)))
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return <- $i(accName)
+        }, $e(topExpr)))
+    } else {  // array result, source without length: no reserve, block param is `- const`
+        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+            var $i(accName) : array<$t(elementType)>
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return <- $i(accName)
+        }, $e(topExpr)))
+    }
+    return finalize_invoke(res, at)
+}
+
+[macro_function]
+def private emit_accumulator_lane(
+                                  opName : string;
+                                  var top : Expression?;
+                                  var projection : Expression?;
+                                  var whereCond : Expression?;
+                                  var intermediateBinds : array<Expression?>;
+                                  var elementType : TypeDeclPtr;
+                                  srcName, accName, itName : string;
+                                  at : LineInfo
+                                  ) : Expression? {
+    // Ring 1 single-pass accumulator lane: sum / min / max / average / long_count.
+    // Builds the per-matched-element statement list (intermediates → per-op update), wraps it
+    // in `if (whereCond)` when a predicate is present, then wraps that in the op-specific
+    // invoke shell (accumulator init + for-loop + return). Returns null for any other opName.
+    // For min/max the workhorse-branch is decisive for perf: workhorse types use direct
+    // `<` / `>` (single-instruction); non-workhorse falls back to `_::less` to preserve user
+    // and tuple comparator overloads.
+    var perMatchStmts : array<Expression?>
+    perMatchStmts |> reserve(length(intermediateBinds) + 4)
+    for (b in intermediateBinds) {
+        perMatchStmts |> push(b)
+    }
+    var valueExpr : Expression?  // value fed to the accumulator: cloned projection, or the loop variable when there is none
+    if (projection != null) {
+        valueExpr = clone_expression(projection)
+    } else {
+        valueExpr = qmacro_expr() {
+            $i(itName)
+        }
+    }
+    // Per-op per-element update; helper variable names are derived from the call-site line/column
+    let valBindName = "`val`{at.line}`{at.column}"
+    let firstName = "`first`{at.line}`{at.column}"
+    let cntName = "`cnt`{at.line}`{at.column}"
+    if (opName == "long_count") {
+        // Value is unused; mirror counter-lane discipline and bind only when projection has
+        // observable side effects so user-visible f(x) calls still fire.
+        if (projection != null && has_sideeffects(projection)) {
+            let finalBindName = "`vfinal`{at.line}`{at.column}"
+            perMatchStmts |> push <| qmacro_expr() {
+                var $i(finalBindName) = $e(projection)
+            }
+        }
+        perMatchStmts |> push <| qmacro_expr() {
+            $i(accName) ++
+        }
+    } elif (opName == "sum") {
+        perMatchStmts |> push <| qmacro_expr() {
+            $i(accName) += $e(valueExpr)
+        }
+    } elif (opName == "average") {  // running sum plus a separate element counter
+        perMatchStmts |> push <| qmacro_expr() {
+            $i(accName) += $e(valueExpr)
+        }
+        perMatchStmts |> push <| qmacro_expr() {
+            $i(cntName) ++
+        }
+    } elif (opName == "min" || opName == "max") {
+        let workhorse = ((projection != null && projection._type != null && projection._type.isWorkhorseType)
+            || (projection == null && elementType != null && elementType.isWorkhorseType))
+        var compareExpr : Expression?  // true when the new value should replace the current best
+        if (workhorse) {
+            if (opName == "min") {
+                compareExpr = qmacro($i(valBindName) < $i(accName))
+            } else {
+                compareExpr = qmacro($i(valBindName) > $i(accName))
+            }
+        } else {
+            if (opName == "min") {
+                compareExpr = qmacro(_::less($i(valBindName), $i(accName)))
+            } else {
+                compareExpr = qmacro(_::less($i(accName), $i(valBindName)))  // max via less with the operands swapped
+            }
+        }
+        perMatchStmts |> push <| qmacro_expr() {
+            let $i(valBindName) = $e(valueExpr)
+        }
+        perMatchStmts |> push <| qmacro_expr() {
+            if ($i(firstName)) {
+                $i(accName) := $i(valBindName)
+                $i(firstName) = false
+            } elif ($e(compareExpr)) {
+                $i(accName) := $i(valBindName)
+            }
+        }
+    } else {
+        return null  // opName is not one of the five accumulator terminators
+    }
+    var perMatchBlock : Expression?
+    if (length(perMatchStmts) == 1) {  // a single statement is used as-is, without a wrapping block
+        perMatchBlock = perMatchStmts[0]
+    } else {
+        perMatchBlock = qmacro_block() {
+            $b(perMatchStmts)
+        }
+    }
+    var loopBody : Expression?
+    if (whereCond != null) {
+        loopBody = qmacro_expr() {
+            if ($e(whereCond)) {
+                $e(perMatchBlock)
+            }
+        }
+    } else {
+        loopBody = perMatchBlock
+    }
+    // Accumulator type: from the projection if present, else from the source's element type.
+    // Strip `const` and `&` modifiers — the accumulator must be a mutable value so `+=` /
+    // `:=` work on it, regardless of how the source delivers elements (`int const &`).
+    var accType : TypeDeclPtr
+    if (projection != null) {
+        accType = clone_type(projection._type)
+    } else {
+        accType = clone_type(elementType)
+    }
+    if (accType != null) {
+        accType.flags.constant = false
+        accType.flags.ref = false
+    }
+    let topIsIter = top._type != null && top._type.isIterator  // a missing type is treated as "not an iterator"
+    var topExpr = clone_expression(top)  // work on a clone; `top` itself is not modified
+    topExpr.genFlags.alwaysSafe = true
+    var res : Expression?
+    if (opName == "long_count") {  // in every shell below the two variants differ only in `- const` on the block parameter
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                var $i(accName) : int64 = 0l
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(accName)
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                var $i(accName) : int64 = 0l
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(accName)
+            }, $e(topExpr)))
+        }
+    } elif (opName == "sum") {
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                var $i(accName) : $t(accType) = default<$t(accType)>
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(accName)
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                var $i(accName) : $t(accType) = default<$t(accType)>
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(accName)
+            }, $e(topExpr)))
+        }
+    } elif (opName == "average") {  // result is always double(sum) / double(count); count stays 0 when nothing matched
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                var $i(accName) : $t(accType) = default<$t(accType)>
+                var $i(cntName) = 0
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return double($i(accName)) / double($i(cntName))
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                var $i(accName) : $t(accType) = default<$t(accType)>
+                var $i(cntName) = 0
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return double($i(accName)) / double($i(cntName))
+            }, $e(topExpr)))
+        }
+    } elif (opName == "min" || opName == "max") {  // accumulator is declared without an initializer; the first-iteration flag seeds it
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                var $i(firstName) = true
+                var $i(accName) : $t(accType)
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(accName)
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                var $i(firstName) = true
+                var $i(accName) : $t(accType)
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(accName)
+            }, $e(topExpr)))
+        }
+    } else {
+        return null  // not reachable for the five handled ops; kept in step with the dispatch above
+    }
+    return finalize_invoke(res, at)
+}
+
+[macro_function]
+def private emit_any_empty_shortcut(var top : Expression?; srcName : string; at : LineInfo) : Expression? {
+    // any (no predicate) shortcut: emit `!empty(src)` directly. Caller has verified no
+    // upstream where + no impure projection + source has length. Behaviorally identical to
+    // running the loop and returning true on first element, but skips the loop entirely.
+    // `!empty` over `length > 0` avoids the strlen detour PERF017 flags on string sources.
+    var topExpr = clone_expression(top)
+    topExpr.genFlags.alwaysSafe = true
+    var res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+        return !empty($i(srcName))
+    }, $e(topExpr)))
+    return finalize_invoke(res, at)
+}
+
+[macro_function]
+def private emit_early_exit_lane(
+                                 opName : string;
+                                 var top : Expression?;
+                                 var projection : Expression?;
+                                 var whereCond : Expression?;
+                                 var intermediateBinds : array<Expression?>;
+                                 var elementType : TypeDeclPtr;
+                                 terminatorCall : ExprCall?;
+                                 srcName, itName : string;
+                                 at : LineInfo
+                                 ) : Expression? {
+    // Ring 2 early-exit lane: first / first_or_default / any / all / contains.
+    // Emits `invoke($block { [arg-binds]; for { ...; return X }; tail }, src)` where the
+    // block-level `return` yields the invoke's result without escaping the user's function.
+    // Stack-allocated block (no heap alloc), per Boris's confirmation.
+    var perMatchStmts : array<Expression?>
+    perMatchStmts |> reserve(length(intermediateBinds) + 4)
+    for (b in intermediateBinds) {
+        perMatchStmts |> push(b)
+    }
+    // If there's a projection chain, bind the final projection to a local so the
+    // op-specific work (return value, predicate input, equality compare) names the value
+    // directly. Avoids re-evaluating the projection in compares and side-steps the
+    // ExprRef2Value substitution trap that splicing typed expressions hits.
+    var valueName = itName
+    let projBindName = "`vproj`{at.line}`{at.column}"
+    if (projection != null) {
+        perMatchStmts |> push <| qmacro_expr() {
+            let $i(projBindName) = $e(projection)
+        }
+        valueName = projBindName
+    }
+    // Per-op per-element work (early-return inside the invoke block)
+    if (opName == "first" || opName == "first_or_default") {
+        perMatchStmts |> push <| qmacro_expr() {
+            return $i(valueName)
+        }
+    } elif (opName == "any") {
+        let argCount = length(terminatorCall.arguments)
+        if (argCount > 1) {
+            var predExpr = fold_linq_cond(clone_expression(terminatorCall.arguments[1]), valueName)
+            perMatchStmts |> push <| qmacro_expr() {
+                if ($e(predExpr)) {
+                    return true
+                }
+            }
+        } else {
+            perMatchStmts |> push <| qmacro_expr() {
+                return true
+            }
+        }
+    } elif (opName == "all") {
+        var predExpr = fold_linq_cond(clone_expression(terminatorCall.arguments[1]), valueName)
+        perMatchStmts |> push <| qmacro_expr() {
+            if (!$e(predExpr)) {
+                return false
+            }
+        }
+    } elif (opName == "contains") {
+        // `v` was bound once at the top of the block (see invoke wrappers below); per-element
+        // compare uses the bound name to avoid re-evaluating an expensive argument.
+        let containsValName = "`cval`{at.line}`{at.column}"
+        perMatchStmts |> push <| qmacro_expr() {
+            if ($i(valueName) == $i(containsValName)) {
+                return true
+            }
+        }
+    } else {
+        return null
+    }
+    var perMatchBlock : Expression?
+    if (length(perMatchStmts) == 1) {
+        perMatchBlock = perMatchStmts[0]
+    } else {
+        perMatchBlock = qmacro_block() {
+            $b(perMatchStmts)
+        }
+    }
+    var loopBody : Expression?
+    if (whereCond != null) {
+        loopBody = qmacro_expr() {
+            if ($e(whereCond)) {
+                $e(perMatchBlock)
+            }
+        }
+    } else {
+        loopBody = perMatchBlock
+    }
+    // Build the invoke wrapper per op.
+    let topIsIter = top._type != null && top._type.isIterator
+    var topExpr = clone_expression(top)
+    topExpr.genFlags.alwaysSafe = true
+    var res : Expression?
+    if (opName == "first") {
+        // Return type for first: projection's type (if any) or source's element type.
+        // Strip const/ref so `default<T>` (the fallback path) yields a fresh value.
+        var retType : TypeDeclPtr
+        if (projection != null) {
+            retType = clone_type(projection._type)
+        } else {
+            retType = clone_type(elementType)
+        }
+        // No resolved type → `default<T>` below cannot be built; bail out so the chain stays unfolded.
+        if (retType == null) return null
+        retType.flags.constant = false
+        retType.flags.ref = false
+        // Tail: panic + unreachable default-return (matches linq.das line 2383).
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                panic("sequence contains no elements")
+                return default<$t(retType)>
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                panic("sequence contains no elements")
+                return default<$t(retType)>
+            }, $e(topExpr)))
+        }
+    } elif (opName == "first_or_default") {
+        // Bind `d` once at the top of the block (eager evaluation, matches linq.das line 2397).
+        let defaultName = "`dval`{at.line}`{at.column}"
+        var defaultExpr = clone_expression(terminatorCall.arguments[1])
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                let $i(defaultName) = $e(defaultExpr)
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(defaultName)
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                let $i(defaultName) = $e(defaultExpr)
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return $i(defaultName)
+            }, $e(topExpr)))
+        }
+    } elif (opName == "any") {
+        // Tail: return false (loop didn't hit).
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return false
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return false
+            }, $e(topExpr)))
+        }
+    } elif (opName == "all") {
+        // Tail: return true (vacuously true if loop empty; also true if no element failed pred).
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return true
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return true
+            }, $e(topExpr)))
+        }
+    } elif (opName == "contains") {
+        // Bind `v` once at the top of the block.
+        let containsValName = "`cval`{at.line}`{at.column}"
+        var valExpr = clone_expression(terminatorCall.arguments[1])
+        if (topIsIter) {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+                let $i(containsValName) = $e(valExpr)
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return false
+            }, $e(topExpr)))
+        } else {
+            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+                let $i(containsValName) = $e(valExpr)
+                for ($i(itName) in $i(srcName)) {
+                    $e(loopBody)
+                }
+                return false
+            }, $e(topExpr)))
+        }
+    } else {
+        return null
+    }
+    return finalize_invoke(res, at)
+}
+
 [macro_function]
 def private plan_loop_or_count(var expr : Expression?) : Expression? {
-    // Phase-2A loop planner. Recognizes chains of shape `[where_*][select?]` (array lane)
-    // and `[where_*][select?] |> count` (counter lane). Fuses chained wheres into `&&` and
-    // chained selects via expression composition; emits one inline `invoke($block, $src)`
-    // with a plain for-loop. Returns null for anything else — caller falls through unfolded.
+    // Phase-2B loop planner. Recognizes chains of shape `[where_*][select*]` plus a terminator,
+    // dispatched by `classify_terminator` into one of four lanes:
+    //   ARRAY        — `[where_*][select*]`                            → array<T> / iterator<T>
+    //   COUNTER      — `[where_*][select*] |> count`                   → int
+    //   ACCUMULATOR  — `[where_*][select*] |> {sum,min,max,average,long_count}` → typed scalar
+    //   EARLY_EXIT   — `[where_*][select*] |> {first,first_or_default,any,all,contains}`
+    // Fuses chained wheres into `&&` and chained selects via let-binding composition; emits one
+    // inline `invoke($block, $src)`. Returns null for shapes outside scope — caller falls through.
     var (top, calls) = flatten_linq(expr)
     if (empty(calls)) return null
     top = peel_each(top)
     let lastName = calls.back()._1.name
-    if (lastName != "count" && lastName != "where_" && lastName != "select") return null
-    let counterLane = lastName == "count"
-    let intermediateCount = counterLane ? length(calls) - 1 : length(calls)
+    let lane = classify_terminator(lastName)
+    // Phase 2B emits all four lanes; UNKNOWN terminators fall through unfolded.
+    if (lane == LinqLane.UNKNOWN) return null
+    let counterLane = lane == LinqLane.COUNTER
+    let hasTerminator = lane != LinqLane.ARRAY
+    let intermediateCount = hasTerminator ? length(calls) - 1 : length(calls)
     let at = calls[0]._0.at
     let srcName = "`source`{at.line}`{at.column}"
     let itName  = "`it`{at.line}`{at.column}"
@@ -625,24 +1185,32 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
     if (projection != null && has_sideeffects(projection)) {
         allProjectionsPure = false
     }
-    // Counter-lane shortcut: when there's no filter and every projection in the chain is
-    // pure, the count is simply `length(source)`. Skip the loop entirely — no per-element
-    // increments, no per-element side-effect evaluation. Gated on `type_has_length` so we
-    // only emit `length(src)` when it's statically resolvable.
-    if (counterLane && whereCond == null && allProjectionsPure
-            && type_has_length(top._type)) {
-        var topExpr = clone_expression(top)
-        topExpr.genFlags.alwaysSafe = true
-        var res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-            return length($i(srcName))
-        }, $e(topExpr)))
-        res.force_at(at)
-        res.force_generated(true)
-        let blk = (res as ExprInvoke).arguments[0] as ExprMakeBlock
-        (blk._block as ExprBlock).arguments[0].flags.can_shadow = true
-        return res
+    // Count-shaped shortcut: when terminator is `count` (→ int) or `long_count` (→ int64),
+    // there's no filter, and every projection is pure, the result is just the source length.
+    // Skip the loop entirely.
+    let isCountShaped = (lane == LinqLane.COUNTER
+        || (lane == LinqLane.ACCUMULATOR && lastName == "long_count"))
+    if (isCountShaped && whereCond == null && allProjectionsPure
+            && type_has_length(top._type))
+        return emit_length_shortcut(lastName, top, srcName, at)
+    // Ring 1: accumulator lane builds its own per-op loop body (typed accumulator, optional
+    // first-iteration init for min/max, sum+count for average) — dispatch before the
+    // COUNTER/ARRAY loopBody construction.
+    if (lane == LinqLane.ACCUMULATOR)
+        return emit_accumulator_lane(lastName, top, projection, whereCond,
+            intermediateBinds, elementType, srcName, accName, itName, at)
+    // Ring 2: early-exit lane — `any` no-pred + no upstream work + length-bearing source
+    // gets the empty-shortcut; everything else dispatches to the loop emitter.
+    if (lane == LinqLane.EARLY_EXIT) {
+        let terminatorCall = calls.back()._0
+        let isAnyNoPred = lastName == "any" && length(terminatorCall.arguments) == 1
+        if (isAnyNoPred && whereCond == null && allProjectionsPure
+                && type_has_length(top._type))
+            return emit_any_empty_shortcut(top, srcName, at)
+        return emit_early_exit_lane(lastName, top, projection, whereCond,
+            intermediateBinds, elementType, terminatorCall, srcName, itName, at)
     }
-    // Build the per-element loop body.
+    // Build the per-element loop body for COUNTER / ARRAY.
     var loopBody : Expression?
     if (counterLane) {
         // Counter lane must evaluate the projection (and any chained intermediates) per
@@ -749,72 +1317,11 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             return null
         }
     }
-    var topExpr = clone_expression(top)
-    topExpr.genFlags.alwaysSafe = true
-    var res : Expression?
-    // Pick the block-parameter typedecl modifier by source shape:
-    //   - iterator (rvalue, e.g. `each(range(10))`) — strip `-const` so the body can
-    //     consume the iterator. Without the strip, daslang's typer reports
-    //     "can't iterate over const iterator".
-    //   - container with length (array/table/string/range/fixed-array) — keep modifiers
-    //     so a `const&` source (e.g. `let arr <-`) matches the param exactly.
-    let topIsIter = top._type != null && top._type.isIterator
     if (counterLane) {
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(accName) = 0
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                var $i(accName) = 0
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        }
+        return emit_counter_lane(top, srcName, accName, itName, loopBody, at)
     } else {
-        let isIter = expr._type.isIterator
-        // Pre-reserve the accumulator to the source's length when the source has a known
-        // length (array, table, range — anything that isn't an iterator). Avoids realloc
-        // walks during growth; matches what ExprArrayComprehension lowering does.
-        let sourceHasLength = type_has_length(top._type)
-        if (isIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(accName) : array<$t(elementType)>
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return <- $i(accName).to_sequence_move()
-            }, $e(topExpr)))
-        } elif (sourceHasLength) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                var $i(accName) : array<$t(elementType)>
-                $i(accName) |> reserve(length($i(srcName)))
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return <- $i(accName)
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(accName) : array<$t(elementType)>
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return <- $i(accName)
-            }, $e(topExpr)))
-        }
+        return emit_array_lane(top, expr, loopBody, elementType, srcName, accName, itName, at)
     }
-    res.force_at(at)
-    res.force_generated(true)
-    let blk = (res as ExprInvoke).arguments[0] as ExprMakeBlock
-    (blk._block as ExprBlock).arguments[0].flags.can_shadow = true
-    return res
 }
 
 [call_macro(name="_fold")]
diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das
index 929037be9..7ce8584f3 100644
--- a/tests/linq/test_linq_fold.das
+++ b/tests/linq/test_linq_fold.das
@@ -782,3 +782,164 @@ def test_counter_lane_projection_side_effects(t : T?) {
     }
 }
 
+// ===== Phase 2B Ring 1 — Accumulator lane (sum / min / max / average / long_count) =====
+
+[test]
+def test_sum_accumulator(t : T?) {
+    t |> run("sum: empty") @(t : T?) {
+        let arr : array<int>
+        let s = _fold(each(arr).sum())
+        t |> equal(0, s)
+    }
+    t |> run("sum: singleton") @(t : T?) {
+        let arr <- [42]
+        let s = _fold(each(arr).sum())
+        t |> equal(42, s)
+    }
+    t |> run("sum: many") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let s = _fold(each(arr).sum())
+        t |> equal(15, s)
+        // parity vs _old_fold
+        let s_old = _old_fold(each(arr).sum())
+        t |> equal(s_old, s)
+    }
+    t |> run("sum: where filter") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let s = _fold(each(arr)._where(_ > 2).sum())
+        t |> equal(3 + 4 + 5, s)
+    }
+    t |> run("sum: select projection") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let s = _fold(each(arr)._select(_ * 10).sum())
+        t |> equal(150, s)
+    }
+    t |> run("sum: where + select") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let s = _fold(each(arr)._where(_ > 2)._select(_ * 10).sum())
+        t |> equal(120, s)
+    }
+}
+
+[test]
+def test_min_accumulator(t : T?) {
+    t |> run("min: empty → default") @(t : T?) {
+        let arr : array<int>
+        let m = _fold(each(arr).min())
+        // Matches linq.das min_impl: never assigned, returns default<T> = 0.
+        t |> equal(0, m)
+    }
+    t |> run("min: singleton") @(t : T?) {
+        let arr <- [7]
+        let m = _fold(each(arr).min())
+        t |> equal(7, m)
+    }
+    t |> run("min: many") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        let m = _fold(each(arr).min())
+        t |> equal(1, m)
+        let m_old = _old_fold(each(arr).min())
+        t |> equal(m_old, m)
+    }
+    t |> run("min: where filter") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        let m = _fold(each(arr)._where(_ >= 3).min())
+        t |> equal(3, m)
+    }
+    t |> run("min: select projection") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        let m = _fold(each(arr)._select(_ * -1).min())
+        t |> equal(-8, m)
+    }
+}
+
+[test]
+def test_max_accumulator(t : T?) {
+    t |> run("max: empty → default") @(t : T?) {
+        let arr : array<int>
+        let m = _fold(each(arr).max())
+        t |> equal(0, m)
+    }
+    t |> run("max: singleton") @(t : T?) {
+        let arr <- [7]
+        let m = _fold(each(arr).max())
+        t |> equal(7, m)
+    }
+    t |> run("max: many") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        let m = _fold(each(arr).max())
+        t |> equal(8, m)
+        let m_old = _old_fold(each(arr).max())
+        t |> equal(m_old, m)
+    }
+    t |> run("max: where filter") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        let m = _fold(each(arr)._where(_ <= 5).max())
+        t |> equal(5, m)
+    }
+    t |> run("max: select projection") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        let m = _fold(each(arr)._select(_ * 2).max())
+        t |> equal(16, m)
+    }
+}
+
+[test]
+def test_average_accumulator(t : T?) {
+    t |> run("average: empty → NaN") @(t : T?) {
+        let arr : array<int>
+        let a = _fold(each(arr).average())
+        t |> equal(true, is_nan(a))
+    }
+    t |> run("average: singleton") @(t : T?) {
+        let arr <- [42]
+        let a = _fold(each(arr).average())
+        t |> equal(42.0lf, a)
+    }
+    t |> run("average: many") @(t : T?) {
+        let arr <- [2, 4, 6, 8]
+        let a = _fold(each(arr).average())
+        t |> equal(5.0lf, a)
+        let a_old = _old_fold(each(arr).average())
+        t |> equal(a_old, a)
+    }
+    t |> run("average: where filter") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let a = _fold(each(arr)._where(_ > 2).average())
+        t |> equal(4.0lf, a)
+    }
+    t |> run("average: returns double for int source") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let a = _fold(each(arr).average())
+        t |> equal("double const", typeinfo typename(a))
+        t |> equal(2.0lf, a)
+    }
+}
+
+[test]
+def test_long_count_accumulator(t : T?) {
+    t |> run("long_count: empty") @(t : T?) {
+        let arr : array<int>
+        let c = _fold(each(arr).long_count())
+        t |> equal(0l, c)
+    }
+    t |> run("long_count: many") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let c = _fold(each(arr).long_count())
+        t |> equal(5l, c)
+        let c_old = _old_fold(each(arr).long_count())
+        t |> equal(c_old, c)
+    }
+    t |> run("long_count: where filter") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let c = _fold(each(arr)._where(_ > 2).long_count())
+        t |> equal(3l, c)
+    }
+    t |> run("long_count: pure select hits length shortcut") @(t : T?) {
+        // Pure projection + length-bearing source → planner elides loop, emits int64(length(src)).
+        // Verified functionally here; AST shape test in test_linq_fold_ast.das.
+        let arr <- [1, 2, 3, 4, 5]
+        let c = _fold(each(arr)._select(_ * 2).long_count())
+        t |> equal(5l, c)
+    }
+}
diff --git a/tests/linq/test_linq_fold_ast.das b/tests/linq/test_linq_fold_ast.das
index 78a70051c..f0c6c5945 100644
--- a/tests/linq/test_linq_fold_ast.das
+++ b/tests/linq/test_linq_fold_ast.das
@@ -450,6 +450,96 @@ def target_each_range_count() : int {
     return _fold(each(range(10))._where(_ > 5).count())
 }
 
+// ── Targets for Phase-2B Ring 1 accumulator lane ───────────────────────
+
+[export, marker(no_coverage)]
+def target_sum_fold() : int {
+    return _fold(each([1, 2, 3, 4, 5]).sum())
+}
+
+[export, marker(no_coverage)]
+def target_min_workhorse_fold() : int {
+    return _fold(each([5, 3, 8, 1, 4]).min())
+}
+
+[export, marker(no_coverage)]
+def target_max_workhorse_fold() : int {
+    return _fold(each([5, 3, 8, 1, 4]).max())
+}
+
+[export, marker(no_coverage)]
+def target_min_non_workhorse_fold() : tuple<int; int> {
+    // tuple<int;int> is NOT workhorse — min must dispatch to `_::less` for the compare.
+    // Single-return body so qmatch_function can extract the expression directly.
+    return _fold(each([(1, 20), (3, 5), (2, 10)]).min())
+}
+
+[export, marker(no_coverage)]
+def target_average_fold() : double {
+    return _fold(each([2, 4, 6, 8]).average())
+}
+
+[export, marker(no_coverage)]
+def target_long_count_loop_fold() : int64 {
+    // Where filter forces the loop path (not the length shortcut).
+    return _fold(each([1, 2, 3, 4, 5])._where(_ > 2).long_count())
+}
+
+[export, marker(no_coverage)]
+def target_long_count_shortcut_fold() : int64 {
+    // Bare long_count on array source — pure path → length shortcut → int64(length(src)).
+    return _fold(each([1, 2, 3, 4, 5]).long_count())
+}
+
+[export, marker(no_coverage)]
+def target_select_where_sum_fall_through() : int {
+    // `_select |> _where |> sum` is select-then-where which Phase 2A/2B planner rejects
+    // (where-after-select is blocked on ExprRef2Value substitution). Should fall through
+    // unfolded — body is the raw call chain, not an invoke wrapper. Use array-source form
+    // (no each) so the unfolded chain stays in safe land.
+    return [1, 2, 3, 4, 5]._select(_ * 2)._where(_ > 4).sum()._fold()
+}
+
+// ── Targets for Phase-2B Ring 2 early-exit lane ────────────────────────
+
+[export, marker(no_coverage)]
+def target_first_fold() : int {
+    return _fold(each([1, 2, 3, 4, 5])._where(_ > 2).first())
+}
+
+[export, marker(no_coverage)]
+def target_first_or_default_fold() : int {
+    return _fold(each([1, 2, 3, 4, 5])._where(_ > 99).first_or_default(-1))
+}
+
+[export, marker(no_coverage)]
+def target_any_loop_fold() : bool {
+    return _fold(each([1, 2, 3, 4, 5])._where(_ > 3).any())
+}
+
+[export, marker(no_coverage)]
+def target_any_shortcut_fold() : bool {
+    // No upstream where + no per-element work + length-bearing source → `!empty(src)`.
+    return _fold(each([1, 2, 3, 4, 5]).any())
+}
+
+[export, marker(no_coverage)]
+def target_all_fold() : bool {
+    return _fold(each([1, 2, 3, 4, 5])._all(_ > 0))
+}
+
+[export, marker(no_coverage)]
+def target_contains_fold() : bool {
+    return _fold(each([1, 2, 3, 4, 5]).contains(3))
+}
+
+[export, marker(no_coverage)]
+def target_early_exit_select_where_fall_through() : bool {
+    // select-then-where-then-any is rejected by the planner (where-after-select).
+    // Falls through unfolded; body is raw chain. Array-source form for safety.
+    return [1, 2, 3, 4, 5]._select(_ * 2)._where(_ > 4).any()._fold()
+}
+
 // ── Tests: `_fold` Phase-2A loop emission ──────────────────────────────
 // Phase-2A `_fold` emits explicit for-loops inside an `invoke($block, $src)` wrapper
 // (no `ExprArrayComprehension` nodes). Each test asserts the invoke wrapper exists
@@ -804,3 +894,457 @@ def test_target_each_range_count_runs(t : T?) {
     // expected count. range(10) → [0,1,2,3,4,5,6,7,8,9]; filter > 5 → 4 elements.
     t |> equal(target_each_range_count(), 4)
 }
+
+// ── Phase 2B Ring 1 — accumulator lane shape assertions ────────────────
+// Recursive AST walkers: count ExprOp2 / ExprOp1 with a given operator and ExprCall with a
+// given function name, descending only through the node kinds each walker handles. They verify
+// the workhorse-vs-`_::less` branch in min/max and the `acc += ...` / `acc ++` shape in
+// sum/long_count without locking into the exact ExprIfThenElse nesting (fragile to refactors).
+
+def count_op2(expr : Expression?; op : string) : int {
+    if (expr == null) return 0
+    var n = 0
+    if (expr is ExprOp2 && (expr as ExprOp2).op == op) {
+        n ++
+    }
+    if (expr is ExprBlock) {
+        let b = expr as ExprBlock
+        for (s in b.list) {
+            n += count_op2(s, op)
+        }
+        for (s in b.finalList) {
+            n += count_op2(s, op)
+        }
+    } elif (expr is ExprFor) {
+        let f = expr as ExprFor
+        for (s in f.sources) {
+            n += count_op2(s, op)
+        }
+        n += count_op2(f.body, op)
+    } elif (expr is ExprIfThenElse) {
+        let i = expr as ExprIfThenElse
+        n += count_op2(i.cond, op)
+        n += count_op2(i.if_true, op)
+        n += count_op2(i.if_false, op)
+    } elif (expr is ExprOp2) {
+        let o = expr as ExprOp2
+        n += count_op2(o.left, op)
+        n += count_op2(o.right, op)
+    } elif (expr is ExprCall) {
+        let c = expr as ExprCall
+        for (a in c.arguments) {
+            n += count_op2(a, op)
+        }
+    } elif (expr is ExprMakeBlock) {
+        let mb = expr as ExprMakeBlock
+        n += count_op2(mb._block, op)
+    } elif (expr is ExprInvoke) {
+        let inv = expr as ExprInvoke
+        for (a in inv.arguments) {
+            n += count_op2(a, op)
+        }
+    } elif (expr is ExprReturn) {
+        let r = expr as ExprReturn
+        n += count_op2(r.subexpr, op)
+    } elif (expr is ExprCopy) {
+        let c = expr as ExprCopy
+        n += count_op2(c.left, op)
+        n += count_op2(c.right, op)
+    } elif (expr is ExprMove) {
+        let m = expr as ExprMove
+        n += count_op2(m.left, op)
+        n += count_op2(m.right, op)
+    } elif (expr is ExprClone) {
+        let c = expr as ExprClone
+        n += count_op2(c.left, op)
+        n += count_op2(c.right, op)
+    } elif (expr is ExprOp1) {
+        let o = expr as ExprOp1
+        n += count_op2(o.subexpr, op)
+    }
+    return n
+}
+
+def count_call(expr : Expression?; funcName : string) : int {
+    if (expr == null) return 0
+    var n = 0
+    if (expr is ExprCall) {
+        let c = expr as ExprCall
+        if (c.func != null) {
+            // Generic instances carry a mangled `name` (e.g. "less`17876..."); the un-mangled
+            // name lives in `func.fromGeneric.name`. Compare that when present, else `func.name`.
+            // `das_string` supports `==` directly so no `string()` cast needed (PERF007).
+            if (c.func.fromGeneric != null) {
+                if (c.func.fromGeneric.name == funcName) {
+                    n ++
+                }
+            } elif (c.func.name == funcName) {
+                n ++
+            }
+        }
+    }
+    if (expr is ExprBlock) {
+        let b = expr as ExprBlock
+        for (s in b.list) {
+            n += count_call(s, funcName)
+        }
+        for (s in b.finalList) {
+            n += count_call(s, funcName)
+        }
+    } elif (expr is ExprFor) {
+        let f = expr as ExprFor
+        for (s in f.sources) {
+            n += count_call(s, funcName)
+        }
+        n += count_call(f.body, funcName)
+    } elif (expr is ExprIfThenElse) {
+        let i = expr as ExprIfThenElse
+        n += count_call(i.cond, funcName)
+        n += count_call(i.if_true, funcName)
+        n += count_call(i.if_false, funcName)
+    } elif (expr is ExprOp2) {
+        let o = expr as ExprOp2
+        n += count_call(o.left, funcName)
+        n += count_call(o.right, funcName)
+    } elif (expr is ExprCall) {
+        let c = expr as ExprCall
+        for (a in c.arguments) {
+            n += count_call(a, funcName)
+        }
+    } elif (expr is ExprMakeBlock) {
+        let mb = expr as ExprMakeBlock
+        n += count_call(mb._block, funcName)
+    } elif (expr is ExprInvoke) {
+        let inv = expr as ExprInvoke
+        for (a in inv.arguments) {
+            n += count_call(a, funcName)
+        }
+    } elif (expr is ExprReturn) {
+        let r = expr as ExprReturn
+        n += count_call(r.subexpr, funcName)
+    } elif (expr is ExprOp1) {
+        let o = expr as ExprOp1
+        n += count_call(o.subexpr, funcName)
+    }
+    return n
+}
+
+def count_op1(expr : Expression?; op : string) : int {
+    if (expr == null) return 0
+    var n = 0
+    if (expr is ExprOp1 && (expr as ExprOp1).op == op) {
+        n ++
+    }
+    if (expr is ExprBlock) {
+        let b = expr as ExprBlock
+        for (s in b.list) {
+            n += count_op1(s, op)
+        }
+        for (s in b.finalList) {
+            n += count_op1(s, op)
+        }
+    } elif (expr is ExprFor) {
+        let f = expr as ExprFor
+        for (s in f.sources) {
+            n += count_op1(s, op)
+        }
+        n += count_op1(f.body, op)
+    } elif (expr is ExprIfThenElse) {
+        let i = expr as ExprIfThenElse
+        n += count_op1(i.cond, op)
+        n += count_op1(i.if_true, op)
+        n += count_op1(i.if_false, op)
+    } elif (expr is ExprOp2) {
+        let o = expr as ExprOp2
+        n += count_op1(o.left, op)
+        n += count_op1(o.right, op)
+    } elif (expr is ExprCall) {
+        let c = expr as ExprCall
+        for (a in c.arguments) {
+            n += count_op1(a, op)
+        }
+    } elif (expr is ExprMakeBlock) {
+        let mb = expr as ExprMakeBlock
+        n += count_op1(mb._block, op)
+    } elif (expr is ExprInvoke) {
+        let inv = expr as ExprInvoke
+        for (a in inv.arguments) {
+            n += count_op1(a, op)
+        }
+    } elif (expr is ExprReturn) {
+        let r = expr as ExprReturn
+        n += count_op1(r.subexpr, op)
+    } elif (expr is ExprOp1) {
+        let o = expr as ExprOp1
+        n += count_op1(o.subexpr, op)
+    }
+    return n
+}
+
+// Counts top-level `ExprLet` statements in the outer block of an invoke wrapper; returns -1
+// if `body_expr` is not an invoke whose first argument is a make-block. sum/long_count emit
+// one accumulator; min/max emit two (first flag + best); average emits two (sum acc + count).
+def count_outer_let_vars(body_expr : Expression?) : int {
+    if (!(body_expr is ExprInvoke)) return -1
+    let inv = body_expr as ExprInvoke
+    if (empty(inv.arguments) || !(inv.arguments[0] is ExprMakeBlock)) return -1
+    let mb = inv.arguments[0] as ExprMakeBlock
+    let outer = mb._block as ExprBlock
+    if (outer == null) return -1
+    var n = 0
+    for (stmt in outer.list) {
+        if (stmt is ExprLet) n ++
+    }
+    return n
+}
+
+[test]
+def test_sum_loop_shape(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_sum_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // sum emits a single accumulator: `var acc : int = default<int>`
+        t |> equal(1, count_outer_let_vars(body_expr), "sum: exactly one accumulator var")
+        // Per-element update is `acc += value` — count ExprOp2 with op `+=`
+        t |> success(count_op2(body_expr, "+=") >= 1, "sum: at least one '+=' per element")
+    }
+}
+
+[test]
+def test_min_workhorse_uses_lt(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_min_workhorse_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // Workhorse int → emit direct `<` compare, NOT `_::less` call
+        t |> success(count_op2(body_expr, "<") >= 1, "min workhorse: at least one ExprOp2 '<'")
+        t |> equal(0, count_call(body_expr, "less"), "min workhorse must NOT emit _::less call")
+        // min/max emit two outer vars: `first_iter` flag + `best` accumulator
+        t |> equal(2, count_outer_let_vars(body_expr), "min: first_iter flag + best")
+    }
+}
+
+[test]
+def test_max_workhorse_uses_gt(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_max_workhorse_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // Workhorse int max → direct `>` compare
+        t |> success(count_op2(body_expr, ">") >= 1, "max workhorse: at least one ExprOp2 '>'")
+        t |> equal(0, count_call(body_expr, "less"), "max workhorse must NOT emit _::less call")
+        t |> equal(2, count_outer_let_vars(body_expr), "max: first_iter flag + best")
+    }
+}
+
+[test]
+def test_min_non_workhorse_uses_less(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_min_non_workhorse_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        // Non-workhorse tuple return is move-returned (`return <- ...`) — match that form.
+        let r = qmatch_function(func) $() {
+            return <- $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // tuple<int;int> is non-workhorse → fall back to _::less for the compare
+        t |> success(count_call(body_expr, "less") >= 1, "min non-workhorse: at least one _::less call")
+    }
+}
+
+[test]
+def test_average_emits_sum_and_count(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_average_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // average emits two accumulators: sum + count
+        t |> equal(2, count_outer_let_vars(body_expr), "average: sum + count accumulators")
+        // Per-element work: sum += value (ExprOp2 '+=') and a count increment, asserted via ExprOp1 op "++". NOTE(review): postfix form may be "+++" — confirm.
+        t |> success(count_op2(body_expr, "+=") >= 1, "average: at least one '+=' for sum acc")
+        t |> success(count_op1(body_expr, "++") >= 1, "average: at least one '++' for count")
+    }
+}
+
+[test]
+def test_long_count_loop_shape(t : T?) {
+    ast_gc_guard() {
+        // The where-filtered chain forces the loop path (length shortcut is gated on
+        // whereCond == null) — verifies the int64 accumulator emission.
+        var func = find_module_function_via_rtti(compiling_module(), @@target_long_count_loop_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        t |> equal(1, count_outer_let_vars(body_expr), "long_count: exactly one int64 accumulator")
+        // long_count bumps the accumulator per matched element; asserted via ExprOp1 op "++". NOTE(review): postfix form may be "+++" — confirm.
+        t |> success(count_op1(body_expr, "++") >= 1, "long_count: at least one '++' per matched element")
+    }
+}
+
+[test]
+def test_long_count_length_shortcut(t : T?) {
+    ast_gc_guard() {
+        // Bare long_count on array source → length shortcut elides the loop entirely.
+        var func = find_module_function_via_rtti(compiling_module(), @@target_long_count_shortcut_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        let nForLoops = count_inner_for_loops(body_expr)
+        t |> equal(0, nForLoops, "long_count shortcut: no for-loop (length emitted directly)")
+    }
+}
+
+[test]
+def test_accumulator_falls_through_on_select_where(t : T?) {
+    ast_gc_guard() {
+        // `select |> where_ |> sum` is rejected by the planner (select-then-where blocker).
+        // Should fall through unfolded — body is the raw call chain, not an invoke wrapper.
+        var func = find_module_function_via_rtti(compiling_module(), @@target_select_where_sum_fall_through)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched, "should have return expression")
+        t |> success(!(body_expr is ExprInvoke),
+            "select|where|sum should fall through unfolded (no invoke wrapper)")
+    }
+}
+
+// ── Phase 2B Ring 2 — early-exit lane shape assertions ─────────────────
+
+[test]
+def test_first_loop_shape(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_first_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // first emits a loop with early return + tail panic. Verify presence of a panic call
+        // (sentinel for first's "sequence contains no elements" branch).
+        t |> success(count_call(body_expr, "panic") >= 1, "first: panic call in tail")
+    }
+}
+
+[test]
+def test_first_or_default_returns_d(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_first_or_default_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // first_or_default tail returns the bound default — never panics.
+        t |> equal(0, count_call(body_expr, "panic"), "first_or_default: no panic call in tail")
+    }
+}
+
+[test]
+def test_any_loop_shape(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_any_loop_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // any (with where) emits a loop. There IS a for-loop (not shortcut path).
+        t |> equal(1, count_inner_for_loops(body_expr), "any (with where): one for-loop")
+    }
+}
+
+[test]
+def test_any_empty_shortcut(t : T?) {
+    ast_gc_guard() {
+        // Predicate-free `each(arr).any()` on length-bearing source → `!empty(src)`.
+        // No for-loop, no per-element work.
+        var func = find_module_function_via_rtti(compiling_module(), @@target_any_shortcut_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        t |> equal(0, count_inner_for_loops(body_expr), "any-shortcut: no for-loop emitted")
+        t |> success(count_call(body_expr, "empty") >= 1, "any-shortcut: `empty(src)` call present")
+        t |> success(count_op1(body_expr, "!") >= 1, "any-shortcut: `!empty(src)` negation present")
+    }
+}
+
+[test]
+def test_all_loop_shape(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_all_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // all emits `if (!pred) return false` — at least one ExprOp1 with op `!`.
+        t |> success(count_op1(body_expr, "!") >= 1, "all: at least one `!` for predicate negation")
+        t |> equal(1, count_inner_for_loops(body_expr), "all: one for-loop")
+    }
+}
+
+[test]
+def test_contains_loop_shape(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_contains_fold)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        // contains emits `if (it == v) return true` — at least one ExprOp2 with op `==`.
+        t |> success(count_op2(body_expr, "==") >= 1, "contains: at least one `==` per element")
+        t |> equal(1, count_inner_for_loops(body_expr), "contains: one for-loop")
+    }
+}
+
+[test]
+def test_early_exit_falls_through_on_select_where(t : T?) {
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_early_exit_select_where_fall_through)
+        if (func == null) return
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {
+            return $e(body_expr)
+        }
+        t |> success(r.matched, "should have return expression")
+        t |> success(!(body_expr is ExprInvoke),
+            "select|where|any should fall through unfolded (no invoke wrapper)")
+    }
+}
+
diff --git a/tests/linq/test_linq_fold_ring2.das b/tests/linq/test_linq_fold_ring2.das
new file mode 100644
index 000000000..7fb68bac8
--- /dev/null
+++ b/tests/linq/test_linq_fold_ring2.das
@@ -0,0 +1,142 @@
+options gen2
+require daslib/linq
+require dastest/testing_boost public
+
+require daslib/linq_boost
+
+// Phase 2B Ring 2 — early-exit lane (first / first_or_default / any / all / contains).
+// Lives in its own file so the iterator-type generic instances `linq.all` / `linq.contains`
+// don't collide with the ones registered by `test_any_all_contains` in test_linq_fold.das.
+// `first` on empty source panics (matches linq.das line 2383); not tested directly here
+// since dastest lacks a verify_panic helper. Non-empty cases cover the main paths.
+
+[test]
+def test_first_early_exit(t : T?) {
+    t |> run("first: singleton") @(t : T?) {
+        let arr <- [42]
+        let f = _fold(each(arr).first())
+        t |> equal(42, f)
+    }
+    t |> run("first: many returns first") @(t : T?) {
+        let arr <- [7, 8, 9]
+        let f = _fold(each(arr).first())
+        t |> equal(7, f)
+        let f_old = _old_fold(each(arr).first())
+        t |> equal(f_old, f)
+    }
+    t |> run("first: where matches returns first match") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let f = _fold(each(arr)._where(_ > 2).first())
+        t |> equal(3, f)
+    }
+    t |> run("first: select returns projected first") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let f = _fold(each(arr)._select(_ * 10).first())
+        t |> equal(10, f)
+    }
+}
+
+[test]
+def test_first_or_default_early_exit(t : T?) {
+    t |> run("first_or_default: empty returns default") @(t : T?) {
+        let arr : array<int>
+        let f = _fold(each(arr).first_or_default(99))
+        t |> equal(99, f)
+    }
+    t |> run("first_or_default: non-empty returns first") @(t : T?) {
+        let arr <- [7, 8, 9]
+        let f = _fold(each(arr).first_or_default(99))
+        t |> equal(7, f)
+        let f_old = _old_fold(each(arr).first_or_default(99))
+        t |> equal(f_old, f)
+    }
+    t |> run("first_or_default: where matches returns first match") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let f = _fold(each(arr)._where(_ > 3).first_or_default(0))
+        t |> equal(4, f)
+    }
+    t |> run("first_or_default: where no match returns default") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let f = _fold(each(arr)._where(_ > 100).first_or_default(-1))
+        t |> equal(-1, f)
+    }
+}
+
+[test]
+def test_any_early_exit(t : T?) {
+    t |> run("any: empty → false") @(t : T?) {
+        let arr : array<int>
+        let r = _fold(each(arr).any())
+        t |> equal(false, r)
+    }
+    t |> run("any: non-empty (no pred) → true (shortcut path)") @(t : T?) {
+        let arr <- [1]
+        let r = _fold(each(arr).any())
+        t |> equal(true, r)
+    }
+    t |> run("any: with where matching → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._where(_ > 3).any())
+        t |> equal(true, r)
+    }
+    t |> run("any: with where not matching → false") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r = _fold(each(arr)._where(_ > 100).any())
+        t |> equal(false, r)
+    }
+    t |> run("any: with pred matching → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._any(_ > 3))
+        t |> equal(true, r)
+    }
+}
+
+[test]
+def test_all_early_exit(t : T?) {
+    t |> run("all: empty → true (vacuous)") @(t : T?) {
+        let arr : array<int>
+        let r = _fold(each(arr)._all(_ > 0))
+        t |> equal(true, r)
+    }
+    t |> run("all: all match → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._all(_ > 0))
+        t |> equal(true, r)
+    }
+    t |> run("all: one fails → false") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._all(_ > 2))
+        t |> equal(false, r)
+    }
+    t |> run("all: with upstream where") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        // Only elements > 2 pass: 3, 4, 5. All of those > 0 → true.
+        let r = _fold(each(arr)._where(_ > 2)._all(_ > 0))
+        t |> equal(true, r)
+    }
+}
+
+[test]
+def test_contains_early_exit(t : T?) {
+    t |> run("contains: empty → false") @(t : T?) {
+        let arr : array<int>
+        let r = _fold(each(arr).contains(3))
+        t |> equal(false, r)
+    }
+    t |> run("contains: found → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr).contains(3))
+        t |> equal(true, r)
+    }
+    t |> run("contains: not found → false") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr).contains(99))
+        t |> equal(false, r)
+    }
+    t |> run("contains: select then compare against projected") @(t : T?) {
+        let arr <- [1, 2, 3]
+        // Projected values: [10, 20, 30]. contains(20) → true.
+        let r = _fold(each(arr)._select(_ * 10).contains(20))
+        t |> equal(true, r)
+    }
+}

From cd1b5655f67b3a7efe9894c7c09384ec4605ff8d Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 19:47:25 -0700
Subject: [PATCH 02/14] linq: defuse ICE 50609 for `all`/`contains` via
 element-const overload
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Phase 2B Ring 2 (commit 0851ea03e) had to keep its tests in a separate file
because both `linq.all` and `linq.contains` ICE with
`error[50609]: multiple instances of <gen>` when a single module hits the
same op from both a mut-element source (`each(arr)` → `iterator<T -&>`) and
a const-element source (iterator comprehensions → `iterator<T const -&>`).
Root cause: the generic-name mangler ignores inner element-constness, so
two genuinely distinct instantiations hash to one symbol.

Pointer-like variance fact (verified empirically; see
tests/type_traits/test_iterator_variance.das): `iterator<T -&>` flows into
`iterator<T const -&>`, not the reverse. So declaring the iterator overload
as `iterator<auto(TT) const>` lets both source flavors converge on a
single instantiation per op — the collision becomes impossible.

Scope deliberately narrow: only `all` and `contains` are constified
(the two known ICE triggers). A blanket sweep of all ~78 iterator overloads
caused real cascading breakage — select_impl moves `it` via emplace
(can't from const), select_many iterates `it` (needs mutable handle),
multiple downstream AOT codegen issues with const struct default-init,
and user-defined non-const operators (ComplexType.==). The full fix
belongs in the C++ mangler (separate PR); this commit is the narrowest
library-side defuse for the two demonstrably broken ops.

Side effects:
  - tests/linq/_common.das: ComplexType `==` and `!=` made `def const`
    (was `def`); the const-element form of contains() iterates const
    values and needs a const-callable equality op. This is a legitimate
    operator-design improvement, not a workaround.
  - tests/linq/test_linq_fold_ring2.das (142 lines, Ring 2 functional
    tests) merged into tests/linq/test_linq_fold.das; the workaround
    file is no longer needed. All 133 fold tests pass.
  - tests/type_traits/test_iterator_variance.das: new 3-case language
    regression test pinning the variance rule (positive case, comprehension
    case, cross-flavor case — the same scenario that ICEs without the fix).
  - CLAUDE.md: two bullets under `### Iterators and `each`` documenting
    the variance fact and the mangler pitfall + library workaround.

Verification: 695 INTERP + 695 AOT + 695 JIT tests in tests/linq pass.
ICE no longer reproducible in the working tree.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CLAUDE.md                                    |   2 +
 daslib/linq.das                              |   9 +-
 tests/linq/_common.das                       |   4 +-
 tests/linq/test_linq_fold.das                | 130 +++++++++++++++++
 tests/linq/test_linq_fold_ring2.das          | 142 -------------------
 tests/type_traits/test_iterator_variance.das |  59 ++++++++
 6 files changed, 200 insertions(+), 146 deletions(-)
 delete mode 100644 tests/linq/test_linq_fold_ring2.das
 create mode 100644 tests/type_traits/test_iterator_variance.das

diff --git a/CLAUDE.md b/CLAUDE.md
index 0e32ba0b2..4b37f06e6 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -219,6 +219,8 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for
 - `[unsafe_outside_of_for] def each(x) : iterator<T>` makes a type iterable in `for` loops
 - When the iterator is named `each`, the call can be omitted: `for (v in each(x))` is identical to `for (v in x)`
 - Other iterator names (e.g. `filter`, `map`) cannot be omitted
+- **Iterator element-const variance is pointer-like:** `iterator<T -&>` flows into `iterator<T const -&>` (mut → const), not the reverse. So a generic param declared as `iterator<auto(TT) const>` takes both `each(array<T>)` (yields `iterator<T -&>`) and iterator-comprehension (yields `iterator<T const -&>`) sources via a single instantiation. The `const` qualifier alone is enough — do NOT add `&` (that's a separate ref-form modifier, not what you want for variance)
+- **Generic-mangling pitfall:** instantiations of the same generic that differ only in inner element-const (`iterator<int -&>` vs `iterator<int const -&>`) currently hash-collide in the instance registry, producing `error[50609]: multiple instances of …` when both arise in one module. Workaround at the library level: declare the iterator overload as `iterator<auto(TT) const>` instead of `iterator<auto(TT)>` — both source flavors then converge on a single instance per the variance rule above. Caveat: the constify makes `it` inside the body const, so the body must not move from / mutate / call non-const operators on `it`. linq.das only constifies `all` and `contains` for this reason; the rest stay vulnerable until the mangler is fixed upstream
 
 ### String access functions
 
diff --git a/daslib/linq.das b/daslib/linq.das
index 3967d0f74..935b192b3 100644
--- a/daslib/linq.das
+++ b/daslib/linq.das
@@ -1949,8 +1949,12 @@ def private all_impl_const(src : auto(ARGT); tt : auto(TT); predicate : block<(a
     return all_impl(unsafe(reinterpret<ARGT -const>(src)), type<TT -const -&>, predicate)
 }
 
-def all(var src : iterator<auto(TT)>; predicate : block<(arg : TT -&) : bool>) : bool {
+def all(var src : iterator<auto(TT) const>; predicate : block<(arg : TT -&) : bool>) : bool {
     //! Returns true if all elements in the iterator satisfy the predicate
+    //! `const` on the inner element collapses mut-ref and const-ref iterator sources
+    //! (`each(arr)` vs iterator comprehensions) into a single generic instance, sidestepping
+    //! the generic-mangler hash collision (error[50609]) that fires when both flavors
+    //! arise in one module. Full fix lives in the mangler itself (follow-up PR).
     return all_impl(src, type<TT -const -&>, predicate)
 }
 
@@ -2160,8 +2164,9 @@ def private contains_impl_const(src : auto(ARGT); tt : auto(TT); element : TT -&
     return contains_impl(unsafe(reinterpret<ARGT -const>(src)), type<TT -const -&>, element)
 }
 
-def contains(var src : iterator<auto(TT)>; element : TT -&) : bool {
+def contains(var src : iterator<auto(TT) const>; element : TT -&) : bool {
     //! Returns true if the element is present in the iterator
+    //! `const` on the inner element — see comment on `all()` above for the rationale.
     return contains_impl(src, type<TT -const -&>, element)
 }
 
diff --git a/tests/linq/_common.das b/tests/linq/_common.das
index 0015907ca..14400f511 100644
--- a/tests/linq/_common.das
+++ b/tests/linq/_common.das
@@ -19,7 +19,7 @@ struct ComplexType { // this one can't be copied
             i = int(int64(i) / divisor)
         }
     }
-    def operator ==(b : ComplexType) : bool {
+    def const operator ==(b : ComplexType) : bool {
         for (i, j in a, b.a) {
             if (i != j) {
                 return false
@@ -28,7 +28,7 @@ struct ComplexType { // this one can't be copied
         return true
     }
 
-    def operator !=(b : ComplexType) : bool {
+    def const operator !=(b : ComplexType) : bool {
         for (i, j in a, b.a) {
             if (i != j) {
                 return true
diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das
index 7ce8584f3..e211d1752 100644
--- a/tests/linq/test_linq_fold.das
+++ b/tests/linq/test_linq_fold.das
@@ -943,3 +943,133 @@ def test_long_count_accumulator(t : T?) {
         t |> equal(5l, c)
     }
 }
+[test]
+def test_first_early_exit(t : T?) {
+    t |> run("first: singleton") @(t : T?) {
+        let arr <- [42]
+        let f = _fold(each(arr).first())
+        t |> equal(42, f)
+    }
+    t |> run("first: many returns first") @(t : T?) {
+        let arr <- [7, 8, 9]
+        let f = _fold(each(arr).first())
+        t |> equal(7, f)
+        let f_old = _old_fold(each(arr).first())
+        t |> equal(f_old, f)
+    }
+    t |> run("first: where matches returns first match") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let f = _fold(each(arr)._where(_ > 2).first())
+        t |> equal(3, f)
+    }
+    t |> run("first: select returns projected first") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let f = _fold(each(arr)._select(_ * 10).first())
+        t |> equal(10, f)
+    }
+}
+
+[test]
+def test_first_or_default_early_exit(t : T?) {
+    t |> run("first_or_default: empty returns default") @(t : T?) {
+        let arr : array<int>
+        let f = _fold(each(arr).first_or_default(99))
+        t |> equal(99, f)
+    }
+    t |> run("first_or_default: non-empty returns first") @(t : T?) {
+        let arr <- [7, 8, 9]
+        let f = _fold(each(arr).first_or_default(99))
+        t |> equal(7, f)
+        let f_old = _old_fold(each(arr).first_or_default(99))
+        t |> equal(f_old, f)
+    }
+    t |> run("first_or_default: where matches returns first match") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let f = _fold(each(arr)._where(_ > 3).first_or_default(0))
+        t |> equal(4, f)
+    }
+    t |> run("first_or_default: where no match returns default") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let f = _fold(each(arr)._where(_ > 100).first_or_default(-1))
+        t |> equal(-1, f)
+    }
+}
+
+[test]
+def test_any_early_exit(t : T?) {
+    t |> run("any: empty → false") @(t : T?) {
+        let arr : array<int>
+        let r = _fold(each(arr).any())
+        t |> equal(false, r)
+    }
+    t |> run("any: non-empty (no pred) → true (shortcut path)") @(t : T?) {
+        let arr <- [1]
+        let r = _fold(each(arr).any())
+        t |> equal(true, r)
+    }
+    t |> run("any: with where matching → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._where(_ > 3).any())
+        t |> equal(true, r)
+    }
+    t |> run("any: with where not matching → false") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r = _fold(each(arr)._where(_ > 100).any())
+        t |> equal(false, r)
+    }
+    t |> run("any: with pred matching → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._any(_ > 3))
+        t |> equal(true, r)
+    }
+}
+
+[test]
+def test_all_early_exit(t : T?) {
+    t |> run("all: empty → true (vacuous)") @(t : T?) {
+        let arr : array<int>
+        let r = _fold(each(arr)._all(_ > 0))
+        t |> equal(true, r)
+    }
+    t |> run("all: all match → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._all(_ > 0))
+        t |> equal(true, r)
+    }
+    t |> run("all: one fails → false") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)._all(_ > 2))
+        t |> equal(false, r)
+    }
+    t |> run("all: with upstream where") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        // Only elements > 2 pass: 3, 4, 5. All of those > 0 → true.
+        let r = _fold(each(arr)._where(_ > 2)._all(_ > 0))
+        t |> equal(true, r)
+    }
+}
+
+[test]
+def test_contains_early_exit(t : T?) {
+    t |> run("contains: empty → false") @(t : T?) {
+        let arr : array<int>
+        let r = _fold(each(arr).contains(3))
+        t |> equal(false, r)
+    }
+    t |> run("contains: found → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr).contains(3))
+        t |> equal(true, r)
+    }
+    t |> run("contains: not found → false") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr).contains(99))
+        t |> equal(false, r)
+    }
+    t |> run("contains: select then compare against projected") @(t : T?) {
+        let arr <- [1, 2, 3]
+        // Projected values: [10, 20, 30]. contains(20) → true.
+        let r = _fold(each(arr)._select(_ * 10).contains(20))
+        t |> equal(true, r)
+    }
+}
diff --git a/tests/linq/test_linq_fold_ring2.das b/tests/linq/test_linq_fold_ring2.das
deleted file mode 100644
index 7fb68bac8..000000000
--- a/tests/linq/test_linq_fold_ring2.das
+++ /dev/null
@@ -1,142 +0,0 @@
-options gen2
-require daslib/linq
-require dastest/testing_boost public
-
-require daslib/linq_boost
-
-// Phase 2B Ring 2 — early-exit lane (first / first_or_default / any / all / contains).
-// Lives in its own file so the iterator-type generic instances `linq.all` / `linq.contains`
-// don't collide with the ones registered by `test_any_all_contains` in test_linq_fold.das.
-// `first` on empty source panics (matches linq.das line 2383); not tested directly here
-// since dastest lacks a verify_panic helper. Non-empty cases cover the main paths.
-
-[test]
-def test_first_early_exit(t : T?) {
-    t |> run("first: singleton") @(t : T?) {
-        let arr <- [42]
-        let f = _fold(each(arr).first())
-        t |> equal(42, f)
-    }
-    t |> run("first: many returns first") @(t : T?) {
-        let arr <- [7, 8, 9]
-        let f = _fold(each(arr).first())
-        t |> equal(7, f)
-        let f_old = _old_fold(each(arr).first())
-        t |> equal(f_old, f)
-    }
-    t |> run("first: where matches returns first match") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let f = _fold(each(arr)._where(_ > 2).first())
-        t |> equal(3, f)
-    }
-    t |> run("first: select returns projected first") @(t : T?) {
-        let arr <- [1, 2, 3]
-        let f = _fold(each(arr)._select(_ * 10).first())
-        t |> equal(10, f)
-    }
-}
-
-[test]
-def test_first_or_default_early_exit(t : T?) {
-    t |> run("first_or_default: empty returns default") @(t : T?) {
-        let arr : array<int>
-        let f = _fold(each(arr).first_or_default(99))
-        t |> equal(99, f)
-    }
-    t |> run("first_or_default: non-empty returns first") @(t : T?) {
-        let arr <- [7, 8, 9]
-        let f = _fold(each(arr).first_or_default(99))
-        t |> equal(7, f)
-        let f_old = _old_fold(each(arr).first_or_default(99))
-        t |> equal(f_old, f)
-    }
-    t |> run("first_or_default: where matches returns first match") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let f = _fold(each(arr)._where(_ > 3).first_or_default(0))
-        t |> equal(4, f)
-    }
-    t |> run("first_or_default: where no match returns default") @(t : T?) {
-        let arr <- [1, 2, 3]
-        let f = _fold(each(arr)._where(_ > 100).first_or_default(-1))
-        t |> equal(-1, f)
-    }
-}
-
-[test]
-def test_any_early_exit(t : T?) {
-    t |> run("any: empty → false") @(t : T?) {
-        let arr : array<int>
-        let r = _fold(each(arr).any())
-        t |> equal(false, r)
-    }
-    t |> run("any: non-empty (no pred) → true (shortcut path)") @(t : T?) {
-        let arr <- [1]
-        let r = _fold(each(arr).any())
-        t |> equal(true, r)
-    }
-    t |> run("any: with where matching → true") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let r = _fold(each(arr)._where(_ > 3).any())
-        t |> equal(true, r)
-    }
-    t |> run("any: with where not matching → false") @(t : T?) {
-        let arr <- [1, 2, 3]
-        let r = _fold(each(arr)._where(_ > 100).any())
-        t |> equal(false, r)
-    }
-    t |> run("any: with pred matching → true") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let r = _fold(each(arr)._any(_ > 3))
-        t |> equal(true, r)
-    }
-}
-
-[test]
-def test_all_early_exit(t : T?) {
-    t |> run("all: empty → true (vacuous)") @(t : T?) {
-        let arr : array<int>
-        let r = _fold(each(arr)._all(_ > 0))
-        t |> equal(true, r)
-    }
-    t |> run("all: all match → true") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let r = _fold(each(arr)._all(_ > 0))
-        t |> equal(true, r)
-    }
-    t |> run("all: one fails → false") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let r = _fold(each(arr)._all(_ > 2))
-        t |> equal(false, r)
-    }
-    t |> run("all: with upstream where") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        // Only elements > 2 pass: 3, 4, 5. All of those > 0 → true.
-        let r = _fold(each(arr)._where(_ > 2)._all(_ > 0))
-        t |> equal(true, r)
-    }
-}
-
-[test]
-def test_contains_early_exit(t : T?) {
-    t |> run("contains: empty → false") @(t : T?) {
-        let arr : array<int>
-        let r = _fold(each(arr).contains(3))
-        t |> equal(false, r)
-    }
-    t |> run("contains: found → true") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let r = _fold(each(arr).contains(3))
-        t |> equal(true, r)
-    }
-    t |> run("contains: not found → false") @(t : T?) {
-        let arr <- [1, 2, 3, 4, 5]
-        let r = _fold(each(arr).contains(99))
-        t |> equal(false, r)
-    }
-    t |> run("contains: select then compare against projected") @(t : T?) {
-        let arr <- [1, 2, 3]
-        // Projected values: [10, 20, 30]. contains(20) → true.
-        let r = _fold(each(arr)._select(_ * 10).contains(20))
-        t |> equal(true, r)
-    }
-}
diff --git a/tests/type_traits/test_iterator_variance.das b/tests/type_traits/test_iterator_variance.das
new file mode 100644
index 000000000..04e150f54
--- /dev/null
+++ b/tests/type_traits/test_iterator_variance.das
@@ -0,0 +1,59 @@
+options gen2
+require dastest/testing_boost public
+
+// Iterator element-const variance is pointer-like: `iterator<T -&>` flows into
+// `iterator<T const -&>`, but not the reverse. Tested empirically while wiring
+// the linq.das constify fix (commit 0851ea03e). These tests pin the rule so
+// future language regressions surface as test failures rather than as a fresh
+// generic-mangler ICE in user code.
+
+def private sum_const_iter(var src : iterator<int const&>) : int {
+    var total = 0
+    for (it in src) {
+        total += it
+    }
+    return total
+}
+
+def private sum_const_generic(var src : iterator<auto(TT) const&>) : int {
+    var total = 0
+    for (it in src) {
+        total += int(it)
+    }
+    return total
+}
+
+[test]
+def test_mut_iter_flows_into_const_param(t : T?) {
+    // each(array<int>) yields iterator<int -&> (mut element). Passing it into a
+    // function expecting iterator<int const -&> should succeed via variance.
+    let arr <- [1, 2, 3, 4, 5]
+    unsafe {
+        let total = sum_const_iter(each(arr))
+        t |> equal(15, total)
+    }
+}
+
+[test]
+def test_const_source_into_const_param(t : T?) {
+    // Iterator comprehension yields iterator<int const -&>. Passing it into a
+    // function expecting iterator<int const -&> is the trivial case.
+    let total = sum_const_iter([iterator for(x in 1..6); x])
+    t |> equal(15, total)
+}
+
+[test]
+def test_generic_const_param_accepts_both_flavors(t : T?) {
+    // The regression case: the same generic instantiated from BOTH a mut-element
+    // source AND a const-element source within one module. Pre-fix this would
+    // ICE with `error[50609]: multiple instances of …` because the mangler
+    // ignored inner element-const. Post-fix both flavors converge on a single
+    // instance (TT = int) via variance, no collision.
+    let arr <- [10, 20, 30]
+    unsafe {
+        let from_mut   = sum_const_generic(each(arr))
+        let from_const = sum_const_generic([iterator for(x in 100..103); x])
+        t |> equal(60, from_mut)
+        t |> equal(303, from_const)
+    }
+}

From 57e515464a509f070622748793584030785bf01b Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 19:59:14 -0700
Subject: [PATCH 03/14] linq_fold: collapse topIsIter if/else arms via shared
 param-type helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every emit lane in linq_fold.das had a pair of qmacro templates that
differed only in `typedecl($e(topExpr)) - const` vs plain
`typedecl($e(topExpr))` on the invoke block's `$src` parameter. The
strip-when-iter logic was identical across lanes; the duplication had
no real branching purpose beyond "I couldn't conditionally append the
modifier inside a single qmacro template."

New `invoke_src_param_type(top)` helper computes the param TypeDecl
once — clone top._type, strip outer `constant` flag when isIterator —
and the emit lanes splice it with `$t(srcParamType)` instead of
`typedecl($e(topExpr)) - const` / `typedecl($e(topExpr))`.

Collapsed:
  - emit_counter_lane: 2 arms → 1
  - emit_array_lane: 3 arms → 3 (still need 3 — the axes split on
    `expr._type.isIterator` for the return path AND `sourceHasLength`
    for the reserve hint; param type itself is now uniform)
  - emit_accumulator_lane: 8 arms (4 op branches × 2; min/max share one) → 4
  - emit_early_exit_lane: 10 arms (5 ops × 2) → 5 (one per op)

Net -89 lines. Behavior unchanged: 133 fold tests, 66 AST shape tests,
695 INTERP, 695 AOT, 695 JIT all pass.

Boris flagged the duplication during the iterator-variance discussion
("why do we still have two versions?"); deferred while we landed the
ICE 50609 defuse, picked back up now.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 daslib/linq_fold.das | 273 +++++++++++++++----------------------------
 1 file changed, 92 insertions(+), 181 deletions(-)

diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index af723ef6e..1a0ed64ee 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -616,49 +616,50 @@ def private emit_length_shortcut(opName : string; var top : Expression?; srcName
     return finalize_invoke(res, at)
 }
 
+[macro_function]
+def private invoke_src_param_type(var top : Expression?) : TypeDeclPtr {
+    // Compute the invoke block's `$src` parameter type from the chain top:
+    // iterator sources need outer `-const` stripped so the body can advance the handle;
+    // length-bearing sources keep modifiers so a `const&` source matches.
+    var srcParamType = clone_type(top._type)
+    if (top._type.isIterator) {
+        srcParamType.flags.constant = false
+    }
+    return srcParamType
+}
+
 [macro_function]
 def private emit_counter_lane(var top : Expression?; srcName, accName, itName : string; var loopBody : Expression?; at : LineInfo) : Expression? {
     // Counter lane: `var acc = 0; for (it in src) { $loopBody }; return acc` inside invoke.
-    // Iterator sources must strip `-const` on the block param so the body can consume them;
-    // length-bearing sources keep modifiers so a `const&` source matches.
-    let topIsIter = top._type != null && top._type.isIterator
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
-    var res : Expression?
-    if (topIsIter) {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-            var $i(accName) = 0
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return $i(accName)
-        }, $e(topExpr)))
-    } else {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-            var $i(accName) = 0
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return $i(accName)
-        }, $e(topExpr)))
-    }
+    var srcParamType = invoke_src_param_type(top)
+    var res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+        var $i(accName) = 0
+        for ($i(itName) in $i(srcName)) {
+            $e(loopBody)
+        }
+        return $i(accName)
+    }, $e(topExpr)))
     return finalize_invoke(res, at)
 }
 
 [macro_function]
 def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr; srcName, accName, itName : string; at : LineInfo) : Expression? {
     // Array lane: `var acc : array<T>; [reserve]; for (it in src) { $loopBody }; return <- acc`
-    // wrapped in invoke. Three internal shapes selected by:
+    // wrapped in invoke. Two orthogonal axes select the body shape:
     //   - whole-pipeline iterator-ness (`expr._type.isIterator`) — drives the `to_sequence_move`
     //     return path so an iterator pipeline still ends in an iterator,
-    //   - source-length availability — drives the pre-reserve hint.
+    //   - source-length availability — drives the pre-reserve hint (only useful when source is
+    //     array-shaped; iter sources can't reserve cheaply and already strip outer const).
     let isIter = expr._type.isIterator
     let sourceHasLength = type_has_length(top._type)
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
+    var srcParamType = invoke_src_param_type(top)
     var res : Expression?
     if (isIter) {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
             var $i(accName) : array<$t(elementType)>
             for ($i(itName) in $i(srcName)) {
                 $e(loopBody)
@@ -666,7 +667,7 @@ def private emit_array_lane(var top : Expression?; var expr : Expression?; var l
             return <- $i(accName).to_sequence_move()
         }, $e(topExpr)))
     } elif (sourceHasLength) {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
             var $i(accName) : array<$t(elementType)>
             $i(accName) |> reserve(length($i(srcName)))
             for ($i(itName) in $i(srcName)) {
@@ -675,7 +676,7 @@ def private emit_array_lane(var top : Expression?; var expr : Expression?; var l
             return <- $i(accName)
         }, $e(topExpr)))
     } else {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
             var $i(accName) : array<$t(elementType)>
             for ($i(itName) in $i(srcName)) {
                 $e(loopBody)
@@ -806,86 +807,44 @@ def private emit_accumulator_lane(
         accType.flags.constant = false
         accType.flags.ref = false
     }
-    let topIsIter = top._type != null && top._type.isIterator
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
+    var srcParamType = invoke_src_param_type(top)
     var res : Expression?
     if (opName == "long_count") {
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(accName) : int64 = 0l
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                var $i(accName) : int64 = 0l
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            var $i(accName) : int64 = 0l
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return $i(accName)
+        }, $e(topExpr)))
     } elif (opName == "sum") {
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(accName) : $t(accType) = default<$t(accType)>
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                var $i(accName) : $t(accType) = default<$t(accType)>
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            var $i(accName) : $t(accType) = default<$t(accType)>
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return $i(accName)
+        }, $e(topExpr)))
     } elif (opName == "average") {
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(accName) : $t(accType) = default<$t(accType)>
-                var $i(cntName) = 0
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return double($i(accName)) / double($i(cntName))
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                var $i(accName) : $t(accType) = default<$t(accType)>
-                var $i(cntName) = 0
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return double($i(accName)) / double($i(cntName))
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            var $i(accName) : $t(accType) = default<$t(accType)>
+            var $i(cntName) = 0
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return double($i(accName)) / double($i(cntName))
+        }, $e(topExpr)))
     } elif (opName == "min" || opName == "max") {
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                var $i(firstName) = true
-                var $i(accName) : $t(accType)
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                var $i(firstName) = true
-                var $i(accName) : $t(accType)
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(accName)
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            var $i(firstName) = true
+            var $i(accName) : $t(accType)
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return $i(accName)
+        }, $e(topExpr)))
     } else {
         return null
     }
@@ -996,9 +955,9 @@ def private emit_early_exit_lane(
         loopBody = perMatchBlock
     }
     // Build the invoke wrapper per op.
-    let topIsIter = top._type != null && top._type.isIterator
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
+    var srcParamType = invoke_src_param_type(top)
     var res : Expression?
     if (opName == "first") {
         // Return type for first: projection's type (if any) or source's element type.
@@ -1014,99 +973,51 @@ def private emit_early_exit_lane(
             retType.flags.ref = false
         }
         // Tail: panic + unreachable default-return (matches linq.das line 2383).
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                panic("sequence contains no elements")
-                return default<$t(retType)>
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                panic("sequence contains no elements")
-                return default<$t(retType)>
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            panic("sequence contains no elements")
+            return default<$t(retType)>
+        }, $e(topExpr)))
     } elif (opName == "first_or_default") {
         // Bind `d` once at the top of the block (eager evaluation, matches linq.das line 2397).
         let defaultName = "`dval`{at.line}`{at.column}"
         var defaultExpr = clone_expression(terminatorCall.arguments[1])
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                let $i(defaultName) = $e(defaultExpr)
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(defaultName)
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                let $i(defaultName) = $e(defaultExpr)
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return $i(defaultName)
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            let $i(defaultName) = $e(defaultExpr)
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return $i(defaultName)
+        }, $e(topExpr)))
     } elif (opName == "any") {
         // Tail: return false (loop didn't hit).
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return false
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return false
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return false
+        }, $e(topExpr)))
     } elif (opName == "all") {
         // Tail: return true (vacuously true if loop empty; also true if no element failed pred).
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return true
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return true
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return true
+        }, $e(topExpr)))
     } elif (opName == "contains") {
         // Bind `v` once at the top of the block.
         let containsValName = "`cval`{at.line}`{at.column}"
         var valExpr = clone_expression(terminatorCall.arguments[1])
-        if (topIsIter) {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr)) - const) {
-                let $i(containsValName) = $e(valExpr)
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return false
-            }, $e(topExpr)))
-        } else {
-            res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-                let $i(containsValName) = $e(valExpr)
-                for ($i(itName) in $i(srcName)) {
-                    $e(loopBody)
-                }
-                return false
-            }, $e(topExpr)))
-        }
+        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+            let $i(containsValName) = $e(valExpr)
+            for ($i(itName) in $i(srcName)) {
+                $e(loopBody)
+            }
+            return false
+        }, $e(topExpr)))
     } else {
         return null
     }

From 74fdc84fc618bd31d7da17465a9f80fd7488c85b Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 20:26:46 -0700
Subject: [PATCH 04/14] linq_fold: collapse repeated emission patterns via
 helpers + single-$b
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Cognitive load pass on linq_fold.das. Net -66 lines (504 changed).

Patterns collapsed:

1. push/push_clone workhorse splits (2 sites in plan_loop_or_count array
   lane) — use push_clone everywhere. For workhorse types clone == copy
   (no allocation, same byte cost as push); for non-workhorse it deep-
   clones, which is what we want anyway.

2. emit_length_shortcut: 2 op-arms (count vs long_count) collapse to one
   via `$c(castName)(length(src))` where castName is "int" (identity for
   int) or "int64". `$c` splices the function name at call position.

3. emit_accumulator_lane: 4 op-arms (long_count / sum / average / min-max)
   collapse to one invoke template. Per-op variation is built into 3 lists
   (preludeStmts, perMatchStmts, returnExpr); all flattened into a single
   bodyStmts list spliced via $b. This shares scope under one wrapping
   block — splitting decls / for / return into separate splice points
   would put each in its own sub-block, hiding the accumulator (caught by
   AST dump under `options log_infer_passes`).

4. emit_early_exit_lane: 5 op-arms (first / first_or_default / any / all
   / contains) collapse the same way — preludeStmts + perMatchStmts +
   tailStmts → single bodyStmts → single $b.

5. Multi-stmt-per-op cases use `qmacro_block_to_array() { stmt1; stmt2 }`
   to express the prelude/per-match as one literal block instead of N
   separate `qmacro_expr() { stmt }` pushes. Roughly halves the line
   count for ops with 2+ stmts (average, min/max).

New private helpers (all `[macro_function]`):
  - prepend_binds(stmts, intermediateBinds) — shared chain-bind prefix.
  - stmts_to_expr(stmts) — collapse N stmts to a single expression
    (pass through when N==1, wrap in qmacro_block when N>1). Used by
    every lane that builds a per-element block whose length is data-
    dependent.
  - wrap_with_condition(body, cond) — `if (cond) { body }` when cond
    is non-null; pass-through otherwise. Replaces the if/else dance
    every lane had for fusing the upstream `where_` predicate.
  - min_max_compare(workhorse, opName, valName, accName) — emits the
    perf-critical compare. Workhorse types use direct `<` / `>`
    (single instruction); non-workhorse uses `_::less` with operand
    flip for max. Replaces a 4-arm if/elif inline ladder.

Tests: 133 fold + 66 AST + 695 INTERP + 695 AOT + 695 JIT all pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 daslib/linq_fold.das | 504 +++++++++++++++++++------------------------
 1 file changed, 219 insertions(+), 285 deletions(-)

diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index 1a0ed64ee..2cae1691c 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -600,19 +600,14 @@ def private finalize_invoke(var res : Expression?; at : LineInfo) : Expression?
 def private emit_length_shortcut(opName : string; var top : Expression?; srcName : string; at : LineInfo) : Expression? {
     // Count-shaped shortcut: emit `length(src)` (count) or `int64(length(src))` (long_count)
     // directly, eliding the loop. Caller has verified count-shaped terminator + no predicate
-    // + all projections pure + source has length.
+    // + all projections pure + source has length. `int(int)` is identity, so a single
+    // template handles both via `$c(castName)`.
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
-    var res : Expression?
-    if (opName == "long_count") {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-            return int64(length($i(srcName)))
-        }, $e(topExpr)))
-    } else {
-        res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
-            return length($i(srcName))
-        }, $e(topExpr)))
-    }
+    let castName = opName == "long_count" ? "int64" : "int"
+    var res = qmacro(invoke($($i(srcName) : typedecl($e(topExpr))) {
+        return $c(castName)(length($i(srcName)))
+    }, $e(topExpr)))
     return finalize_invoke(res, at)
 }
 
@@ -628,6 +623,58 @@ def private invoke_src_param_type(var top : Expression?) : TypeDeclPtr {
     return srcParamType
 }
 
+[macro_function]
+def private prepend_binds(var stmts : array<Expression?>; var intermediateBinds : array<Expression?>) {
+    // Splice the chain's let-bindings (one per upstream `select`) ahead of the per-element
+    // work. Every lane builds its per-match block this way: prior bindings first, then the
+    // op-specific stmts.
+    if (empty(intermediateBinds)) return
+    var prefixed : array<Expression?>
+    prefixed |> reserve(length(intermediateBinds) + length(stmts))
+    for (b in intermediateBinds) {
+        prefixed |> push(b)
+    }
+    for (s in stmts) {
+        prefixed |> push(s)
+    }
+    swap(stmts, prefixed)
+}
+
+[macro_function]
+def private stmts_to_expr(var stmts : array<Expression?>) : Expression? {
+    // Collapse a stmt list to a single expression: pass through when there's only one,
+    // wrap in a qmacro_block otherwise. Used wherever a lane builds an N-stmt block whose
+    // length is data-dependent.
+    if (length(stmts) == 1) return stmts[0]
+    return <- qmacro_block() {
+        $b(stmts)
+    }
+}
+
+[macro_function]
+def private wrap_with_condition(var body : Expression?; var cond : Expression?) : Expression? {
+    // Wrap `body` in `if ($cond) { body }` when `cond` is non-null, else return `body` as-is.
+    // Used by every lane that fuses an upstream `where_` predicate into the per-element work.
+    if (cond == null) return body
+    return <- qmacro_expr() {
+        if ($e(cond)) {
+            $e(body)
+        }
+    }
+}
+
+[macro_function]
+def private min_max_compare(workhorse : bool; opName : string; valName, accName : string) : Expression? {
+    // Workhorse types use direct `<` / `>` (single-instruction comparison) — this is the
+    // perf-critical branch. Non-workhorse falls back to `_::less` so user / tuple comparator
+    // overloads still apply. For max, the operands flip so `_::less` still semantically means
+    // "accName is less than valName" (i.e. valName > accName).
+    if (workhorse && opName == "min") return <- qmacro($i(valName) < $i(accName))
+    if (workhorse) return <- qmacro($i(valName) > $i(accName))
+    if (opName == "min") return <- qmacro(_::less($i(valName), $i(accName)))
+    return <- qmacro(_::less($i(accName), $i(valName)))
+}
+
 [macro_function]
 def private emit_counter_lane(var top : Expression?; srcName, accName, itName : string; var loopBody : Expression?; at : LineInfo) : Expression? {
     // Counter lane: `var acc = 0; for (it in src) { $loopBody }; return acc` inside invoke.
@@ -699,17 +746,9 @@ def private emit_accumulator_lane(
                                   at : LineInfo
                                   ) : Expression? {
     // Ring 1 single-pass accumulator lane: sum / min / max / average / long_count.
-    // Builds the per-matched-element statement block (intermediates → final-value bind if needed
-    // → per-op update), wraps in `if (whereCond)` when present, then wraps in the op-specific
-    // invoke shell (accumulator init + for-loop + return-of-accumulator).
-    // For min/max the workhorse-branch is decisive for perf: workhorse types use direct
-    // `<` / `>` (single-instruction); non-workhorse falls back to `_::less` to preserve user
-    // and tuple comparator overloads.
-    var perMatchStmts : array<Expression?>
-    perMatchStmts |> reserve(length(intermediateBinds) + 4)
-    for (b in intermediateBinds) {
-        perMatchStmts |> push(b)
-    }
+    // Per-op variation lives in (preludeStmts, perMatchStmts, returnExpr) — the invoke shell
+    // is one template. For min/max the workhorse-branch on `<` / `>` vs `_::less` is the
+    // perf-critical decision (single-instruction vs function call).
     var valueExpr : Expression?
     if (projection != null) {
         valueExpr = clone_expression(projection)
@@ -718,11 +757,28 @@ def private emit_accumulator_lane(
             $i(itName)
         }
     }
-    // Per-op per-element update
+    // Accumulator type: projection-typed if present, else source's element type. Strip
+    // `const` and `&` so the accumulator is a mutable value (`+=` / `:=` apply).
+    var accType : TypeDeclPtr
+    if (projection != null) {
+        accType = clone_type(projection._type)
+    } else {
+        accType = clone_type(elementType)
+    }
+    if (accType != null) {
+        accType.flags.constant = false
+        accType.flags.ref = false
+    }
     let valBindName = "`val`{at.line}`{at.column}"
     let firstName = "`first`{at.line}`{at.column}"
     let cntName = "`cnt`{at.line}`{at.column}"
+    var preludeStmts : array<Expression?>
+    var perMatchStmts : array<Expression?>
+    var returnExpr : Expression?
     if (opName == "long_count") {
+        preludeStmts <- qmacro_block_to_array() {
+            var $i(accName) : int64 = 0l
+        }
         // Value is unused; mirror counter-lane discipline and bind only when projection has
         // observable side effects so user-visible f(x) calls still fire.
         if (projection != null && has_sideeffects(projection)) {
@@ -734,38 +790,41 @@ def private emit_accumulator_lane(
         perMatchStmts |> push <| qmacro_expr() {
             $i(accName) ++
         }
+        returnExpr = qmacro_expr() {
+            $i(accName)
+        }
     } elif (opName == "sum") {
-        perMatchStmts |> push <| qmacro_expr() {
+        preludeStmts <- qmacro_block_to_array() {
+            var $i(accName) : $t(accType) = default<$t(accType)>
+        }
+        perMatchStmts <- qmacro_block_to_array() {
             $i(accName) += $e(valueExpr)
         }
+        returnExpr = qmacro_expr() {
+            $i(accName)
+        }
     } elif (opName == "average") {
-        perMatchStmts |> push <| qmacro_expr() {
-            $i(accName) += $e(valueExpr)
+        preludeStmts <- qmacro_block_to_array() {
+            var $i(accName) : $t(accType) = default<$t(accType)>
+            var $i(cntName) = 0
         }
-        perMatchStmts |> push <| qmacro_expr() {
+        perMatchStmts <- qmacro_block_to_array() {
+            $i(accName) += $e(valueExpr)
             $i(cntName) ++
         }
+        returnExpr = qmacro_expr() {
+            double($i(accName)) / double($i(cntName))
+        }
     } elif (opName == "min" || opName == "max") {
+        preludeStmts <- qmacro_block_to_array() {
+            var $i(firstName) = true
+            var $i(accName) : $t(accType)
+        }
         let workhorse = ((projection != null && projection._type != null && projection._type.isWorkhorseType)
             || (projection == null && elementType != null && elementType.isWorkhorseType))
-        var compareExpr : Expression?
-        if (workhorse) {
-            if (opName == "min") {
-                compareExpr = qmacro($i(valBindName) < $i(accName))
-            } else {
-                compareExpr = qmacro($i(valBindName) > $i(accName))
-            }
-        } else {
-            if (opName == "min") {
-                compareExpr = qmacro(_::less($i(valBindName), $i(accName)))
-            } else {
-                compareExpr = qmacro(_::less($i(accName), $i(valBindName)))
-            }
-        }
-        perMatchStmts |> push <| qmacro_expr() {
+        var compareExpr = min_max_compare(workhorse, opName, valBindName, accName)
+        perMatchStmts <- qmacro_block_to_array() {
             let $i(valBindName) = $e(valueExpr)
-        }
-        perMatchStmts |> push <| qmacro_expr() {
             if ($i(firstName)) {
                 $i(accName) := $i(valBindName)
                 $i(firstName) = false
@@ -773,81 +832,37 @@ def private emit_accumulator_lane(
                 $i(accName) := $i(valBindName)
             }
         }
+        returnExpr = qmacro_expr() {
+            $i(accName)
+        }
     } else {
         return null
     }
-    var perMatchBlock : Expression?
-    if (length(perMatchStmts) == 1) {
-        perMatchBlock = perMatchStmts[0]
-    } else {
-        perMatchBlock = qmacro_block() {
-            $b(perMatchStmts)
-        }
+    prepend_binds(perMatchStmts, intermediateBinds)
+    var loopBody = wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond)
+    // Collect all body statements into one list so they share scope when spliced via $b.
+    // Splitting decls / for / return into separate splice tags would put each in its own
+    // sub-block, hiding the accumulator from later statements (caught by AST dump under
+    // `options log_infer_passes`).
+    var bodyStmts : array<Expression?>
+    bodyStmts |> reserve(length(preludeStmts) + 2)
+    for (s in preludeStmts) {
+        bodyStmts |> push(s)
     }
-    var loopBody : Expression?
-    if (whereCond != null) {
-        loopBody = qmacro_expr() {
-            if ($e(whereCond)) {
-                $e(perMatchBlock)
-            }
+    bodyStmts |> push <| qmacro_expr() {
+        for ($i(itName) in $i(srcName)) {
+            $e(loopBody)
         }
-    } else {
-        loopBody = perMatchBlock
-    }
-    // Accumulator type: from the projection if present, else from the source's element type.
-    // Strip `const` and `&` modifiers — the accumulator must be a mutable value so `+=` /
-    // `:=` work on it, regardless of how the source delivers elements (`int const &`).
-    var accType : TypeDeclPtr
-    if (projection != null) {
-        accType = clone_type(projection._type)
-    } else {
-        accType = clone_type(elementType)
     }
-    if (accType != null) {
-        accType.flags.constant = false
-        accType.flags.ref = false
+    bodyStmts |> push <| qmacro_expr() {
+        return $e(returnExpr)
     }
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
     var srcParamType = invoke_src_param_type(top)
-    var res : Expression?
-    if (opName == "long_count") {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(accName) : int64 = 0l
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return $i(accName)
-        }, $e(topExpr)))
-    } elif (opName == "sum") {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(accName) : $t(accType) = default<$t(accType)>
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return $i(accName)
-        }, $e(topExpr)))
-    } elif (opName == "average") {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(accName) : $t(accType) = default<$t(accType)>
-            var $i(cntName) = 0
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return double($i(accName)) / double($i(cntName))
-        }, $e(topExpr)))
-    } elif (opName == "min" || opName == "max") {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(firstName) = true
-            var $i(accName) : $t(accType)
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return $i(accName)
-        }, $e(topExpr)))
-    } else {
-        return null
-    }
+    var res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+        $b(bodyStmts)
+    }, $e(topExpr)))
     return finalize_invoke(res, at)
 }
 
@@ -880,16 +895,13 @@ def private emit_early_exit_lane(
     // Ring 2 early-exit lane: first / first_or_default / any / all / contains.
     // Emits `invoke($block { [arg-binds]; for { ...; return X }; tail }, src)` where the
     // block-level `return` yields the invoke's result without escaping the user's function.
-    // Stack-allocated block (no heap alloc), per Boris's confirmation.
+    // Stack-allocated block (no heap alloc), per Boris's confirmation. Per-op variation
+    // lives in (preludeStmts, per-element return logic, tailStmts) — the invoke shell is
+    // one template; all stmts share scope via a single $b(bodyStmts) splice.
+    // If there's a projection chain, bind the final projection to a local so the per-element
+    // work names the value directly. Avoids re-evaluating the projection in compares and
+    // side-steps the ExprRef2Value substitution trap that splicing typed expressions hits.
     var perMatchStmts : array<Expression?>
-    perMatchStmts |> reserve(length(intermediateBinds) + 4)
-    for (b in intermediateBinds) {
-        perMatchStmts |> push(b)
-    }
-    // If there's a projection chain, bind the final projection to a local so the
-    // op-specific work (return value, predicate input, equality compare) names the value
-    // directly. Avoids re-evaluating the projection in compares and side-steps the
-    // ExprRef2Value substitution trap that splicing typed expressions hits.
     var valueName = itName
     let projBindName = "`vproj`{at.line}`{at.column}"
     if (projection != null) {
@@ -898,11 +910,43 @@ def private emit_early_exit_lane(
         }
         valueName = projBindName
     }
-    // Per-op per-element work (early-return inside the invoke block)
-    if (opName == "first" || opName == "first_or_default") {
+    // Per-op (preludeStmts, per-element work appended to perMatchStmts, tailStmts).
+    var preludeStmts : array<Expression?>
+    var tailStmts : array<Expression?>
+    if (opName == "first") {
+        // Return type for first: projection's type (if any) or source's element type.
+        // Strip const/ref so `default<T>` (the fallback path) yields a fresh value.
+        var retType : TypeDeclPtr
+        if (projection != null) {
+            retType = clone_type(projection._type)
+        } else {
+            retType = clone_type(elementType)
+        }
+        if (retType != null) {
+            retType.flags.constant = false
+            retType.flags.ref = false
+        }
+        perMatchStmts |> push <| qmacro_expr() {
+            return $i(valueName)
+        }
+        // Tail: panic + unreachable default-return (matches linq.das line 2383).
+        tailStmts <- qmacro_block_to_array() {
+            panic("sequence contains no elements")
+            return default<$t(retType)>
+        }
+    } elif (opName == "first_or_default") {
+        // Bind `d` once at the top of the block (eager evaluation, matches linq.das line 2397).
+        let defaultName = "`dval`{at.line}`{at.column}"
+        var defaultExpr = clone_expression(terminatorCall.arguments[1])
+        preludeStmts |> push <| qmacro_expr() {
+            let $i(defaultName) = $e(defaultExpr)
+        }
         perMatchStmts |> push <| qmacro_expr() {
             return $i(valueName)
         }
+        tailStmts |> push <| qmacro_expr() {
+            return $i(defaultName)
+        }
     } elif (opName == "any") {
         let argCount = length(terminatorCall.arguments)
         if (argCount > 1) {
@@ -917,6 +961,9 @@ def private emit_early_exit_lane(
                 return true
             }
         }
+        tailStmts |> push <| qmacro_expr() {
+            return false
+        }
     } elif (opName == "all") {
         var predExpr = fold_linq_cond(clone_expression(terminatorCall.arguments[1]), valueName)
         perMatchStmts |> push <| qmacro_expr() {
@@ -924,103 +971,50 @@ def private emit_early_exit_lane(
                 return false
             }
         }
+        tailStmts |> push <| qmacro_expr() {
+            return true
+        }
     } elif (opName == "contains") {
-        // `v` was bound once at the top of the block (see invoke wrappers below); per-element
-        // compare uses the bound name to avoid re-evaluating an expensive argument.
+        // `v` is bound once at the top of the block; per-element compare uses the bound name
+        // to avoid re-evaluating an expensive argument.
         let containsValName = "`cval`{at.line}`{at.column}"
+        var valExpr = clone_expression(terminatorCall.arguments[1])
+        preludeStmts |> push <| qmacro_expr() {
+            let $i(containsValName) = $e(valExpr)
+        }
         perMatchStmts |> push <| qmacro_expr() {
             if ($i(valueName) == $i(containsValName)) {
                 return true
             }
         }
+        tailStmts |> push <| qmacro_expr() {
+            return false
+        }
     } else {
         return null
     }
-    var perMatchBlock : Expression?
-    if (length(perMatchStmts) == 1) {
-        perMatchBlock = perMatchStmts[0]
-    } else {
-        perMatchBlock = qmacro_block() {
-            $b(perMatchStmts)
-        }
+    prepend_binds(perMatchStmts, intermediateBinds)
+    var loopBody = wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond)
+    // Single-$b body so all stmts (prelude + for + tail) share scope under one wrapping block.
+    var bodyStmts : array<Expression?>
+    bodyStmts |> reserve(length(preludeStmts) + length(tailStmts) + 1)
+    for (s in preludeStmts) {
+        bodyStmts |> push(s)
     }
-    var loopBody : Expression?
-    if (whereCond != null) {
-        loopBody = qmacro_expr() {
-            if ($e(whereCond)) {
-                $e(perMatchBlock)
-            }
+    bodyStmts |> push <| qmacro_expr() {
+        for ($i(itName) in $i(srcName)) {
+            $e(loopBody)
         }
-    } else {
-        loopBody = perMatchBlock
     }
-    // Build the invoke wrapper per op.
+    for (s in tailStmts) {
+        bodyStmts |> push(s)
+    }
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
     var srcParamType = invoke_src_param_type(top)
-    var res : Expression?
-    if (opName == "first") {
-        // Return type for first: projection's type (if any) or source's element type.
-        // Strip const/ref so `default<T>` (the fallback path) yields a fresh value.
-        var retType : TypeDeclPtr
-        if (projection != null) {
-            retType = clone_type(projection._type)
-        } else {
-            retType = clone_type(elementType)
-        }
-        if (retType != null) {
-            retType.flags.constant = false
-            retType.flags.ref = false
-        }
-        // Tail: panic + unreachable default-return (matches linq.das line 2383).
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            panic("sequence contains no elements")
-            return default<$t(retType)>
-        }, $e(topExpr)))
-    } elif (opName == "first_or_default") {
-        // Bind `d` once at the top of the block (eager evaluation, matches linq.das line 2397).
-        let defaultName = "`dval`{at.line}`{at.column}"
-        var defaultExpr = clone_expression(terminatorCall.arguments[1])
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            let $i(defaultName) = $e(defaultExpr)
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return $i(defaultName)
-        }, $e(topExpr)))
-    } elif (opName == "any") {
-        // Tail: return false (loop didn't hit).
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return false
-        }, $e(topExpr)))
-    } elif (opName == "all") {
-        // Tail: return true (vacuously true if loop empty; also true if no element failed pred).
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return true
-        }, $e(topExpr)))
-    } elif (opName == "contains") {
-        // Bind `v` once at the top of the block.
-        let containsValName = "`cval`{at.line}`{at.column}"
-        var valExpr = clone_expression(terminatorCall.arguments[1])
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            let $i(containsValName) = $e(valExpr)
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return false
-        }, $e(topExpr)))
-    } else {
-        return null
-    }
+    var res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+        $b(bodyStmts)
+    }, $e(topExpr)))
     return finalize_invoke(res, at)
 }
 
@@ -1121,112 +1115,52 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
         return emit_early_exit_lane(lastName, top, projection, whereCond,
             intermediateBinds, elementType, terminatorCall, srcName, itName, at)
     }
-    // Build the per-element loop body for COUNTER / ARRAY.
+    // Build the per-element loop body for COUNTER / ARRAY. Both lanes follow the same shape:
+    // collect per-element stmts (incl. side-effect-preserving projection binds), prepend
+    // chain `intermediateBinds`, collapse to a single expression, wrap with `whereCond`.
     var loopBody : Expression?
     if (counterLane) {
         // Counter lane must evaluate the projection (and any chained intermediates) per
         // matched element so user-visible side effects fire — `count(select(src, f))` in
         // plain LINQ invokes f per element, and our fold must match. Bind the final
         // projection to a discardable local; daslang macro output bypasses LINT002.
-        var sideEffectStmts : array<Expression?>
-        sideEffectStmts |> reserve(length(intermediateBinds) + 2)
-        for (b in intermediateBinds) {
-            sideEffectStmts |> push(b)
-        }
-        // Bind the final projection only when it might have side effects. Pure projections
-        // (the common case — `_._field * 2`) can be elided entirely; no need to rely on
-        // the optimizer to DCE a dead store afterwards.
+        // Pure projections (the common case — `_._field * 2`) get elided entirely; no need
+        // to rely on the optimizer to DCE a dead store afterwards.
+        var stmts : array<Expression?>
         if (projection != null && has_sideeffects(projection)) {
             let finalBindName = "`vfinal`{at.line}`{at.column}"
-            sideEffectStmts |> push <| qmacro_expr() {
+            stmts |> push <| qmacro_expr() {
                 var $i(finalBindName) = $e(projection)
             }
         }
-        sideEffectStmts |> push <| qmacro_expr() {
+        stmts |> push <| qmacro_expr() {
             $i(accName) ++
         }
-        var incBlock : Expression?
-        if (length(sideEffectStmts) == 1) {
-            incBlock = sideEffectStmts[0]
-        } else {
-            incBlock = qmacro_block() {
-                $b(sideEffectStmts)
-            }
-        }
-        if (whereCond != null) {
-            loopBody = qmacro_expr() {
-                if ($e(whereCond)) {
-                    $e(incBlock)
-                }
-            }
-        } else {
-            loopBody = incBlock
-        }
+        prepend_binds(stmts, intermediateBinds)
+        loopBody = wrap_with_condition(stmts_to_expr(stmts), whereCond)
     } else {
-        // array lane
+        // Array lane. `push_clone` is the safe append everywhere: for workhorse types it's a
+        // byte copy (same cost as `push`); for non-workhorse it deep-clones, avoiding the
+        // mutation that `emplace` would do via move-out — important when the projection
+        // returns a ref into the source. For chained selects, `intermediateBinds` carries
+        // N-1 prior bindings; splice them in before the push so each lambda body can resolve
+        // its renamed parameter to the correct binding name.
+        var stmts : array<Expression?>
         if (projection != null) {
-            // push for workhorse (cheap copy), push_clone for non-workhorse (deep clone,
-            // never mutates source). emplace would move out of the projection's value,
-            // which is unsafe when the projection returns a ref into the source.
-            // For chained selects, `intermediateBinds` carries N-1 prior bindings; splice
-            // them in before the push so each lambda body can resolve its renamed parameter
-            // to the correct binding name.
-            let workhorseProj = projection._type != null && projection._type.isWorkhorseType
-            var pushStmt : Expression?
-            if (workhorseProj) {
-                pushStmt = qmacro_expr() {
-                    $i(accName) |> push($e(projection))
-                }
-            } else {
-                pushStmt = qmacro_expr() {
-                    $i(accName) |> push_clone($e(projection))
-                }
-            }
-            var perElem : Expression?
-            if (empty(intermediateBinds)) {
-                perElem = pushStmt
-            } else {
-                var perElemStmts : array<Expression?>
-                perElemStmts |> reserve(length(intermediateBinds) + 1)
-                for (b in intermediateBinds) {
-                    perElemStmts |> push(b)
-                }
-                perElemStmts |> push(pushStmt)
-                perElem = qmacro_block() {
-                    $b(perElemStmts)
-                }
-            }
-            if (whereCond != null) {
-                loopBody = qmacro_expr() {
-                    if ($e(whereCond)) {
-                        $e(perElem)
-                    }
-                }
-            } else {
-                loopBody = perElem
+            stmts |> push <| qmacro_expr() {
+                $i(accName) |> push_clone($e(projection))
             }
         } elif (whereCond != null) {
-            // Identity case (no projection): `it` aliases the source element. Workhorse
-            // types can `push` (cheap copy); non-workhorse needs `push_clone` to avoid
-            // mutating the source via a move.
-            let elemWorkhorse = elementType != null && elementType.isWorkhorseType
-            if (elemWorkhorse) {
-                loopBody = qmacro_expr() {
-                    if ($e(whereCond)) {
-                        $i(accName) |> push($i(itName))
-                    }
-                }
-            } else {
-                loopBody = qmacro_expr() {
-                    if ($e(whereCond)) {
-                        $i(accName) |> push_clone($i(itName))
-                    }
-                }
+            // Identity case (no projection): `it` aliases the source element.
+            stmts |> push <| qmacro_expr() {
+                $i(accName) |> push_clone($i(itName))
             }
         } else {
             // identity chain — nothing to fuse; let the caller fall through.
             return null
         }
+        prepend_binds(stmts, intermediateBinds)
+        loopBody = wrap_with_condition(stmts_to_expr(stmts), whereCond)
     }
     if (counterLane) {
         return emit_counter_lane(top, srcName, accName, itName, loopBody, at)

From 23c14cc058148aefe7c9f058700d3c7052a62f80 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 20:30:08 -0700
Subject: [PATCH 05/14] linq_fold: collapse emit_array_lane 3 arms via
 single-$b bodyStmts

Same pattern as the accumulator + early-exit lanes from prior commit.
Three arms (isIter / sourceHasLength / else) become one invoke template
with a per-axis stmt list:
  - var $i(accName) : array<T>           (always)
  - reserve(length(src))                  (only when sourceHasLength && !isIter)
  - for-loop                              (always)
  - return <- acc.to_sequence_move()      (when isIter)
  - return <- acc                         (else)

All stmts share scope under the single $b(bodyStmts) splice. Same shape
applies across all four emit lanes in the file now.

Tests: 133 fold + 66 AST + 695 INTERP + 695 AOT all pass.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 daslib/linq_fold.das | 47 ++++++++++++++++++++++----------------------
 1 file changed, 24 insertions(+), 23 deletions(-)

diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index 2cae1691c..662ffcff3 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -698,39 +698,40 @@ def private emit_array_lane(var top : Expression?; var expr : Expression?; var l
     //   - whole-pipeline iterator-ness (`expr._type.isIterator`) — drives the `to_sequence_move`
     //     return path so an iterator pipeline still ends in an iterator,
     //   - source-length availability — drives the pre-reserve hint (only useful when source is
-    //     array-shaped; iter sources can't reserve cheaply and already strip outer const).
+    //     array-shaped; iter sources can't reserve cheaply).
+    // Single-$b body shares scope: `var acc` decl + optional reserve + for-loop + return all
+    // live in one block.
     let isIter = expr._type.isIterator
     let sourceHasLength = type_has_length(top._type)
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
     var srcParamType = invoke_src_param_type(top)
-    var res : Expression?
+    var bodyStmts : array<Expression?>
+    bodyStmts |> push <| qmacro_expr() {
+        var $i(accName) : array<$t(elementType)>
+    }
+    if (sourceHasLength && !isIter) {
+        bodyStmts |> push <| qmacro_expr() {
+            $i(accName) |> reserve(length($i(srcName)))
+        }
+    }
+    bodyStmts |> push <| qmacro_expr() {
+        for ($i(itName) in $i(srcName)) {
+            $e(loopBody)
+        }
+    }
     if (isIter) {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(accName) : array<$t(elementType)>
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
+        bodyStmts |> push <| qmacro_expr() {
             return <- $i(accName).to_sequence_move()
-        }, $e(topExpr)))
-    } elif (sourceHasLength) {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(accName) : array<$t(elementType)>
-            $i(accName) |> reserve(length($i(srcName)))
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
-            return <- $i(accName)
-        }, $e(topExpr)))
+        }
     } else {
-        res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
-            var $i(accName) : array<$t(elementType)>
-            for ($i(itName) in $i(srcName)) {
-                $e(loopBody)
-            }
+        bodyStmts |> push <| qmacro_expr() {
             return <- $i(accName)
-        }, $e(topExpr)))
+        }
     }
+    var res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+        $b(bodyStmts)
+    }, $e(topExpr)))
     return finalize_invoke(res, at)
 }
 

From eac77b8dd97bf928f6d8162e39e074cb00743bf1 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 22:07:59 -0700
Subject: [PATCH 06/14] linq_fold Phase 2C Ring 3: take/skip splice in all 4
 lanes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Recognizes `[where_*][select*][skip?][take?] |> terminator` chains and
emits bounded-loop counters spliced into the per-element work across
counter, accumulator, early-exit, and array lanes. Trailing take/skip
(no explicit aggregator) routes to ARRAY lane with implicit to_array.

New helpers (linq_fold.das):
- append_skip_take_prelude — emits `var skipRem = K` / `var taken = 0`
  alongside the lane accumulator.
- wrap_with_skip_take — prepends take-limit break + skip-counter continue
  + take-counter increment to the per-match block. Take-increment placed
  BEFORE the per-match body so early-exit terminators (return-from-block)
  don't make it unreachable (LINT001).
- is_buffer_required_op — recognizes order/order_by (and their descending
  forms)/distinct/distinct_by/reverse/group_by/group_by_lazy/
  zip/join/left_join/group_join by name; planner returns null with
  per-op `// TODO Phase 2X: <FutureMode>` markers, leaving room for
  future BufferTopN / BufferDistinct / MultiSourceZip / BufferedJoin /
  BufferGroupBy / BufferReverse emit modes.

Planner extensions:
- ChainStage recognition state machine on (seenSelect, seenSkip, seenTake)
  rejects reverse-order shapes (e.g. where-after-skip). At most one skip
  and one take per chain in Phase 2C.
- Range-form `take(start..end)` / `skip(start..end)` (slice operator)
  falls through — different semantics from int-form.
- Count-shaped length shortcut + any-empty shortcut both gate on
  `noLimits` (skip/take truncate the length).
- classify_terminator: take/skip terminal → ARRAY lane (implicit to_array
  after `to_array` strip via linqCalls `skip = true`).
- Array-lane reserve tightens to inline `min(N, length(src))` when take
  is present (no math::min dep at the call site).

Tests:
- test_linq_fold.das: 28 new functional cases (4 lanes × {where, skip,
  take, where.skip.take, edge cases like take(0), skip(huge)}). Covers
  correctness against expected values; for accumulator long_count uses
  `let r` (drops typename assertion since `let` adds const).
- test_linq_fold_ast.das: 5 new AST tests with count_break_continue
  helper — assert splice form (one fused for-loop + break for take +
  continue for skip) for counter/accumulator/array lanes, and
  fall-through (no invoke wrapper) for order_by.take and distinct.

Benchmarks:
- New take_sum_aggregate.das (accumulator-lane take coverage) and
  take_count_filtered.das (counter-lane take + where).
- skip_take, take_sum_aggregate, take_count_filtered all drop to 0
  ns/op at 100K rows (bounded loop O(K+N) vs source O(100K)). take_count
  unchanged (m3f_old already iterator-fused).

Lint cleanup (Boris's "lint clean even in generated code"):
- daslib/ast_match.das: rename `next_var`-emitted gensyms from `_qm_N`
  to `qm_N` — used-downstream, the underscore prefix mis-signaled
  unused and triggered LINT004 in every qmatch_function consumer.
- daslib/ast_match.das: switch 4 specific qmacro_expr emissions from
  `var $i(...)` to `let $i(...)` (the gensyms aren't reassigned in
  generated code; LINT003 was firing across consumers).
- daslib/linq.das: zip_impl (result_selector variant) and chunk_impl gain
  reserve hints — both were emitting PERF006 from push_clone-
  in-loop-without-reserve when instantiated from test code.

LINQ.md gains a Phase 2C Ring 3 section with deltas + emission-shape
sketch, the buffer-required marker arms inventory, and a "Planned:
fail-loudly contract" subsection documenting the future PR that will
upgrade silent fallback to `macro_error("_fold: cannot splice — ...")`
per Boris's sqlite_linq-style "splice or error" design directive.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/sql/LINQ.md                 |  85 ++++++++-
 benchmarks/sql/take_count_filtered.das |  56 ++++++
 benchmarks/sql/take_sum_aggregate.das  |  56 ++++++
 daslib/ast_match.das                   |  13 +-
 daslib/linq.das                        |   8 +
 daslib/linq_fold.das                   | 241 +++++++++++++++++++++----
 tests/linq/test_linq_fold.das          | 236 ++++++++++++++++++++----
 tests/linq/test_linq_fold_ast.das      | 166 ++++++++++++++++-
 8 files changed, 773 insertions(+), 88 deletions(-)
 create mode 100644 benchmarks/sql/take_count_filtered.das
 create mode 100644 benchmarks/sql/take_sum_aggregate.das

diff --git a/benchmarks/sql/LINQ.md b/benchmarks/sql/LINQ.md
index 8029d4a6a..25105e908 100644
--- a/benchmarks/sql/LINQ.md
+++ b/benchmarks/sql/LINQ.md
@@ -25,8 +25,10 @@ See `~/.claude/plans/keen-hopping-balloon.md` for the long-form plan.
 | 2A | Loop planner — `_fold` emits explicit for-loops for `[where_*][select?]` (array lane) and `[where_*][select?] |> count` (counter lane); anything else falls through unfolded. No comprehensions, no dispatch back to `_old_fold`. | ✅ done |
 | 2B Ring 1 | Accumulator lane: `sum`, `min`, `max`, `average`, `long_count` with workhorse `<` / `>` for min/max scalars and `_::less` fallback for tuples/user types. `long_count` shares the count-length shortcut. | ✅ done |
 | 2B Ring 2 | Early-exit lane: `first`, `first_or_default`, `any`, `all`, `contains` via `invoke($block { ... return val })`. Predicate-free `any` gets a `length(src) > 0` shortcut. | ✅ done |
-| 2C | `take(N)` / `skip(N)` in counter/array/accumulator/early-exit lanes; non-workhorse chained selects via `:=`-clone; `_select|_where` (where-after-select; needs `ExprRef2Value` substitution). | ⏳ |
-| 3+ | Buffer-required operators: `distinct`, `sort`, `reverse`, `groupby`, `zip`, `join`. Once we go array, we stay array | ⏳ |
+| 2C Ring 3 | `take(N)` / `skip(N)` in counter/array/accumulator/early-exit lanes. Canonical chain order `[where_*][select*][skip?][take?] |> terminator`. Trailing take/skip (no explicit aggregator) → ARRAY lane with implicit `to_array`. Range-form `take(start..end)` falls through (slice operator, different semantics). Buffer-required ops (`order_by`, `distinct`, `reverse`, `group_by`, `zip`, `join`, `left_join`, `group_join`) recognized by name and emit silent fallback with future-mode markers (BufferTopN / BufferDistinct / BufferReverse / BufferGroupBy / MultiSourceZip / BufferedJoin). | ✅ done |
+| 2C Ring 4 | Non-workhorse chained selects via `:=`-clone. | ⏳ |
+| 2D | Fail-loudly contract — see "Planned" section below | ⏳ |
+| 3+ | Buffer-required emit modes: `distinct`, `sort`/`order_by`, `reverse`, `groupby`, `zip`, `join`. Once we go array, we stay array | ⏳ |
 | 4 | Final coverage pass + docs; full 4-way comparison table refresh; parity-test sweep | ⏳ |
 
 ## Baselines (100K rows, INTERP mode)
@@ -160,6 +162,77 @@ All Ring 2 ops hit single-digit ns/op with **zero allocations**. Early-exit case
 
 `first` / `first_or_default` collapse to sub-ns/op because the where matches near the front of the array; the early-exit returns at the first hit and per-element timing measures the loop overhead per the chunk_size (100K), not per actual iteration. The same is why `any_match` was already at 0 ns/op pre-Phase-2B — `_old_fold` and m3 also bail early on first match.
 
+## Phase 2C Ring 3 — splice-mode take/skip (2026-05-17)
+
+`_fold` now recognizes the chain shape `[where_*][select*][skip?][take?] |> terminator` and emits the per-element work wrapped with bounded-loop counters. Skip and take guards are spliced into the per-match block (after the optional `where` filter, before the lane-specific work and intermediate projection binds). Counters live alongside the lane's accumulator in the outer invoke block.
+
+**Emission shape (counter lane example, `where(p).skip(K).take(N).count()`):**
+
+```
+invoke($($i(src) : ...) {
+    var <skipRem> = K
+    var <takenCount> = 0
+    var <acc> = 0
+    for (<it> in <src>) {
+        if (p(<it>)) {
+            if (<takenCount> == N) break          // take guard
+            if (<skipRem> > 0) { <skipRem>--; continue }   // skip guard
+            <takenCount>++                         // before lane work — keeps reachable
+                                                   // even for early-exit terminators that
+                                                   // `return X` from per-match
+            <acc>++
+        }
+    }
+    return <acc>
+}, <topExpr>)
+```
+
+Same skeleton across all four lanes — the per-match payload (acc++ / push_clone / `acc += val` / `return X`) is the only thing that varies.
+
+**Chain-shape detection.** New planner state machine on `(seenSelect, seenSkip, seenTake)` accepts the canonical order and rejects reversed forms (`where` after select/skip/take, etc.) — those return null and the chain falls through to plain linq. At most one skip and one take per chain in this phase; multiple of either is a fall-through.
+
+**Take/skip as terminator.** When `take(N).to_array()` or `skip(K).take(N).to_array()` is the chain tail, the `to_array` strip (LinqCall `skip = true` in `linqCalls`) makes the last visible call `take` or `skip`. `classify_terminator` now routes those to ARRAY lane, with the trailing take/skip captured into `takeExpr`/`skipExpr` and the lane emitting implicit-to_array.
+
+**Buffer-required marker arms.** `is_buffer_required_op` recognizes `order_by`/`order`/`order_descending`/`order_by_descending`, `distinct`/`distinct_by`, `reverse`, `group_by`/`group_by_lazy`, `zip`, `join`/`left_join`/`group_join`. These all return null (silent fallback) with a `// TODO Phase 2X: <FutureMode>` comment naming the future emit mode. Future PRs replace each return-null with a dedicated emit path without re-walking the chain-recognition logic. AST tests `test_order_by_take_falls_through` and `test_distinct_falls_through` pin this behavior.
+
+**Range-form `take(start..end)` falls through.** The slice-style overload has different semantics (yield elements at indices `[start, end)`); decomposing to `skip(start).take(end-start)` is correct but introduces an arithmetic dependency the planner doesn't model today. Phase 2C handles only the int-form (`takeArg._type.baseType == Type.tInt`); range form drops to plain linq.
+
+**Reserve refinement.** When `take(N)` is present on a length-bearing source, the array-lane reserve hint tightens from `length(src)` to `min(N, length(src))` (inline `?:` — no `math::min` dep at the user's call site). Prevents over-allocating millions of slots when take cap is small.
+
+### Phase 2C Ring 3 deltas (100K rows, INTERP)
+
+| Benchmark | Shape | m3f_old | m3f (Phase 2C) | Delta |
+|---|---|---:|---:|---|
+| take_count | `take(N).to_array` (N=1000) | 0 | 0 | parity (`_old_fold`'s take iterator already bails early at N) |
+| skip_take | `skip(K).take(N).to_array` (K=10, N=1000) | 23 | **0** | bounded-loop exits after K+N iterations (small vs 100K source) |
+| take_sum_aggregate (new) | `select.take(N).sum` | 14 | **0** | accumulator-lane take splice |
+| take_count_filtered (new) | `where.take(N).count` | 11 | **0** | counter-lane take + where splice |
+
+Sub-ns/op on the three improved benchmarks reflects the bounded-loop nature: per-element timing normalizes to `chunk_size = 100000`, but the actual loop runs ≤ K+N times (≤ 1010 here). The win is asymptotic — `_fold` is O(K+N), `_old_fold` is O(K+N) per inner iterator + N×O(1) wrapper push.
+
+`_old_fold`'s `take_count` at 0 ns/op already reflects iterator-fusion at the linq-runtime layer; the Phase 2C delta there is allocation count (`_fold`: 1 alloc for the result array, `_old_fold`: same with extra take-iterator wrapper). The functional Phase 2C win for that shape is structural — the splice path now emits a single fused loop where `_old_fold` chains iterator instances.
+
+## Planned: fail-loudly contract
+
+The current contract: when `_fold` can't splice a chain (out-of-scope terminator, buffer-required op, multiple take/skip, range-form take/skip, etc.), it falls through to plain linq — same as today's master. This is **temporary**. The planned contract (Boris design directive 2026-05-17): `_fold` will emit `macro_error("_fold: cannot splice — <reason>")` for any unsupported shape, mirroring the sqlite_linq `_sql(...)` "splice or error" contract.
+
+When the switch lands, every `m3f` variant currently relying on silent fallback breaks. Approximate accounting from the current benchmark suite (8 affected `m3f` variants), grouped by future emit mode that would resolve each:
+
+| Benchmark | Future mode |
+|---|---|
+| `distinct_count` | BufferDistinct (hash set) |
+| `sort_first` | BufferTopN (order_by + early-exit) |
+| `sort_take` | BufferTopN (order_by + take/skip) |
+| `select_where_order_take` | BufferTopN with predicate prefix |
+| `groupby_count` | BufferGroupBy (hash multi-bucket) |
+| `groupby_sum` | BufferGroupBy + nested fold inside select |
+| `zip_dot_product` | MultiSourceZip (2 cursors advanced lockstep) |
+| `join_count` | BufferedJoin (hash-build + probe) |
+
+The fail-loudly PR will either (a) comment out `m3f` in the affected benchmarks until the corresponding emit mode lands, or (b) deliver one or more emit modes alongside the switch. Decision deferred to that PR.
+
+Tracking issue: the planner's `is_buffer_required_op` recognition + the named-arm `// TODO Phase 2X: <FutureMode>` markers are the in-code TODOs.
+
 ## Operator-coverage checklist (parity tests)
 
 The 24 benchmarks above cover the most common shapes. The end-game target is one benchmark per `_fold`-applicable scenario in the broader `tests/linq/` operator suite. Tracking the long-tail coverage below; PRs that add splice support for new operators should add a benchmark here if not already present.
@@ -167,15 +240,15 @@ The 24 benchmarks above cover the most common shapes. The end-game target is one
 | Source test file | Operator group | Covered by benchmark | Status |
 |---|---|---|---|
 | `test_linq.das` | comprehension basics | count_aggregate, sum_aggregate | ✅ |
-| `test_linq_aggregation.das` | count/sum/min/max/avg/aggregate | count/sum/min/max/average_aggregate, sum_where | ✅ core; `aggregate(seed, fn)` ⏳ |
-| `test_linq_querying.das` | any/all/contains | any_match, all_match | ✅ core; `contains` ⏳ |
+| `test_linq_aggregation.das` | count/sum/min/max/avg/aggregate | count/sum/min/max/average_aggregate, sum_where, long_count_aggregate | ✅ core; `aggregate(seed, fn)` ⏳ |
+| `test_linq_querying.das` | any/all/contains | any_match, all_match, contains_match | ✅ core |
 | `test_linq_transform.das` | select/select_many/zip | to_array_filter, zip_dot_product | ✅ select/zip; `select_many` ⏳ |
 | `test_linq_sorting.das` | order/order_by/reverse | sort_first, sort_take, select_where_order_take | ✅ ascending; `order_descending` + `reverse` ⏳ |
 | `test_linq_group_by.das` | group_by/group_by_lazy/having | groupby_count, groupby_sum | ✅ basic; `having_` ⏳ |
 | `test_linq_join.das` | join/left_join/right_join/full_outer/cross | join_count | ✅ inner; outer joins + cross ⏳ |
-| `test_linq_partition.das` | take/skip/take_while/skip_while/chunk | take_count, skip_take | ✅ take/skip; `_while` + `chunk` ⏳ |
+| `test_linq_partition.das` | take/skip/take_while/skip_while/chunk | take_count, skip_take, take_sum_aggregate, take_count_filtered | ✅ take/skip in splice lanes; `_while` + `chunk` ⏳ |
 | `test_linq_set.das` | distinct/union/except/intersect/unique | distinct_count | ✅ distinct; set ops ⏳ |
-| `test_linq_element.das` | first/last/single/element_at + _or_default | first_match | ✅ first; last/single/element_at ⏳ |
+| `test_linq_element.das` | first/last/single/element_at + _or_default | first_match, first_or_default_match | ✅ first/first_or_default; last/single/element_at ⏳ |
 | `test_linq_concat.das` | concat/prepend/append | — | ⏳ |
 | `test_linq_generation.das` | range/repeat/etc. | — | ⏳ |
 | `test_linq_bugs.das` | regression cases | — | ⏳ as bugs surface |
diff --git a/benchmarks/sql/take_count_filtered.das b/benchmarks/sql/take_count_filtered.das
new file mode 100644
index 000000000..6245c10ec
--- /dev/null
+++ b/benchmarks/sql/take_count_filtered.das
@@ -0,0 +1,56 @@
+options gen2
+options persistent_heap
+
+require _common public
+
+let TAKE_N = 1000
+let THRESHOLD = 500
+
+// `_sql` rejects `take(n) |> count()` (LIMIT-before-aggregate has no effect in SQLite — the
+// aggregate collapses to one row regardless), so the m1 variant is omitted. m3/m3f_old/m3f
+// filter + bound + count over the array. Exercises counter-lane take splice with an upstream `where` predicate.
+
+def run_m3(b : B?; n : int) {    // baseline variant: plain linq pipeline over the array, no fold wrapper
+    let arr <- fixture_array(n)
+    b |> run("m3_array/{n}", n) {
+        let c = arr |> _where(_.price > THRESHOLD) |> take(TAKE_N) |> count()    // filter, bound to TAKE_N, count
+        if (c == 0) {    // guard: a zero count is reported as a failure via b->failNow()
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f_old(b : B?; n : int) {    // same where/take/count chain, wrapped in `_old_fold(...)` over each(arr)
+    let arr <- fixture_array(n)
+    b |> run("m3f_old_array_fold/{n}", n) {
+        let c = _old_fold(each(arr)._where(_.price > THRESHOLD).take(TAKE_N).count())
+        if (c == 0) {    // guard: a zero count is reported as a failure via b->failNow()
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f(b : B?; n : int) {    // same where/take/count chain, wrapped in `_fold(...)` over each(arr)
+    let arr <- fixture_array(n)
+    b |> run("m3f_array_fold/{n}", n) {
+        let c = _fold(each(arr)._where(_.price > THRESHOLD).take(TAKE_N).count())
+        if (c == 0) {    // guard: a zero count is reported as a failure via b->failNow()
+            b->failNow()
+        }
+    }
+}
+
+[benchmark]
+def take_count_filtered_m3(b : B?) {    // plain-linq variant; 100000 is forwarded as `n` to run_m3
+    run_m3(b, 100000)
+}
+
+[benchmark]
+def take_count_filtered_m3f_old(b : B?) {    // `_old_fold` variant; 100000 is forwarded as `n` to run_m3f_old
+    run_m3f_old(b, 100000)
+}
+
+[benchmark]
+def take_count_filtered_m3f(b : B?) {    // `_fold` variant; 100000 is forwarded as `n` to run_m3f
+    run_m3f(b, 100000)
+}
diff --git a/benchmarks/sql/take_sum_aggregate.das b/benchmarks/sql/take_sum_aggregate.das
new file mode 100644
index 000000000..518d76330
--- /dev/null
+++ b/benchmarks/sql/take_sum_aggregate.das
@@ -0,0 +1,56 @@
+options gen2
+options persistent_heap
+
+require _common public
+
+let TAKE_N = 1000
+
+// `_sql` rejects `take(n) |> sum()` (LIMIT-before-aggregate has no effect in SQLite — aggregate
+// collapses to one row regardless), so the m1 variant is omitted. m3/m3f_old/m3f bound the
+// projection-sum loop to the first TAKE_N matched elements (no upstream where here, so
+// "matched" == "every source element"). Exercises accumulator-lane take splice in _fold.
+
+def run_m3(b : B?; n : int) {    // baseline variant: plain linq pipeline over the array, no fold wrapper
+    let arr <- fixture_array(n)
+    b |> run("m3_array/{n}", n) {
+        let s = arr |> _select(_.price) |> take(TAKE_N) |> sum()    // project price, bound to TAKE_N, sum
+        if (s == 0) {    // guard: a zero sum is reported as a failure via b->failNow()
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f_old(b : B?; n : int) {    // same select/take/sum chain, wrapped in `_old_fold(...)` over each(arr)
+    let arr <- fixture_array(n)
+    b |> run("m3f_old_array_fold/{n}", n) {
+        let s = _old_fold(each(arr)._select(_.price).take(TAKE_N).sum())
+        if (s == 0) {    // guard: a zero sum is reported as a failure via b->failNow()
+            b->failNow()
+        }
+    }
+}
+
+def run_m3f(b : B?; n : int) {    // same select/take/sum chain, wrapped in `_fold(...)` over each(arr)
+    let arr <- fixture_array(n)
+    b |> run("m3f_array_fold/{n}", n) {
+        let s = _fold(each(arr)._select(_.price).take(TAKE_N).sum())
+        if (s == 0) {    // guard: a zero sum is reported as a failure via b->failNow()
+            b->failNow()
+        }
+    }
+}
+
+[benchmark]
+def take_sum_aggregate_m3(b : B?) {    // plain-linq variant; 100000 is forwarded as `n` to run_m3
+    run_m3(b, 100000)
+}
+
+[benchmark]
+def take_sum_aggregate_m3f_old(b : B?) {    // `_old_fold` variant; 100000 is forwarded as `n` to run_m3f_old
+    run_m3f_old(b, 100000)
+}
+
+[benchmark]
+def take_sum_aggregate_m3f(b : B?) {    // `_fold` variant; 100000 is forwarded as `n` to run_m3f
+    run_m3f(b, 100000)
+}
diff --git a/daslib/ast_match.das b/daslib/ast_match.das
index 00c332028..95be772bb 100644
--- a/daslib/ast_match.das
+++ b/daslib/ast_match.das
@@ -508,7 +508,10 @@ def private qm_null_guard(at : LineInfo; actual_var : string) : Expression? {
 
 [macro_function]
 def private next_var(var index : int&) : string {
-    let name = "_qm_{index}"
+    // Generated locals for qmatch pattern-match bind sites. Leading `qm_` (no underscore
+    // prefix) sidesteps LINT004 — these vars are USED downstream in the emitted match
+    // arms, so the unused-prefix convention doesn't apply.
+    let name = "qm_{index}"
     index++
     return name
 }
@@ -749,7 +752,7 @@ def private generate_pattern_args_match(var pat_block : ExprBlock?; target_var :
 
                 // Get physical index at runtime (skipping fake args)
                 let phys_var = next_var(index)
-                body |> push <| qmacro_expr(${ var $i(phys_var) = qm_real_arg_index($i(target_var).arguments, $v(i)); })
+                body |> push <| qmacro_expr(${ let $i(phys_var) = qm_real_arg_index($i(target_var).arguments, $v(i)); })
 
                 // Name matching
                 if (!empty(arg_name)) {
@@ -1550,7 +1553,7 @@ def private generate_match(pattern : Expression?; actual_var : string; var body
                     let child_pattern = arg
                     if (child_pattern != null) {
                         let phys_var = next_var(index)
-                        body |> push <| qmacro_expr(${ var $i(phys_var) = qm_real_call_arg_index($i(cast_var).arguments, $v(i)); }) // nolint:LINT004
+                        body |> push <| qmacro_expr(${ let $i(phys_var) = qm_real_call_arg_index($i(cast_var).arguments, $v(i)); }) // nolint:LINT004
                         let child_var = next_var(index)
                         body |> push <| qmacro_expr(${ var $i(child_var) = clone_expression($i(cast_var).arguments[$i(phys_var)]); }) // nolint:LINT004
                         generate_match(child_pattern, child_var, body, index, at)
@@ -1793,7 +1796,7 @@ def private generate_block_match(var pat_block : ExprBlock?; actual_var : string
             // For simplicity: try block match first. If RTTI fails (not a block),
             // fall back to direct match. We use a runtime helper for this.
             let direct_var = next_var(index)
-            body |> push <| qmacro_expr(${ var $i(direct_var) = qm_rtti($i(actual_var)) != "ExprBlock" && qm_rtti($i(actual_var)) != "ExprMakeBlock"; })
+            body |> push <| qmacro_expr(${ let $i(direct_var) = qm_rtti($i(actual_var)) != "ExprBlock" && qm_rtti($i(actual_var)) != "ExprMakeBlock"; })
             // If it's not a block, match directly
             var direct_body : array<Expression?>
             generate_match(single_pat, actual_var, direct_body, index, at)
@@ -1842,7 +1845,7 @@ def private generate_block_match(var pat_block : ExprBlock?; actual_var : string
 
     // Get the total length of the actual block for bounds checking
     let len_var = next_var(index)
-    body |> push <| qmacro_expr(${ var $i(len_var) = length($i(cast_var).list); })
+    body |> push <| qmacro_expr(${ let $i(len_var) = length($i(cast_var).list); })
 
     // Walk through pattern statements
     // Track pending $b capture: save_var holds start position, b_vname holds user variable name
diff --git a/daslib/linq.das b/daslib/linq.das
index 935b192b3..7914da976 100644
--- a/daslib/linq.das
+++ b/daslib/linq.das
@@ -2743,11 +2743,16 @@ def private chunk_impl(var src; tt : auto(TT); size : int) : array<array<TT -con
     //! Splits an array into chunks of a specified size
     panic("chunk size must be greater than 0") if (size <= 0)
     var buffer : array<array<TT -const -&>>
+    static_if (typeinfo is_array(src) || typeinfo is_dim(src)) {
+        buffer |> reserve((length(src) + size - 1) / size)
+    }
     var inscope chunk : array<TT -const -&>
+    chunk |> reserve(size)
     for (it in src) {
         chunk.push_clone(it)
         if (chunk.length() == size) {
             buffer.emplace(chunk)
+            chunk |> reserve(size)
         }
     }
     if (!empty(chunk)) {
@@ -2890,6 +2895,9 @@ def zip_to_array(var a : iterator<auto(TT)>; var b : iterator<auto(UU)>) : array
 [unused_argument(tt, uu)]
 def private zip_impl(var a; tt : auto(TT); var b; uu : auto(UU); result_selector : block<(l : TT -&; r : UU -&) : auto>) : array<typedecl(result_selector(type<TT>, type<UU>)) -const -&> {
     var buffer : array<typedecl(result_selector(type<TT>, type<UU>)) -const -&>
+    static_if ((typeinfo is_array(a) || typeinfo is_dim(a)) && (typeinfo is_array(b) || typeinfo is_dim(b))) {
+        buffer |> reserve(min(length(a), length(b)))
+    }
     for (itA, itB in a, b) {
         buffer.push_clone(result_selector(itA, itB))
     }
diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index 662ffcff3..fa37b9fed 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -560,12 +560,30 @@ enum private LinqLane {
 [macro_function]
 def private classify_terminator(name : string) : LinqLane {
     if (name == "count") return LinqLane.COUNTER
-    if (name == "where_" || name == "select") return LinqLane.ARRAY
+    // take/skip as the trailing call (after to_array strip) → implicit array materialization,
+    // same lane as bare `[where_*][select*]` chains.
+    if (name == "where_" || name == "select" || name == "take" || name == "skip") return LinqLane.ARRAY
     if (name == "sum" || name == "min" || name == "max" || name == "average" || name == "long_count") return LinqLane.ACCUMULATOR
     if (name == "first" || name == "first_or_default" || name == "any" || name == "all" || name == "contains") return LinqLane.EARLY_EXIT
     return LinqLane.UNKNOWN
 }
 
+[macro_function]
+def private is_buffer_required_op(name : string) : bool {
+    // True for operator names the splice-mode planner deliberately declines today. Matching
+    // them by name lets the planner bail with an explicit `return null` (silent fallthrough)
+    // rather than tripping the unknown-op default; each family below is the hook for a
+    // future emit mode (BufferTopN / BufferDistinct / BufferReverse / BufferGroupBy /
+    // MultiSourceZip / BufferedJoin) that can land without re-walking chain recognition.
+    if (name == "order" || name == "order_by") return true
+    if (name == "order_descending" || name == "order_by_descending") return true
+    if (name == "distinct" || name == "distinct_by" || name == "reverse") return true
+    if (name == "group_by" || name == "group_by_lazy") return true
+    if (name == "zip") return true
+    let isJoinFamily = name == "join" || name == "left_join" || name == "group_join"
+    return isJoinFamily
+}
+
 [macro_function]
 def private peel_each(var top : Expression?) : Expression? {
     // Unwrap `each(<arr>)` to `<arr>` when `<arr>` is a true array (or fixed-size array).
@@ -663,6 +681,67 @@ def private wrap_with_condition(var body : Expression?; var cond : Expression?)
     }
 }
 
+[macro_function]
+def private append_skip_take_prelude(var preludeStmts : array<Expression?>; var skipExpr : Expression?; var takeExpr : Expression?;
+                                     skipName, takeCountName : string) {
+    // Appends the pre-loop counter declarations for skip/take to `preludeStmts` (skip first, then take).
+    // `skipName` is initialized from a clone of `skipExpr` (the user's K); `takeCountName` starts at 0.
+    // `takeExpr` is only null-tested here — the limit itself is cloned into the per-match guard by
+    // `wrap_with_skip_take`, so it is evaluated there rather than once up front. Null exprs emit nothing.
+    if (skipExpr != null) {
+        var skipInit = clone_expression(skipExpr)
+        preludeStmts |> push <| qmacro_expr() {
+            var $i(skipName) = $e(skipInit)
+        }
+    }
+    if (takeExpr != null) {
+        preludeStmts |> push <| qmacro_expr() {
+            var $i(takeCountName) = 0
+        }
+    }
+}
+
+[macro_function]
+def private wrap_with_skip_take(var stmts : array<Expression?>; var skipExpr : Expression?; var takeExpr : Expression?;
+                                skipName, takeCountName : string) {
+    // Prefixes the per-match statement list `stmts` (in place) with the skip/take guards, in order:
+    //   1. take-limit `break` — fires before any projection/lane work runs for the (N+1)th
+    //      match. `>=` (not `==`) so a negative N stops on the first match instead of never
+    //      tripping the guard; for N >= 0 the two are equivalent since the counter steps by 1.
+    //   2. skip-counter `continue` — drops the first K matched elements.
+    //   3. take-counter `++` — placed BEFORE the per-match work so early-exit terminators
+    //      (`return X`) don't make it unreachable (LINT001 would flag a trailing increment).
+    //   4. the existing per-match work. No-op when neither skip nor take is present.
+    if (skipExpr == null && takeExpr == null) return
+    var prefixed : array<Expression?>
+    prefixed |> reserve(length(stmts) + 3)
+    if (takeExpr != null) {
+        var takeLimit = clone_expression(takeExpr)
+        prefixed |> push <| qmacro_expr() {
+            if ($i(takeCountName) >= $e(takeLimit)) {
+                break
+            }
+        }
+    }
+    if (skipExpr != null) {
+        prefixed |> push <| qmacro_expr() {
+            if ($i(skipName) > 0) {
+                $i(skipName) --
+                continue
+            }
+        }
+    }
+    if (takeExpr != null) {
+        prefixed |> push <| qmacro_expr() {
+            $i(takeCountName) ++
+        }
+    }
+    for (s in stmts) {
+        prefixed |> push(s)
+    }
+    swap(stmts, prefixed)
+}
+
 [macro_function]
 def private min_max_compare(workhorse : bool; opName : string; valName, accName : string) : Expression? {
     // Workhorse types use direct `<` / `>` (single-instruction comparison) — this is the
@@ -676,43 +755,72 @@ def private min_max_compare(workhorse : bool; opName : string; valName, accName
 }
 
 [macro_function]
-def private emit_counter_lane(var top : Expression?; srcName, accName, itName : string; var loopBody : Expression?; at : LineInfo) : Expression? {
-    // Counter lane: `var acc = 0; for (it in src) { $loopBody }; return acc` inside invoke.
+def private emit_counter_lane(var top : Expression?; srcName, accName, itName, skipName, takeCountName : string;
+                              var skipExpr : Expression?; var takeExpr : Expression?;
+                              var loopBody : Expression?; at : LineInfo) : Expression? {
+    // Counter lane: emits `[skip/take init]; var acc = 0; for (it in src) { $loopBody }; return acc`
+    // as a single `$b` statement list inside the invoke, so the skip/take counters share block scope with
+    // `acc`. `loopBody` must arrive with guards/increment already added by `wrap_with_skip_take` (caller side).
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
     var srcParamType = invoke_src_param_type(top)
-    var res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+    var bodyStmts : array<Expression?>
+    append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName)
+    bodyStmts |> push <| qmacro_expr() {
         var $i(accName) = 0
+    }
+    bodyStmts |> push <| qmacro_expr() {
         for ($i(itName) in $i(srcName)) {
             $e(loopBody)
         }
+    }
+    bodyStmts |> push <| qmacro_expr() {
         return $i(accName)
+    }
+    var res = qmacro(invoke($($i(srcName) : $t(srcParamType)) {
+        $b(bodyStmts)
     }, $e(topExpr)))
     return finalize_invoke(res, at)
 }
 
 [macro_function]
-def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr; srcName, accName, itName : string; at : LineInfo) : Expression? {
-    // Array lane: `var acc : array<T>; [reserve]; for (it in src) { $loopBody }; return <- acc`
+def private emit_array_lane(var top : Expression?; var expr : Expression?; var loopBody : Expression?; var elementType : TypeDeclPtr;
+                            srcName, accName, itName, skipName, takeCountName : string;
+                            var skipExpr : Expression?; var takeExpr : Expression?;
+                            at : LineInfo) : Expression? {
+    // Array lane: `[skip/take init]; var acc : array<T>; [reserve]; for (it in src) { $loopBody }; return <- acc`
     // wrapped in invoke. Two orthogonal axes select the body shape:
     //   - whole-pipeline iterator-ness (`expr._type.isIterator`) — drives the `to_sequence_move`
     //     return path so an iterator pipeline still ends in an iterator,
     //   - source-length availability — drives the pre-reserve hint (only useful when source is
     //     array-shaped; iter sources can't reserve cheaply).
-    // Single-$b body shares scope: `var acc` decl + optional reserve + for-loop + return all
-    // live in one block.
+    // When `take(N)` is in play the reserve hint tightens to `min(N, length(src))` — at most
+    // N elements ever get pushed, so a length-of-source reserve over-allocates by far.
+    // Single-$b body shares scope: skip/take counters + `var acc` decl + optional reserve +
+    // for-loop + return all live in one block.
     let isIter = expr._type.isIterator
     let sourceHasLength = type_has_length(top._type)
     var topExpr = clone_expression(top)
     topExpr.genFlags.alwaysSafe = true
     var srcParamType = invoke_src_param_type(top)
     var bodyStmts : array<Expression?>
+    append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName)
     bodyStmts |> push <| qmacro_expr() {
         var $i(accName) : array<$t(elementType)>
     }
     if (sourceHasLength && !isIter) {
-        bodyStmts |> push <| qmacro_expr() {
-            $i(accName) |> reserve(length($i(srcName)))
+        if (takeExpr != null) {
+            // Inline `min` so the emission doesn't depend on `require math` at the call site.
+            // Reserve = min(takeN, length(src)): at most takeN elements ever get pushed.
+            var takeReserveA = clone_expression(takeExpr)
+            var takeReserveB = clone_expression(takeExpr)
+            bodyStmts |> push <| qmacro_expr() {
+                $i(accName) |> reserve($e(takeReserveA) < length($i(srcName)) ? $e(takeReserveB) : length($i(srcName)))
+            }
+        } else {
+            bodyStmts |> push <| qmacro_expr() {
+                $i(accName) |> reserve(length($i(srcName)))
+            }
         }
     }
     bodyStmts |> push <| qmacro_expr() {
@@ -743,7 +851,8 @@ def private emit_accumulator_lane(
                                   var whereCond : Expression?;
                                   var intermediateBinds : array<Expression?>;
                                   var elementType : TypeDeclPtr;
-                                  srcName, accName, itName : string;
+                                  srcName, accName, itName, skipName, takeCountName : string;
+                                  var skipExpr : Expression?; var takeExpr : Expression?;
                                   at : LineInfo
                                   ) : Expression? {
     // Ring 1 single-pass accumulator lane: sum / min / max / average / long_count.
@@ -840,13 +949,16 @@ def private emit_accumulator_lane(
         return null
     }
     prepend_binds(perMatchStmts, intermediateBinds)
+    wrap_with_skip_take(perMatchStmts, skipExpr, takeExpr, skipName, takeCountName)
     var loopBody = wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond)
     // Collect all body statements into one list so they share scope when spliced via $b.
     // Splitting decls / for / return into separate splice tags would put each in its own
     // sub-block, hiding the accumulator from later statements (caught by AST dump under
-    // `options log_infer_passes`).
+    // `options log_infer_passes`). skip/take counter init goes first so the guards inside
+    // the perMatch block can name-resolve them.
     var bodyStmts : array<Expression?>
-    bodyStmts |> reserve(length(preludeStmts) + 2)
+    bodyStmts |> reserve(length(preludeStmts) + 4)
+    append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName)
     for (s in preludeStmts) {
         bodyStmts |> push(s)
     }
@@ -890,7 +1002,8 @@ def private emit_early_exit_lane(
                                  var intermediateBinds : array<Expression?>;
                                  var elementType : TypeDeclPtr;
                                  terminatorCall : ExprCall?;
-                                 srcName, itName : string;
+                                 srcName, itName, skipName, takeCountName : string;
+                                 var skipExpr : Expression?; var takeExpr : Expression?;
                                  at : LineInfo
                                  ) : Expression? {
     // Ring 2 early-exit lane: first / first_or_default / any / all / contains.
@@ -995,10 +1108,13 @@ def private emit_early_exit_lane(
         return null
     }
     prepend_binds(perMatchStmts, intermediateBinds)
+    wrap_with_skip_take(perMatchStmts, skipExpr, takeExpr, skipName, takeCountName)
     var loopBody = wrap_with_condition(stmts_to_expr(perMatchStmts), whereCond)
-    // Single-$b body so all stmts (prelude + for + tail) share scope under one wrapping block.
+    // Single-$b body so all stmts (skip/take counters + prelude + for + tail) share scope
+    // under one wrapping block.
     var bodyStmts : array<Expression?>
-    bodyStmts |> reserve(length(preludeStmts) + length(tailStmts) + 1)
+    bodyStmts |> reserve(length(preludeStmts) + length(tailStmts) + 3)
+    append_skip_take_prelude(bodyStmts, skipExpr, takeExpr, skipName, takeCountName)
     for (s in preludeStmts) {
         bodyStmts |> push(s)
     }
@@ -1021,20 +1137,25 @@ def private emit_early_exit_lane(
 
 [macro_function]
 def private plan_loop_or_count(var expr : Expression?) : Expression? {
-    // Phase-2B loop planner. Recognizes chains of shape `[where_*][select*]` plus a terminator,
-    // dispatched by `classify_terminator` into one of four lanes:
-    //   ARRAY        — `[where_*][select*]`                            → array<T> / iterator<T>
-    //   COUNTER      — `[where_*][select*] |> count`                   → int
-    //   ACCUMULATOR  — `[where_*][select*] |> {sum,min,max,average,long_count}` → typed scalar
-    //   EARLY_EXIT   — `[where_*][select*] |> {first,first_or_default,any,all,contains}`
-    // Fuses chained wheres into `&&` and chained selects via let-binding composition; emits one
-    // inline `invoke($block, $src)`. Returns null for shapes outside scope — caller falls through.
+    // Phase-2C loop planner. Recognizes chains of shape `[where_*][select*][skip?][take?]`
+    // plus a terminator, dispatched by `classify_terminator` into one of four lanes:
+    //   ARRAY        — `[where_*][select*][skip?][take?]` (or trailing take/skip → implicit
+    //                  to_array)                                  → array<T> / iterator<T>
+    //   COUNTER      — `[...] |> count`                           → int
+    //   ACCUMULATOR  — `[...] |> {sum,min,max,average,long_count}` → typed scalar
+    //   EARLY_EXIT   — `[...] |> {first,first_or_default,any,all,contains}`
+    // Fuses chained wheres into `&&`, chained selects via let-binding composition, and
+    // skip/take into bounded-loop counters; emits one inline `invoke($block, $src)`. Returns
+    // null for shapes outside scope — caller falls through. Buffer-required ops (`order_by`,
+    // `distinct`, `group_by`, `zip`, `join`, `reverse`) are recognized by name and return null
+    // with a future-mode marker; future PRs replace each with a dedicated emit path.
     var (top, calls) = flatten_linq(expr)
     if (empty(calls)) return null
     top = peel_each(top)
     let lastName = calls.back()._1.name
     let lane = classify_terminator(lastName)
-    // Phase 2B emits all four lanes; UNKNOWN terminators fall through unfolded.
+    // Marker: future PRs add BufferTopN / BufferDistinct / etc. for `is_buffer_required_op`
+    // names. Today: silent fallback.
     if (lane == LinqLane.UNKNOWN) return null
     let counterLane = lane == LinqLane.COUNTER
     let hasTerminator = lane != LinqLane.ARRAY
@@ -1043,10 +1164,16 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
     let srcName = "`source`{at.line}`{at.column}"
     let itName  = "`it`{at.line}`{at.column}"
     let accName = "`acc`{at.line}`{at.column}"
+    let skipName = "`skip`{at.line}`{at.column}"
+    let takeCountName = "`tc`{at.line}`{at.column}"
     var whereCond : Expression?
     var projection : Expression?
     var intermediateBinds : array<Expression?>
+    var skipExpr : Expression?
+    var takeExpr : Expression?
     var seenSelect = false
+    var seenSkip = false
+    var seenTake = false
     var allProjectionsPure = true
     var elementType = clone_type(top._type.firstType)
     var lastBindName = itName
@@ -1054,7 +1181,9 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
         var cll & = unsafe(calls[i])
         let opName = cll._1.name
         if (opName == "where_") {
-            if (seenSelect) return null    // where-after-select not in Phase 2A
+            // where-after-select / -after-skip / -after-take is rejected — canonical chain
+            // order is [where_*][select*][skip?][take?].
+            if (seenSelect || seenSkip || seenTake) return null
             var predicate = fold_linq_cond(cll._0.arguments[1], itName)
             if (whereCond == null) {
                 whereCond = predicate
@@ -1062,6 +1191,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
                 whereCond = qmacro($e(whereCond) && $e(predicate))
             }
         } elif (opName == "select") {
+            if (seenSkip || seenTake) return null
             // Chained selects: bind the previous projection to a fresh local now so the next
             // lambda's `_` can be renamed straight to that name — avoids the
             // ExprRef2Value-substitution trap that plain `Template.replaceVariable` hits when
@@ -1084,6 +1214,28 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             projection = fold_linq_cond(cll._0.arguments[1], lastBindName)
             elementType = clone_type(cll._0._type.firstType)
             seenSelect = true
+        } elif (opName == "skip") {
+            // Canonical chain: at most one skip, before any take. Multiple skips / skip-after-
+            // take falls through (no semantic difficulty, just scope deferral). Range-form
+            // `skip(start..end)` is a slice (different operator semantics) — fall through.
+            if (seenSkip || seenTake) return null
+            var skipArg = cll._0.arguments[1]
+            if (skipArg == null || skipArg._type == null || skipArg._type.baseType != Type.tInt) return null
+            skipExpr = clone_expression(skipArg)
+            seenSkip = true
+        } elif (opName == "take") {
+            if (seenTake) return null
+            var takeArg = cll._0.arguments[1]
+            if (takeArg == null || takeArg._type == null || takeArg._type.baseType != Type.tInt) return null
+            takeExpr = clone_expression(takeArg)
+            seenTake = true
+        } elif (is_buffer_required_op(opName)) {     // nolint:LINT009
+            // TODO Phase 2X: BufferTopN (order_by + take/skip), BufferDistinct (distinct/_by),
+            // BufferReverse (reverse), BufferGroupBy (group_by/_lazy), MultiSourceZip (zip),
+            // BufferedJoin (join/left_join/group_join). Recognized but not yet emitted —
+            // future PRs replace each return-null with a dedicated emit path; for now the
+            // marker arm fall-throughs identically to the unrecognized-op default.
+            return null
         } else {
             return null
         }
@@ -1091,12 +1243,14 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
     if (projection != null && has_sideeffects(projection)) {
         allProjectionsPure = false
     }
+    let noLimits = skipExpr == null && takeExpr == null
     // Count-shaped shortcut: when terminator is `count` (→ int) or `long_count` (→ int64),
-    // there's no filter, and every projection is pure, the result is just the source length.
-    // Skip the loop entirely.
+    // there's no filter, no skip/take limits, and every projection is pure, the result is
+    // just the source length. Skip the loop entirely. take/skip would truncate the length so
+    // the shortcut can't fire there.
     let isCountShaped = (lane == LinqLane.COUNTER
         || (lane == LinqLane.ACCUMULATOR && lastName == "long_count"))
-    if (isCountShaped && whereCond == null && allProjectionsPure
+    if (isCountShaped && whereCond == null && allProjectionsPure && noLimits
             && type_has_length(top._type))
         return emit_length_shortcut(lastName, top, srcName, at)
     // Ring 1: accumulator lane builds its own per-op loop body (typed accumulator, optional
@@ -1104,21 +1258,24 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
     // COUNTER/ARRAY loopBody construction.
     if (lane == LinqLane.ACCUMULATOR)
         return emit_accumulator_lane(lastName, top, projection, whereCond,
-            intermediateBinds, elementType, srcName, accName, itName, at)
-    // Ring 2: early-exit lane — `any` no-pred + no upstream work + length-bearing source
-    // gets the empty-shortcut; everything else dispatches to the loop emitter.
+            intermediateBinds, elementType, srcName, accName, itName, skipName, takeCountName,
+            skipExpr, takeExpr, at)
+    // Ring 2: early-exit lane — `any` no-pred + no upstream work + no limits + length-bearing
+    // source gets the empty-shortcut; everything else dispatches to the loop emitter.
     if (lane == LinqLane.EARLY_EXIT) {
         let terminatorCall = calls.back()._0
         let isAnyNoPred = lastName == "any" && length(terminatorCall.arguments) == 1
-        if (isAnyNoPred && whereCond == null && allProjectionsPure
+        if (isAnyNoPred && whereCond == null && allProjectionsPure && noLimits
                 && type_has_length(top._type))
             return emit_any_empty_shortcut(top, srcName, at)
         return emit_early_exit_lane(lastName, top, projection, whereCond,
-            intermediateBinds, elementType, terminatorCall, srcName, itName, at)
+            intermediateBinds, elementType, terminatorCall, srcName, itName, skipName,
+            takeCountName, skipExpr, takeExpr, at)
     }
     // Build the per-element loop body for COUNTER / ARRAY. Both lanes follow the same shape:
     // collect per-element stmts (incl. side-effect-preserving projection binds), prepend
-    // chain `intermediateBinds`, collapse to a single expression, wrap with `whereCond`.
+    // chain `intermediateBinds`, wrap with skip/take guards, collapse to a single expression,
+    // wrap with `whereCond`.
     var loopBody : Expression?
     if (counterLane) {
         // Counter lane must evaluate the projection (and any chained intermediates) per
@@ -1138,6 +1295,7 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             $i(accName) ++
         }
         prepend_binds(stmts, intermediateBinds)
+        wrap_with_skip_take(stmts, skipExpr, takeExpr, skipName, takeCountName)
         loopBody = wrap_with_condition(stmts_to_expr(stmts), whereCond)
     } else {
         // Array lane. `push_clone` is the safe append everywhere: for workhorse types it's a
@@ -1151,8 +1309,10 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             stmts |> push <| qmacro_expr() {
                 $i(accName) |> push_clone($e(projection))
             }
-        } elif (whereCond != null) {
-            // Identity case (no projection): `it` aliases the source element.
+        } elif (whereCond != null || skipExpr != null || takeExpr != null) {
+            // Identity push: `it` aliases the source element. Reached when chain is bare
+            // [where_*][skip?][take?] (no select) — every match (post-filter, post-skip,
+            // pre-take) goes into the result array as-is.
             stmts |> push <| qmacro_expr() {
                 $i(accName) |> push_clone($i(itName))
             }
@@ -1161,12 +1321,15 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             return null
         }
         prepend_binds(stmts, intermediateBinds)
+        wrap_with_skip_take(stmts, skipExpr, takeExpr, skipName, takeCountName)
         loopBody = wrap_with_condition(stmts_to_expr(stmts), whereCond)
     }
     if (counterLane) {
-        return emit_counter_lane(top, srcName, accName, itName, loopBody, at)
+        return emit_counter_lane(top, srcName, accName, itName, skipName, takeCountName,
+            skipExpr, takeExpr, loopBody, at)
     } else {
-        return emit_array_lane(top, expr, loopBody, elementType, srcName, accName, itName, at)
+        return emit_array_lane(top, expr, loopBody, elementType, srcName, accName, itName,
+            skipName, takeCountName, skipExpr, takeExpr, at)
     }
 }
 
diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das
index e211d1752..2e8a1293e 100644
--- a/tests/linq/test_linq_fold.das
+++ b/tests/linq/test_linq_fold.das
@@ -188,7 +188,7 @@ def test_generic_fold_order(t : T?) {
             ._fold()
         )
         t |> equal(typeinfo typename(t7), "iterator<string>")
-        var sorted = ["kiwi", "apple", "banana", "cherry", "blueberry"]
+        var sorted = ["kiwi", "apple", "banana", "cherry", "blueberry"]    // nolint:LINT003
         for (i, v in 0..5, t7) {
             t |> equal(sorted[i], v)
         }
@@ -199,7 +199,7 @@ def test_generic_fold_order(t : T?) {
             ._fold()
         )
         t |> equal(typeinfo typename(t8), "array<string>")
-        var sorted = ["blueberry", "banana", "cherry", "apple", "kiwi"]
+        var sorted = ["blueberry", "banana", "cherry", "apple", "kiwi"]    // nolint:LINT003
         for (i, v in 0..5, t8) {
             t |> equal(sorted[i], v)
         }
@@ -209,7 +209,7 @@ def test_generic_fold_order(t : T?) {
 [test]
 def test_generic_fold_aggregates(t : T?) {
     t |> run("min fold") @(t : T?) {
-        var t1 = ([5, 3, 8, 1, 4]
+        var t1 = ([5, 3, 8, 1, 4]    // nolint:LINT003 — `var` required for the `typename(t1) == "int"` assertion below (let adds const)
             .to_sequence()
             .min()
             ._fold()
@@ -218,7 +218,7 @@ def test_generic_fold_aggregates(t : T?) {
         t |> equal(1, t1)
     }
     t |> run("min_by fold") @(t : T?) {
-        var t2 = (["apple", "banana", "kiwi", "cherry", "blueberry"]
+        var t2 = (["apple", "banana", "kiwi", "cherry", "blueberry"]    // nolint:LINT003
             .to_sequence()
             ._min_by(length(_))
             ._fold()
@@ -227,7 +227,7 @@ def test_generic_fold_aggregates(t : T?) {
         t |> equal("kiwi", t2)
     }
     t |> run("max fold") @(t : T?) {
-        var t2 = ([5, 3, 8, 1, 4]
+        var t2 = ([5, 3, 8, 1, 4]    // nolint:LINT003
             .to_sequence()
             .max()
             ._fold()
@@ -236,7 +236,7 @@ def test_generic_fold_aggregates(t : T?) {
         t |> equal(8, t2)
     }
     t |> run("max_by fold") @(t : T?) {
-        var t2 = (["apple", "banana", "kiwi", "cherry", "blueberry"]
+        var t2 = (["apple", "banana", "kiwi", "cherry", "blueberry"]    // nolint:LINT003
             .to_sequence()
             ._max_by(length(_))
             ._fold()
@@ -267,7 +267,7 @@ def test_generic_fold_aggregates(t : T?) {
         t |> equal("blueberry", tmax)
     }
     t |> run("average fold") @(t : T?) {
-        var t2 = ([1, 2, 3, 4, 5, 6, 7, 8]
+        var t2 = ([1, 2, 3, 4, 5, 6, 7, 8]    // nolint:LINT003
             .to_sequence()
             .average()
             ._fold()
@@ -289,7 +289,7 @@ def test_generic_fold_aggregates(t : T?) {
         t |> equal(4, tavg)
     }
     t |> run("sum fold") @(t : T?) {
-        var t2 = ([1, 2, 3, 4, 5, 6, 7, 8]
+        var t2 = ([1, 2, 3, 4, 5, 6, 7, 8]    // nolint:LINT003
             .to_sequence()
             .sum()
             ._fold()
@@ -298,7 +298,7 @@ def test_generic_fold_aggregates(t : T?) {
         t |> equal(36, t2)
     }
     t |> run("aggregate fold") @(t : T?) {
-        var t2 = ([1, 2, 3, 4, 5, 6, 7, 8]
+        var t2 = ([1, 2, 3, 4, 5, 6, 7, 8]    // nolint:LINT003
             .to_sequence()
             .aggregate("", $(a, b) => "{a},{b}")
             ._fold()
@@ -311,7 +311,7 @@ def test_generic_fold_aggregates(t : T?) {
 [test]
 def test_generic_fold_skip_and_take(t : T?) {
     t |> run("skip fold") @(t : T?) {
-        var t1 = ([5, 3, 8, 1, 4]
+        var t1 = ([5, 3, 8, 1, 4]    // nolint:LINT003
             .to_sequence()
             .skip(2)
             .min()
@@ -321,7 +321,7 @@ def test_generic_fold_skip_and_take(t : T?) {
         t |> equal(1, t1)
     }
     t |> run("skip_while fold") @(t : T?) {
-        var t2 = ([5, 3, 8, 1, 4]
+        var t2 = ([5, 3, 8, 1, 4]    // nolint:LINT003
             .to_sequence()
             ._skip_while(_ > 3)
             .min()
@@ -331,7 +331,7 @@ def test_generic_fold_skip_and_take(t : T?) {
         t |> equal(1, t2)
     }
     t |> run("take fold") @(t : T?) {
-        var t3 = ([5, 3, 8, 1, 4]
+        var t3 = ([5, 3, 8, 1, 4]    // nolint:LINT003
             .to_sequence()
             .take(3)
             .max()
@@ -341,7 +341,7 @@ def test_generic_fold_skip_and_take(t : T?) {
         t |> equal(8, t3)
     }
     t |> run("take_while fold") @(t : T?) {
-        var t4 = ([5, 3, 8, 1, 4]
+        var t4 = ([5, 3, 8, 1, 4]    // nolint:LINT003
             .to_sequence()
             ._take_while(_ < 8)
             .max()
@@ -351,7 +351,7 @@ def test_generic_fold_skip_and_take(t : T?) {
         t |> equal(5, t4)
     }
     t |> run("take range fold") @(t : T?) {
-        var t5 = ([5, 3, 8, 1, 4]
+        var t5 = ([5, 3, 8, 1, 4]    // nolint:LINT003
             .to_sequence()
             .take(1..4)
             .max()
@@ -381,7 +381,7 @@ def test_concat(t : T?) {
         )
         t |> equal(typeinfo typename(q), "array<int>")
         t |> equal(length(q), 6)
-        var expected = [6, 4, 2, 30, 27, 24]
+        var expected = [6, 4, 2, 30, 27, 24]    // nolint:LINT003
         for (i, v in 0..6, q) {
             t |> equal(expected[i], v)
         }
@@ -400,7 +400,7 @@ def test_concat(t : T?) {
         )
         t |> equal(typeinfo typename(q), "array<int>")
         t |> equal(length(q), 4)
-        var expected = [6, 4, 2, 100]
+        var expected = [6, 4, 2, 100]    // nolint:LINT003
         for (i, v in 0..4, q) {
             t |> equal(expected[i], v)
         }
@@ -419,7 +419,7 @@ def test_concat(t : T?) {
         )
         t |> equal(typeinfo typename(q), "array<int>")
         t |> equal(length(q), 4)
-        var expected = [100, 6, 4, 2]
+        var expected = [100, 6, 4, 2]    // nolint:LINT003
         for (i, v in 0..4, q) {
             t |> equal(expected[i], v)
         }
@@ -429,7 +429,7 @@ def test_concat(t : T?) {
 [test]
 def test_any_all_contains(t : T?) {
     t |> run("fold any") @(t : T?) {
-        var query = (
+        var query = (    // nolint:LINT003
             [for (x in 0..5); x]
             ._any(_ > 3)
             ._fold()
@@ -437,7 +437,7 @@ def test_any_all_contains(t : T?) {
         t |> success(query)
     }
     t |> run("fold any false") @(t : T?) {
-        var qpred2 = (
+        var qpred2 = (    // nolint:LINT003
             [iterator for(x in 0..5); x]
             ._any(_ > 10)
             ._fold()
@@ -445,7 +445,7 @@ def test_any_all_contains(t : T?) {
         t |> success(!qpred2)
     }
     t |> run("fold all") @(t : T?) {
-        var query = (
+        var query = (    // nolint:LINT003
             [iterator for(x in 0..5); x]
             ._all(_ < 5)
             ._fold()
@@ -453,7 +453,7 @@ def test_any_all_contains(t : T?) {
         t |> success(query)
     }
     t |> run("fold all empty") @(t : T?) {
-        var qempty = (
+        var qempty = (    // nolint:LINT003
             empty(type<int>)
             ._all(_ < 5)
             ._fold()
@@ -461,7 +461,7 @@ def test_any_all_contains(t : T?) {
         t |> success(qempty)
     }
     t |> run("fold contains") @(t : T?) {
-        var query = (
+        var query = (    // nolint:LINT003
             [iterator for(x in 0..5); x]
             .contains(3)
             ._fold()
@@ -469,7 +469,7 @@ def test_any_all_contains(t : T?) {
         t |> success(query)
     }
     t |> run("fold complex contains") @(t : T?) {
-        var qcomplex = (
+        var qcomplex = (    // nolint:LINT003
             [iterator for(x in 0..5); ComplexType(a = [x, x * 10])]
             .contains(ComplexType(a = [3, 30]))
             ._fold()
@@ -477,7 +477,7 @@ def test_any_all_contains(t : T?) {
         t |> success(qcomplex)
     }
     t |> run("fold complex contains false") @(t : T?) {
-        var qcomplex = (
+        var qcomplex = (    // nolint:LINT003
             [iterator for(x in 0..5); ComplexType(a = [x, x * 10])]
             .contains(ComplexType(a = [3, 31]))
             ._fold()
@@ -534,7 +534,7 @@ def test_unique(t : T?) {
             ._order_by(_) // sort
             ._fold()
         )
-        var expected = [0, 1, 2, 3, 9, 16, 25, 36, 49]
+        var expected = [0, 1, 2, 3, 9, 16, 25, 36, 49]    // nolint:LINT003
         t |> equal(length(t1), length(expected))
         for (i, v in 0..length(expected), t1) {
             t |> equal(expected[i], v)
@@ -552,7 +552,7 @@ def test_unique(t : T?) {
             ._order_by(_) // sort
             ._fold()
         )
-        var expected = [0, 1, 2, 3]
+        var expected = [0, 1, 2, 3]    // nolint:LINT003
         t |> equal(length(t1), length(expected))
         for (i, v in 0..length(expected), t1) {
             t |> equal(expected[i], v)
@@ -574,7 +574,7 @@ def test_except(t : T?) {
             .distinct()
             ._fold()
         )
-        var expected = [0, 1, 2, 3]
+        var expected = [0, 1, 2, 3]    // nolint:LINT003
         t |> equal(length(t1), length(expected))
         for (i, v in 0..length(expected), t1) {
             t |> equal(expected[i], v)
@@ -593,7 +593,7 @@ def test_except(t : T?) {
             .distinct()
             ._fold()
         )
-        var expected = [2, 3]
+        var expected = [2, 3]    // nolint:LINT003
         t |> equal(length(t1), length(expected))
         for (i, v in 0..length(expected), t1) {
             t |> equal(expected[i], v)
@@ -614,7 +614,7 @@ def test_intersect(t : T?) {
             ._order_by(_) // sort
             ._fold()
         )
-        var expected = [3, 4, 5]
+        var expected = [3, 4, 5]    // nolint:LINT003
         t |> equal(length(t1), length(expected))
         for (i, v in 0..length(expected), t1) {
             t |> equal(expected[i], v)
@@ -632,7 +632,7 @@ def test_intersect(t : T?) {
             ._order_by(_) // sort
             ._fold()
         )
-        var expected = [3, 4, 5, 6]
+        var expected = [3, 4, 5, 6]    // nolint:LINT003
         t |> equal(length(t1), length(expected))
         for (i, v in 0..length(expected), t1) {
             t |> equal(expected[i], v)
@@ -700,7 +700,7 @@ def test_zip(t : T?) {
 [test]
 def test_order_distinct(t : T?) {
     t |> run("basic order distinct") @(t : T?) {
-        var t7 = (
+        var t7 = (    // nolint:LINT003
             [5, 4, 4, 3, 4, 2, 1, 1, 4, 3]
             ._select(_ * 10)
             .order()
@@ -712,7 +712,7 @@ def test_order_distinct(t : T?) {
         t |> equal(10, t7)
     }
     t |> run("first position order distinct") @(t : T?) {
-        var t7 = (
+        var t7 = (    // nolint:LINT003
             [5, 4, 4, 3, 4, 2, 1, 1, 4, 3]
             .order()
             .distinct()
@@ -1073,3 +1073,175 @@ def test_contains_early_exit(t : T?) {
         t |> equal(true, r)
     }
 }
+
+[test]
+def test_take_skip_counter_lane(t : T?) {
+    t |> run("counter: where.take.count") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // Even values: 2,4,6,8,10 — take 3 → 3.
+        let r = _fold(each(arr)._where(_ % 2 == 0).take(3).count())
+        t |> equal(3, r)
+    }
+    t |> run("counter: where.skip.count") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // Even values: 2,4,6,8,10. skip 2 → 6,8,10 → 3.
+        let r = _fold(each(arr)._where(_ % 2 == 0).skip(2).count())
+        t |> equal(3, r)
+    }
+    t |> run("counter: where.skip.take.count") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // Even values: 2,4,6,8,10. skip 1 take 3 → 4,6,8 → 3.
+        let r = _fold(each(arr)._where(_ % 2 == 0).skip(1).take(3).count())
+        t |> equal(3, r)
+    }
+    t |> run("counter: take(0).count → 0") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr).take(0).count())
+        t |> equal(0, r)
+    }
+    t |> run("counter: take(huge).count → all source") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r = _fold(each(arr).take(1000).count())
+        t |> equal(3, r)
+    }
+    t |> run("counter: skip(huge).count → 0") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r = _fold(each(arr).skip(1000).count())
+        t |> equal(0, r)
+    }
+}
+
+[test]
+def test_take_skip_accumulator_lane(t : T?) {
+    t |> run("accumulator: select.take.sum") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        // Projected: 10,20,30,40,50. take 3 → 10+20+30 = 60.
+        let r = _fold(each(arr)._select(_ * 10).take(3).sum())
+        t |> equal(60, r)
+    }
+    t |> run("accumulator: select.skip.take.average") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8]
+        // Projected (* 10): 10,20,30,40,50,60,70,80. skip 2 take 4 → 30+40+50+60=180 / 4 = 45.
+        let r = _fold(each(arr)._select(_ * 10).skip(2).take(4).average())
+        t |> equal(45.0lf, r)
+    }
+    t |> run("accumulator: take.min") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        // take 3 → 5,3,8 → min=3.
+        let r = _fold(each(arr).take(3).min())
+        t |> equal(3, r)
+    }
+    t |> run("accumulator: where.take.long_count → int64") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // odd values: 1,3,5,7,9. take 3 → 3.
+        let r = _fold(each(arr)._where(_ % 2 == 1).take(3).long_count())
+        t |> equal(3l, r)
+    }
+    t |> run("accumulator: take(0).sum → 0") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr).take(0).sum())
+        t |> equal(0, r)
+    }
+}
+
+[test]
+def test_take_skip_early_exit_lane(t : T?) {
+    t |> run("early-exit: take.first") @(t : T?) {
+        let arr <- [5, 3, 8, 1, 4]
+        // take 3 → 5,3,8. first → 5.
+        let r = _fold(each(arr).take(3).first())
+        t |> equal(5, r)
+    }
+    t |> run("early-exit: where.take.any → true") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        // even values: 2,4. take 5 → 2,4. any → true.
+        let r = _fold(each(arr)._where(_ % 2 == 0).take(5).any())
+        t |> equal(true, r)
+    }
+    t |> run("early-exit: where.skip.take.first → 6") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // even: 2,4,6,8,10. skip 2 take 3 → 6,8,10. first → 6.
+        let r = _fold(each(arr)._where(_ % 2 == 0).skip(2).take(3).first())
+        t |> equal(6, r)
+    }
+    t |> run("early-exit: take.all(odd) → false (even hits within take limit)") @(t : T?) {
+        let arr <- [1, 3, 5, 2, 7]
+        // take 5 → all five. all(odd) → false (2 fails).
+        let r = _fold(each(arr).take(5).all($(x : int) => x % 2 == 1))
+        t |> equal(false, r)
+    }
+    t |> run("early-exit: take(3).all(odd) → true (even excluded by take)") @(t : T?) {
+        let arr <- [1, 3, 5, 2, 7]
+        // take 3 → 1,3,5. all(odd) → true.
+        let r = _fold(each(arr).take(3).all($(x : int) => x % 2 == 1))
+        t |> equal(true, r)
+    }
+    t |> run("early-exit: take(0).any → false (empty)") @(t : T?) {
+        let arr <- [1, 2, 3]    // non-empty source; take(0) leaves nothing, so any() sees no elements
+        let r = _fold(each(arr).take(0).any())
+        t |> equal(false, r)
+    }
+    t |> run("early-exit: take(0).all(p) → true (vacuous)") @(t : T?) {
+        let arr <- [1, 2, 3]    // no element satisfies x > 100, yet take(0) leaves nothing to test
+        let r = _fold(each(arr).take(0).all($(x : int) => x > 100))
+        t |> equal(true, r)
+    }
+    t |> run("early-exit: take.first_or_default") @(t : T?) {
+        let arr : array<int>    // empty source → nothing to take, so the -1 default is expected back
+        let r = _fold(each(arr).take(5).first_or_default(-1))
+        t |> equal(-1, r)
+    }
+    t |> run("early-exit: take.contains") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        // take 3 → 1,2,3. contains(4) → false (excluded by take).
+        let r = _fold(each(arr).take(3).contains(4))
+        t |> equal(false, r)
+    }
+}
+
+[test]
+def test_take_skip_array_lane(t : T?) {
+    t |> run("array: take.to_array") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r <- _fold(each(arr).take(3).to_array())
+        t |> equal(3, length(r))
+        t |> equal(1, r[0])
+        t |> equal(2, r[1])
+        t |> equal(3, r[2])
+    }
+    t |> run("array: skip.take.to_array") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7]
+        let r <- _fold(each(arr).skip(2).take(3).to_array())
+        t |> equal(3, length(r))
+        t |> equal(3, r[0])
+        t |> equal(4, r[1])
+        t |> equal(5, r[2])
+    }
+    t |> run("array: where.skip.take.to_array") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
+        // even: 2,4,6,8,10. skip 1 take 3 → 4,6,8.
+        let r <- _fold(each(arr)._where(_ % 2 == 0).skip(1).take(3).to_array())
+        t |> equal(3, length(r))
+        t |> equal(4, r[0])
+        t |> equal(6, r[1])
+        t |> equal(8, r[2])
+    }
+    t |> run("array: where.select.take.to_array") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5, 6, 7]
+        // _ > 2 → 3,4,5,6,7. * 2 → 6,8,10,12,14. take 2 → 6,8.
+        let r <- _fold(each(arr)._where(_ > 2)._select(_ * 2).take(2).to_array())
+        t |> equal(2, length(r))
+        t |> equal(6, r[0])
+        t |> equal(8, r[1])
+    }
+    t |> run("array: take(0).to_array → empty") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r <- _fold(each(arr).take(0).to_array())
+        t |> equal(0, length(r))
+    }
+    t |> run("array: skip(huge).to_array → empty") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r <- _fold(each(arr).skip(100).to_array())
+        t |> equal(0, length(r))
+    }
+}
diff --git a/tests/linq/test_linq_fold_ast.das b/tests/linq/test_linq_fold_ast.das
index f0c6c5945..7454d3a77 100644
--- a/tests/linq/test_linq_fold_ast.das
+++ b/tests/linq/test_linq_fold_ast.das
@@ -110,7 +110,7 @@ def test_where_old_fold_produces_comprehension(t : T?) {
         // fold_where output: invoke($(var source) .. var pass_0 <- COMP; return <- pass_0 .., src)
         var comp_expr : ExpressionPtr
         var source_expr : ExpressionPtr
-        let r = qmatch_function(func) $() {
+        let r = qmatch_function(func) $() { // nolint:STYLE016
             return <- invoke($(var source : array<int>) {
                 var pass_0 : array<int> <- $e(comp_expr)
                 return <- pass_0
@@ -135,7 +135,7 @@ def test_where_old_fold_comprehension_pattern(t : T?) {
         // Match the full structure including comprehension pattern
         var where_cond : ExpressionPtr
         var source_expr : ExpressionPtr
-        let r = qmatch_function(func) $() {
+        let r = qmatch_function(func) $() { // nolint:STYLE016
             return <- invoke($(var source : array<int>) {
                 var pass_0 : array<int> <- [for (it in source); it; where $e(where_cond)]
                 return <- pass_0
@@ -153,7 +153,7 @@ def test_select_old_fold_produces_comprehension(t : T?) {
         if (func == null) return
         var comp_expr : ExpressionPtr
         var source_expr : ExpressionPtr
-        let r = qmatch_function(func) $() {
+        let r = qmatch_function(func) $() { // nolint:STYLE016
             return <- invoke($(var source : array<int>) {
                 var pass_0 : array<int> <- $e(comp_expr)
                 return <- pass_0
@@ -176,7 +176,7 @@ def test_select_old_fold_comprehension_pattern(t : T?) {
         if (func == null) return
         var select_expr : ExpressionPtr
         var source_expr : ExpressionPtr
-        let r = qmatch_function(func) $() {
+        let r = qmatch_function(func) $() { // nolint:STYLE016
             return <- invoke($(var source : array<int>) {
                 var pass_0 : array<int> <- [for (it in source); $e(select_expr)]
                 return <- pass_0
@@ -199,7 +199,7 @@ def test_where_select_old_fold_comprehension(t : T?) {
         var select_expr : ExpressionPtr
         var where_cond : ExpressionPtr
         var source_expr : ExpressionPtr
-        let r = qmatch_function(func) $() {
+        let r = qmatch_function(func) $() { // nolint:STYLE016
             return <- invoke($(var source : array<int>) {
                 var pass_0 : array<int> <- [for (it in source); $e(select_expr); where $e(where_cond)]
                 return <- pass_0
@@ -225,7 +225,7 @@ def test_select_where_old_fold_structure(t : T?) {
         // It is NOT a simple comprehension - verify the fold still happened
         var inner_expr : ExpressionPtr
         var source_expr : ExpressionPtr
-        let r = qmatch_function(func) $() {
+        let r = qmatch_function(func) $() { // nolint:STYLE016
             return <- invoke($(var source : array<int>) {
                 var pass_0 : array<int> <- $e(inner_expr)
                 return <- pass_0
@@ -1348,3 +1348,157 @@ def test_early_exit_falls_through_on_select_where(t : T?) {
     }
 }
 
+// ── Phase 2C take/skip splice ──────────────────────────────────────────
+
+[export, marker(no_coverage)]
+def target_where_take_count_fold() : int {
+    return _fold(each([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])._where(_ % 2 == 0).take(3).count())
+}
+
+[export, marker(no_coverage)]
+def target_skip_take_to_array_fold() : array<int> {
+    return <- _fold(each([1, 2, 3, 4, 5, 6, 7]).skip(2).take(3).to_array())
+}
+
+[export, marker(no_coverage)]
+def target_order_by_take_falls_through() : array<int> {
+    // Buffer-required (order_by) → splice planner returns null → silent fallback to raw linq.
+    return <- _fold([1, 2, 3, 4, 5].to_sequence()._select(_ * 2)._order_by(_).take(2).to_array())
+}
+
+[export, marker(no_coverage)]
+def target_distinct_falls_through() : array<int> {
+    // distinct is buffer-required — splice planner returns null → silent fallback.
+    return <- _fold([1, 2, 2, 3, 3, 3].to_sequence().distinct().to_array())
+}
+
+[export, marker(no_coverage)]
+def target_take_sum_fold() : int {
+    return _fold([1, 2, 3, 4, 5]._select(_ * 10).take(3).sum())
+}
+
+// AST helper — counts ExprBreak (want_break = true) or ExprContinue (want_break = false) nodes. Used to assert that
+// the take-limit `break` and skip-counter `continue` got spliced in. Only the node kinds handled below are descended.
+def count_break_continue(expr : Expression?; want_break : bool) : int {
+    if (expr == null) return 0    // a null child contributes nothing
+    var n = 0
+    if (want_break && expr is ExprBreak) {    // count the node itself when it is the requested kind
+        n ++
+    } elif (!want_break && expr is ExprContinue) {
+        n ++
+    }
+    if (expr is ExprBlock) {
+        let b = expr as ExprBlock
+        for (s in b.list) {
+            n += count_break_continue(s, want_break)
+        }
+        for (s in b.finalList) {    // finalList statements are counted as well
+            n += count_break_continue(s, want_break)
+        }
+    } elif (expr is ExprFor) {
+        let f = expr as ExprFor
+        for (s in f.sources) {    // loop source expressions are walked too, not just the body
+            n += count_break_continue(s, want_break)
+        }
+        n += count_break_continue(f.body, want_break)
+    } elif (expr is ExprIfThenElse) {
+        let i = expr as ExprIfThenElse
+        n += count_break_continue(i.cond, want_break)
+        n += count_break_continue(i.if_true, want_break)
+        n += count_break_continue(i.if_false, want_break)
+    } elif (expr is ExprMakeBlock) {
+        let mb = expr as ExprMakeBlock
+        n += count_break_continue(mb._block, want_break)
+    } elif (expr is ExprInvoke) {
+        let inv = expr as ExprInvoke
+        for (a in inv.arguments) {    // only the invoke's arguments are walked
+            n += count_break_continue(a, want_break)
+        }
+    }
+    return n
+}
+
+[test]
+def test_take_splices_break_into_loop(t : T?) {
+    // where.take.count: counter lane with take guard → one for-loop + at least one break
+    // (the `if (taken == N) break` guard).
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_where_take_count_fold)
+        if (func == null) return    // NOTE(review): no assertion runs on this path — a missing target passes vacuously
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {    // binds the function's whole returned expression to body_expr
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper for splice")
+        t |> equal(1, count_inner_for_loops(body_expr), "single fused for-loop")
+        t |> success(count_break_continue(body_expr, true) >= 1, "take guard must emit a break")
+    }
+}
+
+[test]
+def test_skip_take_splices_continue_and_break(t : T?) {
+    // skip.take.to_array: array lane with both guards → continue (skip) + break (take).
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_skip_take_to_array_fold)
+        if (func == null) return    // NOTE(review): no assertion runs on this path — a missing target passes vacuously
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {    // binds the function's whole returned expression to body_expr
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        t |> equal(1, count_inner_for_loops(body_expr), "single fused for-loop")
+        t |> success(count_break_continue(body_expr, true) >= 1, "take guard must emit a break")
+        t |> success(count_break_continue(body_expr, false) >= 1, "skip guard must emit a continue")
+    }
+}
+
+[test]
+def test_take_sum_splices_in_accumulator(t : T?) {
+    // select.take.sum: accumulator lane with take → one for-loop + acc += ... + break.
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_take_sum_fold)
+        if (func == null) return    // NOTE(review): no assertion runs on this path — a missing target passes vacuously
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {    // binds the function's whole returned expression to body_expr
+            return $e(body_expr)
+        }
+        t |> success(r.matched && body_expr is ExprInvoke, "expected invoke wrapper")
+        t |> equal(1, count_inner_for_loops(body_expr), "single fused for-loop")
+        t |> success(count_op2(body_expr, "+=") >= 1, "sum must emit `+=` in accumulator")
+        t |> success(count_break_continue(body_expr, true) >= 1, "take guard must emit a break")
+    }
+}
+
+[test]
+def test_order_by_take_falls_through(t : T?) {
+    // order_by + take is BufferTopN territory — recognized as buffer-required, planner returns
+    // null, falls through to plain linq. Future PR replaces with a dedicated emit path.
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_order_by_take_falls_through)
+        if (func == null) return    // NOTE(review): no assertion runs on this path — a missing target passes vacuously
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {    // binds the function's whole returned expression to body_expr
+            return $e(body_expr)
+        }
+        t |> success(r.matched, "should have return expression")
+        t |> success(!(body_expr is ExprInvoke),
+            "order_by chain should fall through unfolded — splice planner doesn't handle buffer mode yet")
+    }
+}
+
+[test]
+def test_distinct_falls_through(t : T?) {
+    // distinct is buffer-required (hash set) — fall-through marker.
+    ast_gc_guard() {
+        var func = find_module_function_via_rtti(compiling_module(), @@target_distinct_falls_through)
+        if (func == null) return    // NOTE(review): no assertion runs on this path — a missing target passes vacuously
+        var body_expr : ExpressionPtr
+        let r = qmatch_function(func) $() {    // binds the function's whole returned expression to body_expr
+            return $e(body_expr)
+        }
+        t |> success(r.matched, "should have return expression")
+        t |> success(!(body_expr is ExprInvoke),
+            "distinct chain should fall through unfolded — BufferDistinct mode not yet implemented")
+    }
+}
+

From 10e6cdca1182681c538b24863ec3d00a3583bbd8 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 22:12:42 -0700
Subject: [PATCH 07/14] ast_match: switch qmatch gensym from qm_{n} to qm`{n}
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Backtick is the established compiler-generated-name marker in daslib
(linq_fold gensyms, etc.) and lint.das:152 already skips backtick-bearing
names. Eliminates collision risk with any user identifier — `qm_5` is a
valid user identifier; `qm\`5` is not.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 daslib/ast_match.das | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/daslib/ast_match.das b/daslib/ast_match.das
index 95be772bb..b8fb90143 100644
--- a/daslib/ast_match.das
+++ b/daslib/ast_match.das
@@ -508,10 +508,10 @@ def private qm_null_guard(at : LineInfo; actual_var : string) : Expression? {
 
 [macro_function]
 def private next_var(var index : int&) : string {
-    // Generated locals for qmatch pattern-match bind sites. Leading `qm_` (no underscore
-    // prefix) sidesteps LINT004 — these vars are USED downstream in the emitted match
-    // arms, so the unused-prefix convention doesn't apply.
-    let name = "qm_{index}"
+    // Generated locals for qmatch pattern-match bind sites. Backtick marks them as
+    // compiler-generated — no collision with user identifiers, and lint.das:152 skips
+    // backtick-bearing names from LINT004's leading-underscore-used check.
+    let name = "qm`{index}"
     index++
     return name
 }

From 088d80ed062294323fb057c0fcd88c00b2dacbdd Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 22:17:05 -0700
Subject: [PATCH 08/14] linq_fold Phase 2C Ring 4: chained-select clone via :=
 (drop prevWorkhorse guard)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The planner rejected any chained `_select|_select|...` whose previous
projection had a non-workhorse type — the `prevWorkhorse=false → return null`
guard at the `select` arm in plan_loop_or_count. Stated reason: `<-` (move)
corrupts source for lvalue projections like `_._field`. The guard was a
Phase 2B placeholder.

Correctness observation: `:=` is safe on every type — byte copy on workhorse,
deep-clone on non-workhorse — so a single emission shape (`var $i(bind) := ...`)
covers both cases. The workhorse/non-workhorse branch in chained-bind emission
is removed; chained selects of any type now splice through one path.

Audit result — two workhorse-type branches remain in linq_fold.das, both
intentional:

  1. fold_select_where (line ~392, `static_if (typeinfo is_workhorse(...))`)
     — used exclusively by `_old_fold`'s frozen baseline path via g_foldSeq.
     Changing it would alter the frozen baseline output and invalidate the
     m3f_old benchmark column.

  2. min_max_compare (line ~746) + caller (line ~933)
     — perf-critical. Workhorse types use `<` / `>` directly
     (single-instruction compare); non-workhorse falls back to `_::less`
     so user/tuple comparator overloads still apply. Boris's design
     directive 2026-05-16 mandates keeping this branch.

No further workhorse branches in the splice path.

New test: tests/linq/test_linq_fold.das::test_chained_non_workhorse_select
covers three chain shapes that previously fell through:

  - int → ComplexType → int → sum
  - where + int → ComplexType → int → sum (where-before-selects, canonical)
  - workhorse → ComplexType → workhorse → max

LINQ.md gets a new Phase 2C Ring 4 section documenting the change + the
two-branch audit. Phase status table updated.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 benchmarks/sql/LINQ.md        | 23 +++++++++++++++++++++-
 daslib/linq_fold.das          | 12 +++++-------
 tests/linq/test_linq_fold.das | 37 +++++++++++++++++++++++++++++++++++
 3 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/benchmarks/sql/LINQ.md b/benchmarks/sql/LINQ.md
index 25105e908..00afdcd0c 100644
--- a/benchmarks/sql/LINQ.md
+++ b/benchmarks/sql/LINQ.md
@@ -26,7 +26,7 @@ See `~/.claude/plans/keen-hopping-balloon.md` for the long-form plan.
 | 2B Ring 1 | Accumulator lane: `sum`, `min`, `max`, `average`, `long_count` with workhorse `<` / `>` for min/max scalars and `_::less` fallback for tuples/user types. `long_count` shares the count-length shortcut. | ✅ done |
 | 2B Ring 2 | Early-exit lane: `first`, `first_or_default`, `any`, `all`, `contains` via `invoke($block { ... return val })`. Predicate-free `any` gets a `length(src) > 0` shortcut. | ✅ done |
 | 2C Ring 3 | `take(N)` / `skip(N)` in counter/array/accumulator/early-exit lanes. Canonical chain order `[where_*][select*][skip?][take?] |> terminator`. Trailing take/skip (no explicit aggregator) → ARRAY lane with implicit `to_array`. Range-form `take(start..end)` falls through (slice operator, different semantics). Buffer-required ops (`order_by`, `distinct`, `reverse`, `group_by`, `zip`, `join`, `left_join`, `group_join`) recognized by name and emit silent fallback with future-mode markers (BufferTopN / BufferDistinct / BufferReverse / BufferGroupBy / MultiSourceZip / BufferedJoin). | ✅ done |
-| 2C Ring 4 | Non-workhorse chained selects via `:=`-clone. | ⏳ |
+| 2C Ring 4 | Non-workhorse chained selects via `:=`-clone. | ✅ done |
 | 2D | Fail-loudly contract — see "Planned" section below | ⏳ |
 | 3+ | Buffer-required emit modes: `distinct`, `sort`/`order_by`, `reverse`, `groupby`, `zip`, `join`. Once we go array, we stay array | ⏳ |
 | 4 | Final coverage pass + docs; full 4-way comparison table refresh; parity-test sweep | ⏳ |
@@ -212,6 +212,27 @@ Sub-ns/op on the three improved benchmarks reflects the bounded-loop nature: per
 
 `_old_fold`'s `take_count` at 0 ns/op already reflects iterator-fusion at the linq-runtime layer; the Phase 2C delta there is allocation count (`_fold`: 1 alloc for the result array, `_old_fold`: same with extra take-iterator wrapper). The functional Phase 2C win for that shape is structural — the splice path now emits a single fused loop where `_old_fold` chains iterator instances.
 
+## Phase 2C Ring 4 — chained-select clone via `:=` (2026-05-17)
+
+Before Ring 4, the planner rejected any `_select|_select|...` chain whose previous projection had a non-workhorse type. The reason given in the source: `<-` (move) corrupts the source for lvalue projections like `_._field`. The rejection guard (`prevWorkhorse=false → return null`) was a Phase 2B placeholder.
+
+Resolution in Ring 4 follows from a Boris correctness observation: **`:=` is safe on every type** — byte copy on workhorse, deep-clone on non-workhorse — so a single emission shape (`var $i(bind) := $e(projection)`) covers both cases. The workhorse / non-workhorse branch in chained-bind emission is removed entirely; chained selects of any type now splice through one path.
+
+Concretely: `each(arr)._select(ComplexType(a = [_*2]))._select(_.a[0]).sum()` previously fell through to plain linq (iterator chain + allocation); now splices to a single fused for-loop.
+
+### Workhorse-branch audit
+
+After Ring 4, two workhorse-type branches remain in `linq_fold.das`. Both are intentional:
+
+1. **`fold_select_where` (line ~392), `static_if (typeinfo is_workhorse($e(selectExpr)))`** — used exclusively by `_old_fold`'s frozen baseline path via `g_foldSeq`. Not touched here; changing it would alter the frozen `_old_fold` output and invalidate the `m3f_old` benchmark column.
+2. **`min_max_compare` (line ~746) + caller (line ~933)** — perf-critical. Workhorse types use `<` / `>` directly (single-instruction compare); non-workhorse falls back to `_::less` so user/tuple comparator overloads still apply. Boris's design directive 2026-05-16 explicitly mandates keeping this branch (≈2× per-element on int columns; see PR #2696 numbers).
+
+No further workhorse branches in the splice path.
+
+### Ring 4 deltas
+
+Ring 4 is a coverage extension (chained non-workhorse selects now splice instead of falling through to plain linq), not a per-benchmark improvement on the existing 100K suite. Coverage is tracked via the test `test_chained_non_workhorse_select` in `tests/linq/test_linq_fold.das` (3 subtests: int → ComplexType → int → sum / where + ComplexType chain + sum / workhorse → ComplexType → workhorse → max).
+
 ## Planned: fail-loudly contract
 
 The current contract: when `_fold` can't splice a chain (out-of-scope terminator, buffer-required op, multiple take/skip, range-form take/skip, etc.), it falls through to plain linq — same as today's master. This is **temporary**. The planned contract (Boris design directive 2026-05-17): `_fold` will emit `macro_error("_fold: cannot splice — <reason>")` for any unsupported shape, mirroring the sqlite_linq `_sql(...)` "splice or error" contract.
diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index fa37b9fed..4e98c4d80 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -1195,19 +1195,17 @@ def private plan_loop_or_count(var expr : Expression?) : Expression? {
             // Chained selects: bind the previous projection to a fresh local now so the next
             // lambda's `_` can be renamed straight to that name — avoids the
             // ExprRef2Value-substitution trap that plain `Template.replaceVariable` hits when
-            // splicing a typed expression into another typed expression. Phase 2A only
-            // chains workhorse projections; a non-workhorse intermediate binding would need
-            // a clone (`:=`) since `<-` (move) can corrupt source for lvalue projections
-            // like `_._field`. Deferred to Phase 2B.
+            // splicing a typed expression into another typed expression. `:=` (clone) is
+            // safe on every type — byte copy on workhorse, deep-clone on non-workhorse — so
+            // the same emission shape works regardless of projection type, including lvalue
+            // projections like `_._field` that `<-` (move) would corrupt.
             if (projection != null) {
-                let prevWorkhorse = projection._type != null && projection._type.isWorkhorseType
-                if (!prevWorkhorse) return null   // chained non-workhorse selects — Phase 2B
                 if (has_sideeffects(projection)) {
                     allProjectionsPure = false
                 }
                 let bindName = "`v`{at.line}`{at.column}`{length(intermediateBinds)}"
                 intermediateBinds |> push <| qmacro_expr() {
-                    var $i(bindName) = $e(projection)
+                    var $i(bindName) := $e(projection)
                 }
                 lastBindName = bindName
             }
diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das
index 2e8a1293e..a6b7a48d4 100644
--- a/tests/linq/test_linq_fold.das
+++ b/tests/linq/test_linq_fold.das
@@ -1245,3 +1245,40 @@ def test_take_skip_array_lane(t : T?) {
         t |> equal(0, length(r))
     }
 }
+
+[test]
+def test_chained_non_workhorse_select(t : T?) {
+    // Before Phase 2C Ring 4 the planner rejected any chained _select whose previous projection
+    // was non-workhorse (prevWorkhorse=false → return null), falling back to the iterator path.
+    // Ring 4 switched chained-bind emission to `:=` (clone) — safe for every type — so
+    // ComplexType intermediates now splice.
+    t |> run("int → ComplexType → int → sum") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)
+            ._select(ComplexType(a = [_ * 2]))    // int → ComplexType (non-workhorse)
+            ._select(_.a[0])                       // ComplexType → int (reads field)
+            .sum())
+        // sum(2 + 4 + 6 + 8 + 10) = 30
+        t |> equal(30, r)
+    }
+    t |> run("where then ComplexType intermediate then sum") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)
+            ._where(_ >= 2)                        // predicate before selects
+            ._select(ComplexType(a = [_ * 2, _ * 3]))
+            ._select(_.a[0] + _.a[1])              // ComplexType → int (reads two elements of field a)
+            .sum())
+        // filtered [2,3,4,5] → [(4+6),(6+9),(8+12),(10+15)] = [10,15,20,25] → sum = 70
+        t |> equal(70, r)
+    }
+    t |> run("workhorse → ComplexType → workhorse → max") @(t : T?) {
+        let arr <- [1, 2, 3, 4, 5]
+        let r = _fold(each(arr)
+            ._select(_ * 2)                        // int → int (workhorse)
+            ._select(ComplexType(a = [_, _ + 1]))  // int → ComplexType (non-workhorse)
+            ._select(_.a[1])                       // ComplexType → int
+            .max())
+        // _ * 2 → [2,4,6,8,10]; .a[1] → [3,5,7,9,11]; max = 11
+        t |> equal(11, r)
+    }
+}

From ffcb27590334e6ee7e0e4f81416fcefa4278e1a4 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 22:31:39 -0700
Subject: [PATCH 09/14] cmake: gate daslang-target references when
 DAS_TOOLS_DISABLED=ON
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When DAS_TOOLS_DISABLED=ON, the `daslang` target is never created
(SETUP_COMPILER_BINARY only runs in the gated `if (NOT ${DAS_TOOLS_DISABLED})`
block). Several unconditional references to it caused configure to fail with
`No target "daslang"`:

- `ADD_EXAMPLE_RUN` macro fed into `run_examples` which uses
  `$<TARGET_FILE:daslang>` / `DEPENDS daslang`. Gate the macro itself so all
  call sites (incl. dasPEG, dasStbImage, dasLLVM) are automatically safe and
  `DAS_EXAMPLES_TO_RUN` stays empty.
- `examples/pathTracer/CMakeLists.txt` depends on `libDaScriptAot`, which is
  only created when AOT examples are enabled. Move the include under
  `if(NOT DAS_AOT_EXAMPLES_DISABLED)` (same guard already used for tests/aot).
- Documentation build (`das2rst.das` step) references `daslang`. Add
  `AND NOT DAS_TOOLS_DISABLED` to the `if(DAS_BUILD_DOCUMENTATION)` guard so
  tools-disabled + docs-on no longer fails configure.

Verified locally — both configures succeed:
  cmake -B build_off -DDAS_TOOLS_DISABLED=ON -DDAS_AOT_EXAMPLES_DISABLED=ON
  cmake -B build_on  -DDAS_TOOLS_DISABLED=OFF

Closes #2686

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 CMakeLists.txt | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 80ad73430..e4c1654ba 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -438,13 +438,22 @@ ENDMACRO()
 # Register a .das file to be run as a headless example check.
 # Modules call this from their CMakeLists.txt for examples that
 # can run without GUI, audio, or user interaction.
+#
+# When DAS_TOOLS_DISABLED=ON the `daslang` target is never created, so the
+# run_examples target below (which references $<TARGET_FILE:daslang>) cannot
+# be generated. Gate the registration here so every call site is automatically
+# safe and DAS_EXAMPLES_TO_RUN stays empty.
 MACRO(ADD_EXAMPLE_RUN das_file)
-    SET(_exe TRUE)
-    IF("${ARGN}" STREQUAL "FALSE")
-        SET(_exe FALSE)
+    IF(DAS_TOOLS_DISABLED)
+        MESSAGE(STATUS "EXAMPLE_RUN ${das_file} skipped (DAS_TOOLS_DISABLED=ON)")
+    ELSE()
+        SET(_exe TRUE)
+        IF("${ARGN}" STREQUAL "FALSE")
+            SET(_exe FALSE)
+        ENDIF()
+        MESSAGE("EXAMPLE_RUN ${das_file} (exe=${_exe})")
+        LIST(APPEND DAS_EXAMPLES_TO_RUN "${das_file}|${_exe}")
     ENDIF()
-    MESSAGE("EXAMPLE_RUN ${das_file} (exe=${_exe})")
-    LIST(APPEND DAS_EXAMPLES_TO_RUN "${das_file}|${_exe}")
 ENDMACRO()
 
 ADD_EXAMPLE_RUN("${PROJECT_SOURCE_DIR}/examples/dasbind/dasbind_example.das")
@@ -1117,7 +1126,11 @@ SET(DAS_DASCRIPT_MAIN_SRC
 
 # Tests
 if (NOT ${DAS_TESTS_DISABLED})
-    include(examples/pathTracer/CMakeLists.txt)
+    # pathTracer links against libDaScriptAot, which is only created when
+    # AOT examples are enabled — gate the inclusion accordingly.
+    if(NOT ${DAS_AOT_EXAMPLES_DISABLED})
+        include(examples/pathTracer/CMakeLists.txt)
+    endif()
     include(examples/crash/CMakeLists.txt)
     if(NOT ${DAS_AOT_EXAMPLES_DISABLED} AND NOT (WIN32 AND CMAKE_SIZEOF_VOID_P EQUAL 4))
         include(tests/aot/CMakeLists.txt)
@@ -1742,7 +1755,10 @@ install(FILES
 add_subdirectory(tutorials)
 
 # ── Documentation build (optional) ──────────────────────────────────────────
-if(DAS_BUILD_DOCUMENTATION)
+# Requires the `daslang` target (das2rst stdlib RST generator runs it), so we
+# also gate on NOT DAS_TOOLS_DISABLED — otherwise CMake fails on the missing
+# target. Tools-disabled builds simply can't generate docs.
+if(DAS_BUILD_DOCUMENTATION AND NOT DAS_TOOLS_DISABLED)
     find_package(Python3 COMPONENTS Interpreter)
     if(Python3_FOUND)
         execute_process(

From 969343ae0b0da8515f093bd816286663e3745f16 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 22:42:23 -0700
Subject: [PATCH 10/14] =?UTF-8?q?linq=5Ffold:=20fix=20take(N)=20non-positi?=
 =?UTF-8?q?ve=20N=20=E2=80=94=20`=3D=3D`=20=E2=86=92=20`>=3D`?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Copilot review on PR #2697 (https://github.com/GaijinEntertainment/daScript/pull/2697#discussion_r3254094082)
caught a silent semantic divergence in the take-limit guard.

Pre-fix: `if (takenCount == N) break` — for negative N, `takenCount` (which
starts at 0 and only ever increments) never satisfies equality, so the
spliced loop takes ALL elements. Reference iterator semantics in
daslib/linq.das treat non-positive N as "take nothing":

  take_impl     (line 858): `if (total <= 0) break`
  take_to_array (line 879): `if (total <= 0) break`

Post-fix: `if (takenCount >= N) break`. Identical to `==` for positive N
(since `takenCount` increments by 1 from 0), short-circuits on the first
iteration for N <= 0.

Skip is unaffected — the existing emission
`if (skipRem > 0) { skipRem--; continue }` is structurally identical to
`skip_impl` (linq.das:770-773) and naturally inert for non-positive K
(condition false → all elements pass through → skip nothing).

Added 4 pinning tests covering take(-1), take(0), skip(-1), skip(0) so
the non-positive semantics are documented and the regression can't slip
back in.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 daslib/linq_fold.das          |  6 +++++-
 tests/linq/test_linq_fold.das | 26 ++++++++++++++++++++++++++
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das
index 4e98c4d80..b6c27149b 100644
--- a/daslib/linq_fold.das
+++ b/daslib/linq_fold.das
@@ -717,8 +717,12 @@ def private wrap_with_skip_take(var stmts : array<Expression?>; var skipExpr : E
     prefixed |> reserve(length(stmts) + 3)
     if (takeExpr != null) {
         var takeLimit = clone_expression(takeExpr)
+        // `>=` (not `==`) so non-positive N short-circuits on the first iteration —
+        // matches reference iterator semantics: linq.das take_impl uses `if (total <= 0) break`,
+        // so `take(-1)` and `take(0)` yield nothing. `==` would silently take all elements
+        // for negative N (takenCount starts at 0 and only ever increments).
         prefixed |> push <| qmacro_expr() {
-            if ($i(takeCountName) == $e(takeLimit)) {
+            if ($i(takeCountName) >= $e(takeLimit)) {
                 break
             }
         }
diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das
index a6b7a48d4..e9b2cc41b 100644
--- a/tests/linq/test_linq_fold.das
+++ b/tests/linq/test_linq_fold.das
@@ -1244,6 +1244,32 @@ def test_take_skip_array_lane(t : T?) {
         let r <- _fold(each(arr).skip(100).to_array())
         t |> equal(0, length(r))
     }
+    // Pin non-positive N/K behavior — reference iterator semantics treat non-positive as
+    // "take/skip nothing" (linq.das take_impl: `if (total <= 0) break`; skip_impl:
+    // `if (total > 0) decrement-and-skip` so non-positive falls through).
+    t |> run("array: take(-1).to_array → empty (matches take_impl)") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r <- _fold(each(arr).take(-1).to_array())
+        t |> equal(0, length(r))
+    }
+    t |> run("array: take(0).to_array → empty (pins zero-N)") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r <- _fold(each(arr).take(0).to_array())
+        t |> equal(0, length(r))
+    }
+    t |> run("array: skip(-1).to_array → full (non-positive skip is a no-op)") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r <- _fold(each(arr).skip(-1).to_array())
+        t |> equal(3, length(r))
+        t |> equal(1, r[0])
+        t |> equal(2, r[1])
+        t |> equal(3, r[2])
+    }
+    t |> run("array: skip(0).to_array → full (zero-K is a no-op)") @(t : T?) {
+        let arr <- [1, 2, 3]
+        let r <- _fold(each(arr).skip(0).to_array())
+        t |> equal(3, length(r))
+    }
 }
 
 [test]

From ceaa3be50408184032c1d0aae319bbf372d7f279 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 22:43:22 -0700
Subject: [PATCH 11/14] cmake: fatal-error on DAS_BUILD_DOCUMENTATION +
 DAS_TOOLS_DISABLED

Previous fix silently skipped the docs block when tools were disabled.
DAS_BUILD_DOCUMENTATION is an explicit opt-in, so failing early matches
the existing DAS_AOT_EXAMPLES check at line 1081 and surfaces the
incompatible flag combination instead of mystifying the user.

Addresses Copilot review on #2698.
---
 CMakeLists.txt | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index e4c1654ba..22c0d7027 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -1755,10 +1755,13 @@ install(FILES
 add_subdirectory(tutorials)
 
 # ── Documentation build (optional) ──────────────────────────────────────────
-# Requires the `daslang` target (das2rst stdlib RST generator runs it), so we
-# also gate on NOT DAS_TOOLS_DISABLED — otherwise CMake fails on the missing
-# target. Tools-disabled builds simply can't generate docs.
-if(DAS_BUILD_DOCUMENTATION AND NOT DAS_TOOLS_DISABLED)
+# Requires the `daslang` target (das2rst stdlib RST generator runs it). If the
+# user explicitly asked for docs but also disabled tools, fail early rather
+# than silently skipping — mirrors the DAS_AOT_EXAMPLES check above.
+if(DAS_BUILD_DOCUMENTATION)
+    if(DAS_TOOLS_DISABLED)
+        message(FATAL_ERROR "DAS_BUILD_DOCUMENTATION requires DAS_TOOLS_DISABLED to be OFF (the docs pipeline runs daslang via das2rst.das)")
+    endif()
     find_package(Python3 COMPONENTS Interpreter)
     if(Python3_FOUND)
         execute_process(

From 95e5967235254dde570ca043da5def63475b8159 Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 23:54:05 -0700
Subject: [PATCH 12/14] examples/graphics: dark bg matching daslang theme +
 recolor circles
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`glClearColor(0.85, 0.85, 0.90)` (lavender) clashed with the daslang theme
that the harness now applies — the ImGui windows on top sit on `#0d0c0a`
while the GL surface behind them was light. Switch the clear color to the
theme bg (`(0.051, 0.047, 0.039)` = `#0d0c0a`) so the viewport blends
seamlessly with the windows.

Circle outlines were `(0,0,0)` black — invisible on the new dark bg.
Recolor to `(0.357, 0.333, 0.278)` (theme `fgFaint`, `#5b5547`) so the
epicycle circles are legible without competing with the red arrows or
yellow trace.

Arrows (red) and trace (yellow) read fine on dark — left as-is.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 examples/graphics/furier_opengl_imgui_example.das | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/examples/graphics/furier_opengl_imgui_example.das b/examples/graphics/furier_opengl_imgui_example.das
index e25f05f7c..04ebf440a 100644
--- a/examples/graphics/furier_opengl_imgui_example.das
+++ b/examples/graphics/furier_opengl_imgui_example.das
@@ -86,17 +86,17 @@ def draw_fourier() {
     draw_arrow(float2(0.0f, 0.0f), p0, float3(1.0f, 0.0f, 0.0f))
     if (enable_1) {
         var p = p0
-        draw_circle(p, length(cp1), float3(0.0f, 0.0f, 0.0f))
+        draw_circle(p, length(cp1), float3(0.357f, 0.333f, 0.278f))
         draw_arrow(p, pp1, float3(1.0f, 0.0f, 0.0f))
         p += pp1
-        draw_circle(p, length(cn1), float3(0.0f, 0.0f, 0.0f))
+        draw_circle(p, length(cn1), float3(0.357f, 0.333f, 0.278f))
         draw_arrow(p, pn1, float3(1.0f, 0.0f, 0.0f))
         p += pn1
         if (enable_2) {
-            draw_circle(p, length(cp2), float3(0.0f, 0.0f, 0.0f))
+            draw_circle(p, length(cp2), float3(0.357f, 0.333f, 0.278f))
             draw_arrow(p, pp2, float3(1.0f, 0.0f, 0.0f))
             p += pp2
-            draw_circle(p, length(cn2), float3(0.0f, 0.0f, 0.0f))
+            draw_circle(p, length(cn2), float3(0.357f, 0.333f, 0.278f))
             draw_arrow(p, pn2, float3(1.0f, 0.0f, 0.0f))
             p += pn2
         }
@@ -120,7 +120,9 @@ def update() {
     var display_w, display_h : int
     live_get_framebuffer_size(display_w, display_h)
     glViewport(0, 0, display_w, display_h)
-    glClearColor(0.85f, 0.85f, 0.90f, 1.0f)
+    // Match the daslang theme's bg (#0d0c0a) so the GL viewport blends
+    // seamlessly with the ImGui windows that sit on top.
+    glClearColor(0.051f, 0.047f, 0.039f, 1.0f)
     glClear(GL_COLOR_BUFFER_BIT)
     draw_fourier()
 

From 4a844359f809e62dd5169aff6249bf386c196c9d Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 23:58:10 -0700
Subject: [PATCH 13/14] =?UTF-8?q?mouse:=205=20cards=20=E2=80=94=20linq=5Ff?=
 =?UTF-8?q?old=20Phase=202C=20+=20dasImgui=20HDPI/build?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three linq_fold cards from Phase 2C (PR #2697) that didn't make it into
the merged PR:
  - chained-select splice: bind via clone-assign universal (drops the
    prevWorkhorse-only guard)
  - macro planner: named-marker arms leave room for future modes
  - splice macro: bounded loop guard for take/skip non-positive N

Two dasImgui cards from this session (HDPI PR borisbat/dasImgui#42):
  - HDPI plumbing pattern (glfwGetWindowContentScale + ScaleAllSizes +
    GLFW_SCALE_TO_MONITOR hint; float* binding gotcha)
  - Local daspkg install dance (out-of-tree dasImgui build + --global
    --force re-install to propagate edits; -project_root won't substitute)

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 ...-splice-bind-via-clone-assign-universal.md | 47 ++++++++++++++++
 ...are-what-s-the-canonical-scale-plumbing.md | 53 +++++++++++++++++++
 ...y-what-s-the-build-daspkg-install-dance.md | 46 ++++++++++++++++
 ...marker-arms-leave-room-for-future-modes.md | 46 ++++++++++++++++
 ...ded-loop-guard-take-skip-non-positive-n.md | 46 ++++++++++++++++
 5 files changed, 238 insertions(+)
 create mode 100644 mouse-data/docs/chained-select-splice-bind-via-clone-assign-universal.md
 create mode 100644 mouse-data/docs/how-do-i-make-dasimgui-hdpi-aware-what-s-the-canonical-scale-plumbing.md
 create mode 100644 mouse-data/docs/how-do-i-run-a-dasimgui-demo-locally-what-s-the-build-daspkg-install-dance.md
 create mode 100644 mouse-data/docs/macro-planner-named-marker-arms-leave-room-for-future-modes.md
 create mode 100644 mouse-data/docs/splice-macro-bounded-loop-guard-take-skip-non-positive-n.md

diff --git a/mouse-data/docs/chained-select-splice-bind-via-clone-assign-universal.md b/mouse-data/docs/chained-select-splice-bind-via-clone-assign-universal.md
new file mode 100644
index 000000000..da9a9b532
--- /dev/null
+++ b/mouse-data/docs/chained-select-splice-bind-via-clone-assign-universal.md
@@ -0,0 +1,47 @@
+---
+slug: chained-select-splice-bind-via-clone-assign-universal
+title: How do I splice chained `_select|_select` stages in a daslib macro without hitting the ExprRef2Value substitution trap, and is the bind-via-`:=` approach correct for every element type?
+created: 2026-05-17
+last_verified: 2026-05-17
+links: []
+---
+
+
+Bind the previous projection to a fresh local with **`:=` (clone assignment)**, then rename the next select's `_` to that bind name via `Template.replaceVariable("_", $i(bindName))`. Single emission shape — no `static_if`, no macro-time workhorse branching.
+
+```das
+// projection = previous select's lambda body (already typed, may carry ExprRef2Value
+// wrappers from the typer pass)
+let bindName = "`v`{at.line}`{at.column}`{length(intermediateBinds)}"
+intermediateBinds |> push <| qmacro_expr() {
+    var $i(bindName) := $e(projection)
+}
+// next stage's `_` gets renamed to bindName via Template.replaceVariable
+```
+
+**Why bind-and-rename instead of direct substitution:** trying to substitute the full previous projection expression into the next stage's body hits the `ExprRef2Value` trap — see [[my-macro-substitutes-it-for-a-projection-expression-via-template-replacevariable-it-proj-apply-template-but-the-result-fails-to]]. The bind step sidesteps it: the substitute is now a simple `ExprVar(bindName)`, no wrapper, `Template.replaceVariable` works cleanly.
+
+**Why `:=` is universally correct** — and why the workhorse / non-workhorse branch you might be tempted to write isn't needed:
+
+| Source type | What `:=` does | Side effects on source |
+|---|---|---|
+| Workhorse (`int`, `float`, `bool`, `string`, …) | byte copy | none — same cost as plain `=` |
+| Non-workhorse (`array<T>`, lambdas, user structs) | deep clone, new heap alloc | none — source remains intact |
+| Lvalue projection like `_._field` | clones the field's value into the local | the parent object is untouched |
+
+Compare:
+- **`=`** (plain assign) — fails to compile for non-copyable types (`array<T>`, lambdas, etc.) and triggers the strict "this type is not assignable" check.
+- **`<-`** (move) — corrupts the source by zeroing it. For an lvalue projection like `_._field`, this would zero the field on the underlying iterated element — incorrect, since the user expects the projection to be a *read*.
+
+So a single emission `var $i(bind) := $e(projection)` covers every chain shape. Earlier `linq_fold` versions had a `prevWorkhorse=false → return null` guard at the chained-`select` arm; Phase 2C Ring 4 (2026-05-17) lifted it after the audit confirmed `:=` is safe everywhere.
+
+**Concrete location:** `daslib/linq_fold.das` `plan_loop_or_count` select-stage arm — chained selects of any type splice through this one path. The push-vs-emplace decision in the array lane (different problem — that's about `push(value)` vs `var v <- value; emplace(v)`) still branches on workhorse-ness; see [[when-a-call-macro-needs-to-pick-copy-vs-move-init-for-a-projection-should-i-emit-static-if-typeinfo-is-workhorse-e-proj-or-decid]].
+
+**What workhorse branches still legitimately exist in `linq_fold.das`** (audit 2026-05-17):
+1. `fold_select_where` — `static_if (typeinfo is_workhorse($e(selectExpr)))` — frozen `_old_fold` baseline path; changing it would invalidate the `m3f_old` benchmark column.
+2. `min_max_compare` — workhorse uses `<` / `>` directly (single-instruction compare); non-workhorse falls back to `_::less` for user/tuple comparators. Perf-critical (~2× per-element on int columns), Boris-mandated.
+
+Outside those two, no workhorse branch in the splice path.
+
+## Questions
+- How do I splice chained `_select|_select` stages in a daslib macro without hitting the ExprRef2Value substitution trap, and is the bind-via-`:=` approach correct for every element type?
diff --git a/mouse-data/docs/how-do-i-make-dasimgui-hdpi-aware-what-s-the-canonical-scale-plumbing.md b/mouse-data/docs/how-do-i-make-dasimgui-hdpi-aware-what-s-the-canonical-scale-plumbing.md
new file mode 100644
index 000000000..55f257243
--- /dev/null
+++ b/mouse-data/docs/how-do-i-make-dasimgui-hdpi-aware-what-s-the-canonical-scale-plumbing.md
@@ -0,0 +1,53 @@
+---
+slug: how-do-i-make-dasimgui-hdpi-aware-what-s-the-canonical-scale-plumbing
+title: How do I make dasImgui HDPI-aware — what's the canonical scale plumbing?
+created: 2026-05-17
+last_verified: 2026-05-17
+links: []
+---
+
+Verified 2026-05-16 (dasImgui PR #42, branch `bbatkin/hdpi-theme-scaling`).
+
+dasImgui prior to PR #42 had **zero DPI awareness**: theme constants hardcoded at 1x (`WindowPadding(8,8)`, `FramePadding(6,3)`, `ScrollbarSize=10`), font at 14px exactly. `glfwGetWindowContentScale` was already bound in dasGlfw but never queried.
+
+**The plumbing (3 places):**
+
+1. `widgets/imgui_live.das` — read scale once at init from the window, cache it, apply to fonts + style:
+```das
+var public live_imgui_content_scale : float = 1.0f
+
+def public live_imgui_init(window : GLFWwindow?; glsl_version : string = "#version 330") {
+    ...
+    if (window != null) {
+        var xs = 1.0f
+        var ys = 1.0f
+        glfwGetWindowContentScale(window, safe_addr(xs), safe_addr(ys))
+        // Clamp: some backends report 0 if monitor unavailable.
+        live_imgui_content_scale = max(max(xs, ys), 1.0f)
+    }
+    load_daslang_font(14.0f * live_imgui_content_scale)
+    apply_daslang_theme()
+    ScaleAllSizes(unsafe(GetStyle()), live_imgui_content_scale)
+    ...
+}
+```
+
+2. `widgets/imgui_harness.das` — `glfwWindowHint(int(GLFW_SCALE_TO_MONITOR), 1)` before `live_create_window`. On Windows (and X11, where screen coordinates also map 1:1 to pixels) GLFW scales the window-creation size by the monitor content scale; the hint has no effect on macOS, where logical-pixel sizing is intrinsic.
+
+3. `widgets/imgui_theme_daslang.das` — leave theme constants at 1x; `ScaleAllSizes` runs AFTER `apply_daslang_theme`. Note: stock ImGui `ScaleAllSizes` does NOT scale border widths — hairline 1px borders on retina are the intended look.
+
+**Required `require`s in imgui_live.das**: `daslib/safe_addr` (for `safe_addr(xs)`) and `math` (for `max`).
+
+**Gotchas:**
+- `glfwGetWindowContentScale` binds C `float*` as `float?` in das, NOT `float&`. Must pass `safe_addr(xs)`. There's a wrapper in `dasglfw/glfw_boost.das` for `glfwGetFramebufferSize(window, int&, int&)` but NO equivalent for content scale.
+- Read once at init; DPI changes during monitor drag (`glfwSetWindowContentScaleCallback`) are deliberately out of scope — re-applying theme + rebuilding font atlas mid-frame is fiddly. Document the limitation.
+- Font strategy: re-raster at `14 * scale` px (sharp), NOT `io.FontGlobalScale = scale` (bilinear-blurry).
+- Headless / no-window path stays at 1.0 naturally.
+
+**Per-platform behavior at runtime:**
+- macOS retina: `glfwGetWindowContentScale` → 2.0, window opens at logical 800×600 = physical 1600×1200 implicitly.
+- Windows DPI 200%: `GLFW_SCALE_TO_MONITOR` resizes window to physical 1600×1200, content scale → 2.0.
+- Linux: depends on compositor (Wayland reports correctly; X11 often 1.0).
+
+## Questions
+- How do I make dasImgui HDPI-aware — what's the canonical scale plumbing?
diff --git a/mouse-data/docs/how-do-i-run-a-dasimgui-demo-locally-what-s-the-build-daspkg-install-dance.md b/mouse-data/docs/how-do-i-run-a-dasimgui-demo-locally-what-s-the-build-daspkg-install-dance.md
new file mode 100644
index 000000000..35c638d95
--- /dev/null
+++ b/mouse-data/docs/how-do-i-run-a-dasimgui-demo-locally-what-s-the-build-daspkg-install-dance.md
@@ -0,0 +1,46 @@
+---
+slug: how-do-i-run-a-dasimgui-demo-locally-what-s-the-build-daspkg-install-dance
+title: How do I run a dasImgui demo locally — what's the build + daspkg install dance?
+created: 2026-05-17
+last_verified: 2026-05-17
+links: []
+---
+
+Verified 2026-05-16.
+
+dasImgui is a **sibling repo** (`/Users/borisbatkin/Work/dasImgui`, not under `daScript/modules/`). To run any dasImgui example or test locally:
+
+**1. Build daslang first** (provides `lib/liblibDaScriptDyn.dylib`, `bin/daslang`):
+```
+cd /Users/borisbatkin/Work/daScript
+cmake --build build --config Release -j 8
+```
+
+**2. Build dasImgui out-of-tree, pointing DASLANG_DIR at daScript:**
+```
+cd /Users/borisbatkin/Work/dasImgui
+mkdir -p build && cd build
+cmake .. -DDASLANG_DIR=/Users/borisbatkin/Work/daScript -DCMAKE_BUILD_TYPE=Release -G Ninja
+cmake --build . --config Release -j 8
+```
+This drops `dasModuleImgui.shared_module`, `imguiApp.shared_module`, `imguiAppHeadless.shared_module` into `/Users/borisbatkin/Work/dasImgui/`.
+
+**3. daspkg install (the critical step):**
+```
+cd /Users/borisbatkin/Work/daScript
+bin/daslang utils/daspkg/main.das -- install /Users/borisbatkin/Work/dasImgui --global --force
+```
+This COPIES (not symlinks) the entire dasImgui repo into `/Users/borisbatkin/Work/daScript/modules/dasImgui/`. The `--global` flag puts it in the das_root modules dir so `require imgui` resolves. The `--force` flag re-syncs the copy and must be passed after every edit (without `--force`, a repeat install is a no-op).
+
+**4. Run:**
+```
+bin/daslang modules/dasImgui/examples/imgui_demo/main.das
+```
+**Do NOT use `-project_root /path/to/dasImgui`** — that flag exists but doesn't trigger `.das_module` initialize for `register_dynamic_module`. The canonical resolver path is via `das_root/modules/<name>` populated by daspkg.
+
+**Gotcha — edits don't propagate without re-install.** Editing a file in `/Users/borisbatkin/Work/dasImgui/widgets/foo.das` is invisible to daslang until you re-run `daspkg install --force`. Working directly in `/Users/borisbatkin/Work/daScript/modules/dasImgui/` skips the sync but means edits land in the daspkg-copied tree, not the source repo — easy to lose work.
+
+**Compile-check via MCP** also needs the install: `mcp__daslang__compile_check` resolves `require imgui` from `daScript/modules/dasImgui/`, not from the dasImgui source repo directly.
+
+## Questions
+- How do I run a dasImgui demo locally — what's the build + daspkg install dance?
diff --git a/mouse-data/docs/macro-planner-named-marker-arms-leave-room-for-future-modes.md b/mouse-data/docs/macro-planner-named-marker-arms-leave-room-for-future-modes.md
new file mode 100644
index 000000000..c1c62b323
--- /dev/null
+++ b/mouse-data/docs/macro-planner-named-marker-arms-leave-room-for-future-modes.md
@@ -0,0 +1,46 @@
+---
+slug: macro-planner-named-marker-arms-leave-room-for-future-modes
+title: How do I structure a chain-recognition macro planner to leave room for future emission modes (e.g. buffer-required ops like order_by/distinct/group_by) without painting over them with a generic fallback?
+created: 2026-05-17
+last_verified: 2026-05-17
+links: []
+---
+
+
+Use **named marker arms** in the chain-recognition switch: each operator that's currently unsupported BUT will need its own emission mode later gets a distinct `elif` arm with `return null` and a `// TODO: <FutureMode>` comment. The generic "unknown operator → fallback" catch-all stays at the bottom for genuinely unrecognized ops.
+
+```das
+} elif (opName == "where_" || opName == "select") {
+    // ... splice-mode emission ...
+} elif (opName == "take" || opName == "skip") {
+    // ... bounded-loop splice ...
+} elif (is_buffer_required_op(opName)) {    // nolint:LINT009
+    // TODO Phase 2X: order_by/distinct/reverse/group_by/zip/join → buffered emission modes
+    // (BufferTopN, BufferDistinct, BufferReverse, BufferGroupBy, MultiSourceZip, BufferedJoin).
+    return null
+} else {
+    // genuinely unknown operator — fallback to plain pipeline
+    return null
+}
+
+[macro_function]
+def private is_buffer_required_op(name : string) : bool {
+    return name == "order_by" || name == "order_descending"
+        || name == "distinct" || name == "distinct_by"
+        || name == "reverse"
+        || name == "group_by" || name == "group_by_lazy"
+        || name == "zip"
+        || name == "join" || name == "left_join" || name == "group_join"
+}
+```
+
+**Why this matters**: when the future mode lands, it grafts in at the existing named arm — no re-walking the chain-recognition logic, no risk of accidentally enabling broken splice for an op that needs full-source materialization. A single `else: return null` catch-all would force the future PR to identify which name belongs in which mode, re-implement the recognition, and risk shadowing the wrong arm.
+
+**Concrete location**: `daslib/linq_fold.das` `plan_loop_or_count`. Buffer-required marker arm landed in Phase 2C Ring 3 (2026-05-17, PR #2697); the actual emission modes are deferred to Phase 3+. The marker also doubles as documentation: a reader sees exactly which operators have planned-but-unimplemented support, vs. which are forever-out-of-scope.
+
+**Bonus**: when the project moves to a fail-loudly contract (macro errors instead of silent fallback), the named arms can emit specific error messages ("cannot splice `order_by` + take — needs BufferTopN mode (Phase 3+)"), while the catch-all becomes a generic "unknown chain operator".
+
+Related: see [[chained-select-splice-bind-via-clone-assign-universal]] for the chained-bind shape itself.
+
+## Questions
+- How do I structure a chain-recognition macro planner to leave room for future emission modes (e.g. buffer-required ops like order_by/distinct/group_by) without painting over them with a generic fallback?
diff --git a/mouse-data/docs/splice-macro-bounded-loop-guard-take-skip-non-positive-n.md b/mouse-data/docs/splice-macro-bounded-loop-guard-take-skip-non-positive-n.md
new file mode 100644
index 000000000..f0ad5c56b
--- /dev/null
+++ b/mouse-data/docs/splice-macro-bounded-loop-guard-take-skip-non-positive-n.md
@@ -0,0 +1,46 @@
+---
+slug: splice-macro-bounded-loop-guard-take-skip-non-positive-n
+title: When emitting bounded-loop counter guards (take/skip) in a splice macro, what comparison operator matches iterator reference semantics for non-positive N — `==` or `>=`?
+created: 2026-05-17
+last_verified: 2026-05-17
+links: []
+---
+
+
+**Use `>=` for take, `> 0` for skip.** This mirrors `daslib/linq.das` iterator semantics, where `take_impl` and `take_to_array` both use `if (total <= 0) break` (non-positive N → take nothing) and `skip_impl` uses `if (total > 0) { total--; continue }` (non-positive K → naturally inert, skip nothing).
+
+```das
+// Take guard — break when limit reached.
+// `>=` (not `==`): for N <= 0, takenCount=0 already satisfies `0 >= N`, breaks
+// on first iteration → matches reference "non-positive → take 0".
+// For N > 0, `>=` agrees with `==` since takenCount increments by 1 from 0.
+prefixed |> push <| qmacro_expr() {
+    if ($i(takeCountName) >= $e(takeLimit)) {
+        break
+    }
+}
+
+// Skip guard — decrement and continue while there's skip remaining.
+// `> 0` strict: K <= 0 → condition false → no decrement, element passes through.
+// Structurally identical to skip_impl in linq.das, so it inherits its semantics
+// for free including the non-positive case.
+prefixed |> push <| qmacro_expr() {
+    if ($i(skipName) > 0) {
+        $i(skipName)--
+        continue
+    }
+}
+```
+
+**Why `==` is wrong for take**: when N is negative, `takenCount` (which starts at 0 and only ever increments) **never** satisfies the equality, so the spliced loop happily takes every element of the source. That's a silent divergence from `take_impl` / `take_to_array` / `take(arr, total)`-style overloads which all break on `total <= 0`. It looks correct in tests with positive N and only blows up when a user threads a computed `take(n - m)` that goes negative.
+
+**Concrete bug + fix**: caught on PR #2697 by Copilot's pull-request reviewer. Fix commit `969343ae0`: one-character `==` → `>=` in `daslib/linq_fold.das::wrap_with_skip_take` + 4 pinning tests covering `take(-1)`, `take(0)`, `skip(-1)`, `skip(0)` in `tests/linq/test_linq_fold.das`.
+
+**Why skip didn't need fixing**: pure luck of having reached for the natural `> 0` guard at emission time instead of e.g. counting down to `== 0`. The two paths are not symmetrical — take counts UP from 0 to N (so any non-positive limit needs `>=`), while skip counts DOWN from K to 0 (so the natural `> 0` covers both positive and non-positive K).
+
+**Lesson for any new bounded-loop emission**: pick the guard that short-circuits on the first iteration when the bound is non-positive. `==` against a counter that starts at 0 and increments fails this. Don't trust positive-N tests to prove the emission is sound.
+
+See [[chained-select-splice-bind-via-clone-assign-universal]] for the broader splice emission patterns and [[macro-planner-named-marker-arms-leave-room-for-future-modes]] for the chain-recognition structure that hosts these guards.
+
+## Questions
+- When emitting bounded-loop counter guards (take/skip) in a splice macro, what comparison operator matches iterator reference semantics for non-positive N — `==` or `>=`?

From 6fbfcfd2dfd9ca87efc8d5787eb1813d57dd0ffd Mon Sep 17 00:00:00 2001
From: Boris Batkin <bbatkin@gmail.com>
Date: Sat, 16 May 2026 23:59:20 -0700
Subject: [PATCH 14/14] mcp: thread project arg through run_script and
 eval_expression
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every other compile-family MCP tool (compile_check, lint, aot, run_test,
ast_dump, find_symbol, etc.) accepts an optional project : string pointing
at a .das_project file for custom module resolution. run_script and
eval_expression were the last two outliers — both shell out to daslang.exe
(which supports -project) but never piped it through, so any script or
expression that needed project-bound module resolution silently failed.

- run_script.das / eval_expression.das: trailing project arg, validated
  via validate_project_arg, injected as -project <file> in argv before
  the script path. eval_expression also threads it through the daslib-
  rewrite retry.
- protocol.das: PROJECT_PROP registered on both tool schemas; dispatch_tool
  passes the already-extracted project string.
- test_tools.das: two new [test] functions mirroring the existing
  test_compile_check_invalid_project — exercise both validate_project_arg
  error branches (nonexistent path, wrong extension).
- README.md / doc/source/reference/utils/mcp.rst: document the new arg
  on run_script + eval_expression, document the previously-shipped-but-
  undocumented project arg on live_launch, and add the missing
  live_commands row to the Live-Reload Control tables.

Out of scope: removing daslang-live's single-instance lock + adding a
-port CLI flag to unblock concurrent live instances (MCP-side port arg
already exists on every live_* tool).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 doc/source/reference/utils/mcp.rst  | 15 +++++++++---
 utils/mcp/README.md                 |  7 +++---
 utils/mcp/protocol.das              | 10 ++++----
 utils/mcp/test_tools.das            | 36 +++++++++++++++++++++++++++++
 utils/mcp/tools/eval_expression.das | 17 ++++++++++----
 utils/mcp/tools/run_script.das      |  8 ++++++-
 6 files changed, 77 insertions(+), 16 deletions(-)

diff --git a/doc/source/reference/utils/mcp.rst b/doc/source/reference/utils/mcp.rst
index 9100300ca..769c2c58f 100644
--- a/doc/source/reference/utils/mcp.rst
+++ b/doc/source/reference/utils/mcp.rst
@@ -180,13 +180,15 @@ Execution
      - Description
    * - ``run_script``
      - Run a ``.das`` file or inline code snippet and return
-       stdout/stderr.
+       stdout/stderr.  Optional ``project`` for ``.das_project``-bound
+       module resolution.
    * - ``run_test``
      - Run dastest on a ``.das`` test file and return pass/fail
        results.  Optional ``json`` for structured output.
    * - ``eval_expression``
      - Evaluate a daslang expression and return its printed result.
-       Supports module imports via ``require`` parameter.
+       Supports module imports via ``require`` parameter.  Optional
+       ``project`` for ``.das_project``-bound module resolution.
 
 Code generation and transformation
 -----------------------------------
@@ -358,7 +360,10 @@ instance via its REST API.  All accept an optional ``port`` parameter
      - Description
    * - ``live_launch``
      - Launch ``daslang-live.exe`` on a script if not already running.
-       Sets working directory to the script's folder.
+       Sets working directory to the script's folder.  Optional
+       ``project`` is forwarded to ``daslang-live`` as
+       ``-project <file>`` for ``.das_project``-bound module
+       resolution.
    * - ``live_status``
      - Query the running instance for fps, uptime, paused state, and
        error status.
@@ -370,6 +375,10 @@ instance via its REST API.  All accept an optional ``port`` parameter
      - Pause or unpause execution.
    * - ``live_command``
      - Dispatch a ``[live_command]`` by name with JSON arguments.
+   * - ``live_commands``
+     - Dispatch a batch of ``[live_command]``\ s in one round-trip;
+       continue-on-error semantics, response is a JSON array
+       preserving input order.
    * - ``live_shutdown``
      - Gracefully shut down the running instance.
 
diff --git a/utils/mcp/README.md b/utils/mcp/README.md
index 7e5afcde2..686d09689 100644
--- a/utils/mcp/README.md
+++ b/utils/mcp/README.md
@@ -13,7 +13,7 @@ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that e
 | `list_types` | Compile a `.das` file and list all structs, classes (with fields), enums (with values), and type aliases |
 | `run_test` | Run dastest on a `.das` test file and return pass/fail results. Optional `json` for structured output |
 | `format_file` | Format a `.das` file using `daslib/das_source_formatter` |
-| `run_script` | Run a `.das` file or inline code snippet and return stdout/stderr |
+| `run_script` | Run a `.das` file or inline code snippet and return stdout/stderr. Optional `project` for `.das_project`-bound module resolution |
 | `ast_dump` | Dump AST of an expression or compiled function. `mode=ast` returns S-expression (node types/fields), `mode=source` returns post-macro daslang code. Optional `lineinfo` to include file and line:col spans on each node |
 | `program_log` | Produce full post-compilation program text (like `options log`). Shows all types, globals, and functions after macro expansion, template instantiation, and inference. Optional `function` filter |
 | `list_modules` | List all available daslang modules (builtin C++ modules and daslib). Optional `json` for structured output |
@@ -24,7 +24,7 @@ A [Model Context Protocol](https://modelcontextprotocol.io/) (MCP) server that e
 | `goto_definition` | Given a cursor position (file, line, column), resolve the definition of the symbol under the cursor. Returns location, kind (variable/function/field/builtin/struct/enum/typedef), and source snippet. Optional `no_opt` to preserve pre-optimization AST |
 | `type_of` | Given a cursor position (file, line, column), return the resolved type of the expression under the cursor. Shows all expressions at position from innermost to outermost. Optional `no_opt` |
 | `find_references` | Find all references to the symbol under the cursor (function calls, variable uses, field accesses, type refs, enum/bitfield values, aliases). Works from both usage and declaration sites. Scope: `file` (default) or `all` (all loaded modules). Optional `no_opt` |
-| `eval_expression` | Evaluate a daslang expression and return its printed result. Supports comma-separated module imports via `require` parameter |
+| `eval_expression` | Evaluate a daslang expression and return its printed result. Supports comma-separated module imports via `require` parameter. Optional `project` for `.das_project`-bound module resolution |
 | `describe_type` | Describe a type's fields, methods, values, and base type. Supports structs, classes, handled types, enums, bitfields, variants, tuples, typedefs |
 | `grep_usage` | Parse-aware symbol search across `.das` files using ast-grep + tree-sitter. Finds identifier occurrences excluding comments and strings. Conditional on `sg` CLI |
 | `outline` | List all declarations (functions, structs, classes, enums, bitfields, variants, globals, typedefs) in a file or set of files using tree-sitter. Works on broken/incomplete code — no compilation needed. Conditional on `sg` CLI |
@@ -45,12 +45,13 @@ These tools interact with a running `daslang-live.exe` instance via its REST API
 
 | Tool | Description |
 |---|---|
-| `live_launch` | Start a `daslang-live` instance with a script file. Sets working directory to the script's folder. Detects if already running. Polls up to 10 seconds to confirm startup |
+| `live_launch` | Start a `daslang-live` instance with a script file. Sets working directory to the script's folder. Detects if already running. Polls up to 10 seconds to confirm startup. Optional `project` is forwarded as `-project <file>` for `.das_project`-bound module resolution |
 | `live_status` | Get status (fps, uptime, paused, dt, has_error) |
 | `live_error` | Get last compilation error (null if none) |
 | `live_reload` | Trigger reload. Optional `full` param for full recompile. Works even during compilation errors |
 | `live_pause` | Pause or unpause (`paused` = "true"/"false"). Returns 503 on compilation error |
 | `live_command` | Dispatch a `[live_command]` (`name` required, optional `args` JSON string). Returns 503 on compilation error. Use `name="help"` to list all commands |
+| `live_commands` | Dispatch a batch of `[live_command]`s in one round-trip; continue-on-error semantics, response is a JSON array preserving input order |
 | `live_shutdown` | Graceful shutdown of the live instance |
 
 ### Server Management
diff --git a/utils/mcp/protocol.das b/utils/mcp/protocol.das
index 95dad8fb5..a6da74874 100644
--- a/utils/mcp/protocol.das
+++ b/utils/mcp/protocol.das
@@ -242,7 +242,8 @@ def handle_tools_list(id_json : string) : string {
             "file" => PropertySchema(_type = "string", description = "Path to the .das file to run"),
             "code" => PropertySchema(_type = "string", description = "Inline daslang code to run (written to temp file and executed)"),
             "timeout" => PropertySchema(_type = "string", description = "Timeout in seconds (default: 30). Process tree is killed if exceeded"),
-            "track_allocations" => PropertySchema(_type = "string", description = "If 'true', enable heap allocation tracking and append a heap report at exit (shows where each allocation came from)")
+            "track_allocations" => PropertySchema(_type = "string", description = "If 'true', enable heap allocation tracking and append a heap report at exit (shows where each allocation came from)"),
+            "project" => PROJECT_PROP
         },
         []
     ))
@@ -251,7 +252,8 @@ def handle_tools_list(id_json : string) : string {
         "Evaluate a daslang expression and return its printed result. Wraps the expression in a string builder print statement. Use 'require' to add module imports (comma-separated).",
         {
             "expression" => PropertySchema(_type = "string", description = "daslang expression to evaluate (e.g. 'to_float(42) + 1.0', 'length(\"hello\")')"),
-            "require" => PropertySchema(_type = "string", description = "Comma-separated module imports (e.g. 'math', 'strings, daslib/json'). On compile failure, single-token names are retried under 'daslib/'.")
+            "require" => PropertySchema(_type = "string", description = "Comma-separated module imports (e.g. 'math', 'strings, daslib/json'). On compile failure, single-token names are retried under 'daslib/'."),
+            "project" => PROJECT_PROP
         },
         ["expression"]
     ))
@@ -614,9 +616,9 @@ def dispatch_tool(tool_name, arg1, arg2, arg3, arg4, arg5, arg6, project : strin
     } elif (tool_name == "format_file") {
         return do_format_file(arg1)
     } elif (tool_name == "run_script") {
-        return do_run_script(arg1, arg2, arg3, arg4 == "true")
+        return do_run_script(arg1, arg2, arg3, arg4 == "true", project)
     } elif (tool_name == "eval_expression") {
-        return do_eval_expression(arg1, arg2)
+        return do_eval_expression(arg1, arg2, project)
     } elif (tool_name == "describe_type") {
         return do_describe_type(arg1, arg2, project)
     } elif (tool_name == "ast_dump") {
diff --git a/utils/mcp/test_tools.das b/utils/mcp/test_tools.das
index a424c9e37..8949200a1 100644
--- a/utils/mcp/test_tools.das
+++ b/utils/mcp/test_tools.das
@@ -188,6 +188,42 @@ def test_compile_check_invalid_project(t : T?) {
     }
 }
 
+[test]
+def test_run_script_invalid_project(t : T?) { // checks that do_run_script reports an error result for an unusable 'project' argument
+    t |> run("missing project file reports focused error") <| @(t : T?) {
+        var text : string // passed to parse_result; presumably receives the result message (parse_result is defined elsewhere)
+        var is_error = false // passed to parse_result; asserted true below
+        parse_result(do_run_script("", "print(\"hi\")", "", false, "nonexistent_xyz.das_project"), text, is_error) // args: empty file, inline code, empty timeout_str, track_allocations = false, project path expected not to exist
+        t |> success(is_error, "should be error")
+        t |> success(find(text, "'project' file not found") >= 0, "should mention project file not found") // substring match only; the full message wording is produced outside this test
+    }
+    t |> run("non-.das_project extension reports focused error") <| @(t : T?) {
+        var text : string
+        var is_error = false
+        parse_result(do_run_script("", "print(\"hi\")", "", false, fixture_path("_fixture_valid.das")), text, is_error) // same call, but the project path ends in .das instead of .das_project
+        t |> success(is_error, "should be error")
+        t |> success(find(text, "expected a path to a .das_project file") >= 0, "should mention expected extension") // substring match only
+    }
+}
+
+[test]
+def test_eval_expression_invalid_project(t : T?) { // checks that do_eval_expression reports an error result for an unusable 'project' argument
+    t |> run("missing project file reports focused error") <| @(t : T?) {
+        var text : string // passed to parse_result; presumably receives the result message (parse_result is defined elsewhere)
+        var is_error = false // passed to parse_result; asserted true below
+        parse_result(do_eval_expression("1 + 1", "", "nonexistent_xyz.das_project"), text, is_error) // args: expression, empty req_modules, project path expected not to exist
+        t |> success(is_error, "should be error")
+        t |> success(find(text, "'project' file not found") >= 0, "should mention project file not found") // substring match only; the full message wording is produced outside this test
+    }
+    t |> run("non-.das_project extension reports focused error") <| @(t : T?) {
+        var text : string
+        var is_error = false
+        parse_result(do_eval_expression("1 + 1", "", fixture_path("_fixture_valid.das")), text, is_error) // same call, but the project path ends in .das instead of .das_project
+        t |> success(is_error, "should be error")
+        t |> success(find(text, "expected a path to a .das_project file") >= 0, "should mention expected extension") // substring match only
+    }
+}
+
 // ── parse_file_list (smoke) ──────────────────────────────────────────
 // Canonical unit tests for `parse_file_list` and `expand_glob` live in
 // `tests/fio/expand_glob_test.das` (the helpers moved to `daslib/fio`).
diff --git a/utils/mcp/tools/eval_expression.das b/utils/mcp/tools/eval_expression.das
index 12c93e076..826a46662 100644
--- a/utils/mcp/tools/eval_expression.das
+++ b/utils/mcp/tools/eval_expression.das
@@ -50,7 +50,7 @@ def private rewrite_modules_in_set(req_modules : string; missing : array<string>
     return join(out_parts, ", ") => rewritten
 }
 
-def private try_eval_expression(exe, expression, req_modules : string; var output : string&) : int {
+def private try_eval_expression(exe, expression, req_modules, project : string; var output : string&) : int {
     let stub_path = make_temp_das_file()
     let script = build_string() $(var w) {
         write(w, "options gen2\n")
@@ -80,18 +80,25 @@ def private try_eval_expression(exe, expression, req_modules : string; var outpu
         output = "Cannot write temp file: {stub_path}"
         return -1
     }
-    let argv <- [exe, stub_path]
+    var argv <- [exe]
+    if (!empty(project)) {
+        argv |> push("-project")
+        argv |> push(string(project))
+    }
+    argv |> push(string(stub_path))
     let exit_code = run_and_capture(argv, output)
     remove(stub_path)
     return exit_code
 }
 
-def do_eval_expression(expression, req_modules : string) : string {
+def do_eval_expression(expression, req_modules : string; project : string = "") : string {
     if (empty(expression)) return make_tool_result("missing 'expression' argument", true)
+    let project_err = validate_project_arg(project)
+    if (!empty(project_err)) return make_tool_result(project_err, true)
     let exe = get_daslang_exe()
     if (empty(exe)) return make_tool_result("Cannot determine daslang executable path", true)
     var output : string
-    let exit_code = try_eval_expression(exe, expression, req_modules, output)
+    let exit_code = try_eval_expression(exe, expression, req_modules, project, output)
     if (exit_code == 0) return make_tool_result(strip(output))
     // exit_code < 0 means try_eval_expression failed before invoking the daslang subprocess
     // (e.g. couldn't write the temp file). Surface the message directly — no retry would help.
@@ -107,7 +114,7 @@ def do_eval_expression(expression, req_modules : string) : string {
         let did_rewrite = rewrite._1
         if (did_rewrite) {
             var output2 : string
-            let exit_code2 = try_eval_expression(exe, expression, new_modules, output2)
+            let exit_code2 = try_eval_expression(exe, expression, new_modules, project, output2)
             if (exit_code2 == 0) return make_tool_result("[resolved unqualified modules under daslib/: {new_modules}]\n{strip(output2)}")
             if (exit_code2 < 0) return make_tool_result(output2, true)
             // Retry also failed at the daslang level — surface the retry's output, since the rewrite
diff --git a/utils/mcp/tools/run_script.das b/utils/mcp/tools/run_script.das
index 8e5d54880..75db6690b 100644
--- a/utils/mcp/tools/run_script.das
+++ b/utils/mcp/tools/run_script.das
@@ -4,7 +4,9 @@ options no_unused_block_arguments = false
 
 require common public
 
-def do_run_script(file, code : string; timeout_str : string = ""; track_allocations : bool = false) : string {
+def do_run_script(file, code : string; timeout_str : string = ""; track_allocations : bool = false; project : string = "") : string {
+    let project_err = validate_project_arg(project)
+    if (!empty(project_err)) return make_tool_result(project_err, true)
     let exe = get_daslang_exe()
     if (empty(exe)) return make_tool_result("Cannot determine daslang executable path", true)
     // if code is provided, write to temp file; otherwise resolve relative path
@@ -25,6 +27,10 @@ def do_run_script(file, code : string; timeout_str : string = ""; track_allocati
     let timeout_sec = empty(timeout_str) ? 30.0 : float(to_int(timeout_str))
     var output : string
     var argv <- [exe]
+    if (!empty(project)) {
+        argv |> push("-project")
+        argv |> push(string(project))
+    }
     if (track_allocations) {
         argv |> push("-track-allocations")
         argv |> push("-heap-report")