From de9dd75089376674fc74f795c3e6eb30046efe38 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 06:41:42 -0700 Subject: [PATCH 1/7] ast,daslib: fix spurious 30151 on concept_assert under lint flags (#2830) Two independent bugs surfaced together by the linter's `no_infer_time_folding=true` + `no_optimizations=true` policy combo. 1. Compiler: `concept_assert` / `static_assert` cond never gets folded. `typeinfo sizeof(T) <= typeinfo sizeof(U)` rewrites to `ExprConstInt <= ExprConstInt` at infer time, but `InferTypes::visit(ExprOp1/Op2/Op3)` only folds when `enableInferTimeFolding` is on. With lint disabling it and `no_optimizations` skipping `ConstFolding`, the binop stayed unfolded and `ContractFolding::visit(ExprStaticAssert*)` raised the spurious 30151. Mirror the existing static_if save+force-enable+ restore pattern in `preVisit`/`visit(ExprStaticAssert*)` so the cond subtree always folds. 2. Daslib: 6 sites used `if (typeinfo X)` instead of `static_if`. These relied on the same infer-time folding to elide the dead branch (whose body references fields/operations only valid in the true branch's universe). Under lint flags, both branches survive and the dead one fails to resolve. Convert all to `static_if`: `decs_boost.das:244` (`a._aka`), `builtin.das:403/892/914/1183`, `json_boost.das:477`. `extended_checks` gates the lint step to `matrix.target == 'linux'`, so this surfaced as "linux x64 only" in CI; verified platform-independent locally on Windows daslang.exe. Also: 2 pre-existing STYLE028 hits in `decs_boost.das` (`self->implement` -> `implement`), required by the "every changed .das file lint-clean" PR rule once the PR touches that file. Regression fixture: `tests/_issue_2830_lint_repro.das`. CI lint runs over changed .das files; the fixture exercises the failing path on this PR and continues to exercise it on any future PR that touches it. 
Verified locally on WSL Ubuntu2404-CI (clang 18.1.3, Release): - lint clean on all 4 repro variants - tests/decs 245/245 pass - tests/json 266/266 pass - tests/lint 8/8 + utils/lint/tests 38/38 pass Co-Authored-By: Claude Opus 4.7 (1M context) --- daslib/builtin.das | 8 +++--- daslib/decs_boost.das | 6 ++--- daslib/json_boost.das | 2 +- include/daScript/ast/ast_infer_type.h | 1 + src/ast/ast_infer_type.cpp | 15 +++++++++++ tests/_issue_2830_lint_repro.das | 37 +++++++++++++++++++++++++++ 6 files changed, 61 insertions(+), 8 deletions(-) create mode 100644 tests/_issue_2830_lint_repro.das diff --git a/daslib/builtin.das b/daslib/builtin.das index 1d8d8ec4d..c142a1988 100644 --- a/daslib/builtin.das +++ b/daslib/builtin.das @@ -400,7 +400,7 @@ def push_clone(var Arr : array; var varr : numT[] ==const) { def push_clone(var Arr : array; varr : numT[] ==const) { static_if (typeinfo can_copy(type)) { static_if (typeinfo sizeof(Arr[0]) == typeinfo sizeof(varr)) { - if (typeinfo can_clone_from_const(varr)) { + static_if (typeinfo can_clone_from_const(varr)) { for (t in varr) { Arr[__builtin_array_push_back_zero(Arr, typeinfo sizeof(Arr[0]))] := t } @@ -889,7 +889,7 @@ def insert_clone(var Tab : table; at : keyT | #; var val [unused_argument(Tab, at, val)] def insert_clone(var Tab : table; at : keyT | #; val : valT ==const | #) { static_if (typeinfo can_clone(val)) { - if (typeinfo can_clone_from_const(val)) { + static_if (typeinfo can_clone_from_const(val)) { unsafe(Tab[at]) := val } else { concept_assert(false, "can't insert value, which can't be cloned from const") @@ -911,7 +911,7 @@ def insert_clone(var Tab : table; at : keyT | #; var v [unused_argument(Tab, at, val)] def insert_clone(var Tab : table; at : keyT | #; val : valT[] ==const | #) { static_if (typeinfo can_clone(val)) { - if (typeinfo can_clone_from_const(val)) { + static_if (typeinfo can_clone_from_const(val)) { unsafe(Tab[at]) := val } else { concept_assert(false, "can't insert value, which can't be cloned 
from const") @@ -1180,7 +1180,7 @@ def clone_to_move(var clone_src : auto(TT) ==const | #) : TT -const -# { def clone_dim(var a; b : auto | #) { static_if (typeinfo is_dim(a) && typeinfo is_dim(b) && typeinfo dim(a) == typeinfo dim(b)) { - if (typeinfo is_pod(a)) { + static_if (typeinfo is_pod(a)) { unsafe { memcpy(addr(a[0]), addr(b[0]), typeinfo sizeof(a[0]) * length(a)) } diff --git a/daslib/decs_boost.das b/daslib/decs_boost.das index 40043d1f4..4ded362ca 100644 --- a/daslib/decs_boost.das +++ b/daslib/decs_boost.das @@ -241,7 +241,7 @@ def private append_iterator(arch_name : string; var qloop : ExprFor?; a; prefix, qloop.iterators |> resize(qli + 1) qloop.iterators[qli] := "{prefix}{a.name}{suffix}" qloop.iteratorsAka |> resize(qli + 1) - if (typeinfo has_field<_aka>(a)) { + static_if (typeinfo has_field<_aka>(a)) { qloop.iteratorsAka[qli] := a._aka } else { qloop.iteratorsAka[qli] := "" @@ -384,7 +384,7 @@ class DecsQueryMacro : AstCallMacro { macro_verify(totalArgs == 1 || totalArgs == 2, prog, expr.at, "expecting query($(block_with_arguments)) or query(eid,$(block_with_arguments))") let qt = totalArgs == 2 ? DecsQueryType.eid_query : DecsQueryType.query let block_arg_index = totalArgs - 1 - return <- self->implement(expr, block_arg_index, qt) + return <- implement(expr, block_arg_index, qt) } def implement(var expr : ExprCallMacro?; block_arg_index : int; qt : DecsQueryType) : ExpressionPtr { for (arg in expr.arguments) { @@ -539,7 +539,7 @@ class DecsFindQueryMacro : DecsQueryMacro { //! Note: if return is missing, or end of find_query block is reached - its assumed that find_query did not find anything, and will return false. def override visit(prog : ProgramPtr; mod : Module?; var expr : ExprCallMacro?) 
: ExpressionPtr { macro_verify(length(expr.arguments) == 1, prog, expr.at, "expecting find_query($(block_with_arguments))") - return <- self->implement(expr, 0, DecsQueryType.find_query) + return <- implement(expr, 0, DecsQueryType.find_query) } } diff --git a/daslib/json_boost.das b/daslib/json_boost.das index ccc63a165..9fab0a3ab 100644 --- a/daslib/json_boost.das +++ b/daslib/json_boost.das @@ -474,7 +474,7 @@ def from_JV(v : JsonValue const explicit?; anything : auto(TT)) { let arr & = v.value as _array ret |> reserve(arr |> long_length) for (a in arr) { - if (typeinfo can_copy(anything[0])) { + static_if (typeinfo can_copy(anything[0])) { ret |> push_clone <| _::from_JV(a, decltype_noref(anything[0])) } else { ret |> emplace <| _::from_JV(a, decltype_noref(anything[0])) diff --git a/include/daScript/ast/ast_infer_type.h b/include/daScript/ast/ast_infer_type.h index 28fc0c52c..f7480e937 100644 --- a/include/daScript/ast/ast_infer_type.h +++ b/include/daScript/ast/ast_infer_type.h @@ -62,6 +62,7 @@ namespace das { bool enableInferTimeFolding = true; bool savedFoldingForEnum = true; // preVisitEnumerationValue / visitEnumerationValue save-restore bool savedFoldingForStaticIf = true; // preVisit(ExprIfThenElse) / visit(ExprIfThenElse) save-restore (block hooks skipped for static_if) + bool savedFoldingForStaticAssert = true; // preVisit(ExprStaticAssert) / visit(ExprStaticAssert) save-restore bool disableAot = false; bool multiContext = false; bool standaloneContext = false; diff --git a/src/ast/ast_infer_type.cpp b/src/ast/ast_infer_type.cpp index 27e435d85..71d63f99a 100644 --- a/src/ast/ast_infer_type.cpp +++ b/src/ast/ast_infer_type.cpp @@ -1215,8 +1215,23 @@ namespace das { for (auto &arg : expr->arguments) { markNoDiscard(arg); } + // static_assert / concept_assert needs the cond to fold to a const + // before verifyAndFoldContracts runs. 
Mirror the static_if path + // above: with `no_infer_time_folding` set (lint policies) plus + // `no_optimizations`, `int_const op int_const` shapes (typically + // `typeinfo sizeof(X) <= typeinfo sizeof(Y)` after typeinfo rewrites + // itself to ExprConstInt) stay as unfolded ExprOp1/Op2/Op3, and the + // contract pass raises a spurious "static assert condition is not + // constexpr or const" (30151). Force-enable folding for the cond + // subtree; restore in visit(). + savedFoldingForStaticAssert = enableInferTimeFolding; + if (!enableInferTimeFolding) { + enableInferTimeFolding = true; + } } ExpressionPtr InferTypes::visit(ExprStaticAssert *expr) { + // Restore folding state before any early-return path below. + enableInferTimeFolding = savedFoldingForStaticAssert; if (expr->argumentsFailedToInfer) { if (func) func->notInferred(); diff --git a/tests/_issue_2830_lint_repro.das b/tests/_issue_2830_lint_repro.das new file mode 100644 index 000000000..5d67299bb --- /dev/null +++ b/tests/_issue_2830_lint_repro.das @@ -0,0 +1,37 @@ +options gen2 + +// Regression fixture for https://github.com/GaijinEntertainment/daScript/issues/2830 +// `[decs_template]` synthesizes a per-field `decs::set(cmp, name, src.field)` call +// chain; `decs::set` is a generic with a `concept_assert(typeinfo sizeof(value) <= +// typeinfo sizeof(cmp[0].data), ...)`. Under lint flags +// (`no_infer_time_folding=true` plus `no_optimizations=true`) the `int_const <= +// int_const` cond stays as an unfolded `ExprOp2`, and `ContractFolding` raised a +// spurious `error[30151]` (static assert condition is not constexpr or const). +// +// `decs_boost`'s `append_iterator` also used a plain `if (typeinfo has_field<_aka>(a))` +// where the dead branch's `a._aka` reference fails to resolve once infer-time folding +// is off — same root cause, separate site. +// +// `extended_checks` runs lint on changed .das files (linux x64 only by gate); this +// file lives in `tests/` so the lint job picks it up. 
+ +require daslib/decs_boost +require daslib/linq_boost +require daslib/linq_fold + +[decs_template(prefix = "x_")] +struct X { + a : int + b : int + c : int +} + +[export] +def target() : int { + return _fold(from_decs_template(type<X>)._where(_.a + _.b + _.c > 0).count()) +} + +[export] +def main { + print("count={target()}\n") +} From 72257687b956b8341e0825bf6ba394f8676250b7 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 05:01:18 -0700 Subject: [PATCH 2/7] linq_fold: trivial-let elision + reverse_take skip-into-tail MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two slices closing the final m4-vs-m3f gaps from #2824's residual outliers. Slice 1 — trivial-let elision (closes sum_aggregate_m4 1.3ns gap): When `_select(_.userName)` peels to a single `decs_tup.<field>` reference, rename the chain bind directly to the iter var instead of synthesizing `decs_sel_N`. wrap_decs_chain skips emitting the `let decs_sel_N = car_price` binding entirely; the action's `acc += <iter var>` references the iter var natively. Required extending DecsTupUsageScanner with an iter-var-→-user-name reverse map so bare iter-var refs still seed the pruner (previously: empty usedNames fell through to unpruned-default, defeating the elision). Slice 2 — reverse_take skip-into-tail (closes reverse_take_m4 5.2× gap): For `from_decs(...).reverse().take(N).to_array()` with no where/select, emit a two-pass invoke: pass 1 sums `arch.size` (no entity load), pass 2 uses for_each_archetype_find to skip whole archetypes whose cumulative size still fits below the skip threshold, then a per-iter skip-counter through the partial archetype, push into a takeN-sized buffer, and `return true` to stop iteration once buf is full. reverse_inplace runs on the small N buffer at end, not the full source. where/select fall through to the legacy buffer+reverse_inplace+resize emit unchanged. 
Bench (INTERP, 100K rows, ns/op): - sum_aggregate_m4 3.4 → 2.1 matches m3f (was the systemic 1.3ns gap) - reverse_take_m4 48.0 → 9.2 5.2× win, allocs 42B → 1B - select_where_sum_m4 7.5 → 7.5 matches m3f (elision benefits this too) - contains_match_m4 2.1 → 1.4 beats m3f at 2.2 - chained_where_m4 6.6 → 6.6 no regression - count_aggregate_m4 4.1 → 4.1 no regression Tests: - New splice-shape assertions: trivial-let elision (no decs_sel binding for `_select(_.val).sum()` and `_where(_)._select(_.val).sum()`) - New splice-shape for skip-into-tail (for_each_archetype_find count==1, decs_skips local presence) - New parity tests: multi-archetype reverse+take, take(N>total), empty source — covers the whole-archetype-skip + partial-archetype + early-return arms 1388/1388 linq + 245/245 decs + 782/782 dasSQLITE green INTERP. MCP + CI lint clean. Co-Authored-By: Claude Opus 4.7 --- daslib/linq_fold.das | 112 ++++++++++++++++++++++-- tests/linq/test_linq_from_decs.das | 132 +++++++++++++++++++++++++++++ 2 files changed, 237 insertions(+), 7 deletions(-) diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index be5be7a36..143a8b37d 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -3078,14 +3078,16 @@ def private build_decs_inner_for(bridge : DecsBridgeShape?; var tupBind : Expres return clonedForExpr } -// Walks a chain body to collect which fields of the decs_tup named-tuple bind are actually referenced. allFieldsUsed flips when the bind is referenced as a whole-var (not via field access) — e.g. push_clone(decs_tup) in a bare to_array, or pass-to-user-fn — so the helper falls back to unpruned emission and preserves the user-visible output shape. +// Walks a chain body to collect which fields of the decs_tup named-tuple bind are actually referenced. allFieldsUsed flips when the bind is referenced as a whole-var (not via field access) — e.g. 
push_clone(decs_tup) in a bare to_array, or pass-to-user-fn — so the helper falls back to unpruned emission and preserves the user-visible output shape. Also picks up bare iter-var refs (when `iterToUser` is non-empty) so the trivial-let-elision path — which rewrites `decs_sel_N`-bound projections to the iter var directly, bypassing `decs_tup` entirely — still hits the pruned arm instead of falling through to unpruned-default. class private DecsTupUsageScanner : AstVisitor { tupName : string + iterToUser : table usedFields : table allFieldsUsed : bool = false inTargetField : bool = false - def DecsTupUsageScanner(n : string) { + def DecsTupUsageScanner(n : string; var i2u : table) { tupName = n + iterToUser <- i2u } def override preVisitExprField(expr : ExprField?) : void { var v = expr.value @@ -3104,17 +3106,23 @@ class private DecsTupUsageScanner : AstVisitor { def override preVisitExprVar(expr : ExprVar?) : void { if (expr.name == tupName && !inTargetField) { allFieldsUsed = true + return + } + let nm = string(expr.name) + if (key_exists(iterToUser, nm)) { + usedFields |> insert(iterToUser[nm]) } } } [macro_function] -def private collect_decs_tup_usage(var e : Expression?; tupName : string) : tuple> { - // Returns (allFieldsUsed, usedFieldNames). usedFieldNames is unordered — caller filters bridge.userNames in original order to preserve get_ro emission order. +def private collect_decs_tup_usage(var e : Expression?; tupName : string; bridge : DecsBridgeShape?) : tuple> { + // Returns (allFieldsUsed, usedFieldNames). usedFieldNames is unordered — caller filters bridge.userNames in original order to preserve get_ro emission order. The bridge feeds an iter-var → user-name reverse map so bare iter-var refs (from trivial-let elision) count as field usage too. var allUsed = false var used : array if (e == null) return (allUsed, used) - var sc = new DecsTupUsageScanner(tupName) + var iterToUser <- {for (i in 0 .. 
length(bridge.iterNames)); bridge.iterNames[i] => bridge.userNames[i]} + var sc = new DecsTupUsageScanner(tupName, iterToUser) make_visitor(*sc) $(astVisitorAdapter) { visit_expression(e, astVisitorAdapter) } @@ -3167,7 +3175,7 @@ def private build_decs_inner_for_pruned(bridge : DecsBridgeShape?; var body : Expression?; at : LineInfo) : Expression? { // Walks body for `tupName.` references; when pruning is safe + beneficial, emits the inner multi-iter for with unused get_ro slots dropped and a matching shrunk named-tuple bind. Otherwise falls through to the unpruned path so user-visible shape stays intact (bare to_array, push_clone(decs_tup), pass-to-user-fn). - let (allUsed, usedNames) = collect_decs_tup_usage(body, tupName) + let (allUsed, usedNames) = collect_decs_tup_usage(body, tupName, bridge) // Fallback to the unpruned bind ONLY for whole-var refs (bare to_array, push_clone(decs_tup), pass-to-user-fn) or the edge case where the body never touches decs_tup at all. The "all fields used via field access" case still benefits from flatten + bind elision — no slots dropped, but the per-iter tuple-make and field reads disappear. 
if (allUsed || empty(usedNames)) { var tupBind = build_decs_tup_bind(bridge, tupName, at) @@ -3211,6 +3219,7 @@ def private build_decs_inner_for_pruned(bridge : DecsBridgeShape?; struct private DecsChainInfo { bindAt : array // bind name visible at each chain position + elidedAt : array // true when the select at position i was elided (peeled to a single decs_tup field access → reused iter var as next bind, no `let decs_sel_N = ...` emit needed) finalBind : string // bind name AFTER full chain — what terminator references finalType : TypeDeclPtr // element type AFTER full chain (constant + ref stripped) selectCount : int // number of `select` ops in chain; 0 means finalBind == tupName @@ -3229,17 +3238,47 @@ def private compute_decs_chain_info(var calls : array reserve(intermediateEnd) + info.elidedAt |> reserve(intermediateEnd) var curBind = tupName var curType : TypeDeclPtr = clone_type(bridge.elementType) for (i in 0 .. intermediateEnd) { info.bindAt |> push(curBind) + info.elidedAt |> push(false) var cll & = unsafe(calls[i]) let opName = cll._1.name if (opName == "select") { info.selectCount ++ - curBind = "`decs_sel`{at.line}`{at.column}`{info.selectCount}" var peeled = peel_lambda_rename_var(cll._0.arguments[1], info.bindAt[i]) if (peeled == null || peeled._type == null) return null + // Trivial-let elision: `_select(_.userName)` against decs_tup → iter var directly. The pruned-for keeps the iter var (its name = bridge.iterNames[idx]) and the flattener would otherwise rewrite the synthetic `let decs_sel_N = decs_tup.userName` to `let decs_sel_N = ` — a pure copy. Renaming finalBind to the iter var name skips the binding entirely and the action references it natively. Typer wraps both the projection root (`peeled` may be ExprRef2Value) AND the ExprVar inside the ExprField in ExprRef2Value — peel both. 
+ var elided = false + if (curBind == tupName) { + var top = peeled + if (top is ExprRef2Value) { + top = (top as ExprRef2Value).subexpr + } + if (top is ExprField) { + var pf = top as ExprField + var pv = pf.value + if (pv is ExprRef2Value) { + pv = (pv as ExprRef2Value).subexpr + } + if (pv is ExprVar && (pv as ExprVar).name == tupName) { + let fname = string(pf.name) + for (ui in 0 .. length(bridge.userNames)) { + if (bridge.userNames[ui] == fname) { + curBind = bridge.iterNames[ui] + elided = true + info.elidedAt[i] = true + break + } + } + } + } + } + if (!elided) { + curBind = "`decs_sel`{at.line}`{at.column}`{info.selectCount}" + } curType = clone_type(peeled._type) } elif (opName != "where_") return null } @@ -3274,6 +3313,8 @@ def private wrap_decs_chain(var action : Expression?; } } } elif (opName == "select") { + // Skip emission entirely when this select was elided in compute_decs_chain_info — the next bind is already the iter var the flattener would have produced for `let nextBind = decs_tup.userName`. No copy needed; downstream `current` already references the iter var by the elided name. + continue if (chainInfo.elidedAt[i]) var proj = peel_lambda_rename_var(cll._0.arguments[1], bindHere) if (proj == null) return null let nextBind = (i + 1 < intermediateEnd) ? chainInfo.bindAt[i + 1] : chainInfo.finalBind @@ -4634,6 +4675,63 @@ def private plan_decs_reverse(var expr : Expression?) : Expression? { let bufName = qn("decs_buf", at) let needIterWrap = expr._type.isIterator var bufElemType = strip_const_ref(clone_type(projection != null ? projection._type : bridge.elementType)) + // Skip-into-tail fast path: `reverse |> take(N) |> to_array` with no where/select. Walk archetypes once to sum `arch.size` (cheap, no entity load), compute skip = total - takeN, then for_each_archetype_find skips whole archetypes whose size still fits below the skip threshold and short-circuits once the buffer reaches takeN. 
`where` would invalidate the size-based skip (count after filter is unknown without iterating); `select` would only affect element shape, not count, but is skipped here to keep v1 minimal. + if (takeExpr != null && whereCond == null && projection == null) { + let takeNName = qn("take_n", at) + let totalName = qn("decs_total", at) + let actualName = qn("decs_actual", at) + let skipName = qn("decs_skip", at) + let seenName = qn("decs_seen", at) + let skipsLeftName = qn("decs_skips", at) + let tupBind = build_decs_tup_bind(bridge, tupName, at) + // Inner-for body: skip-counter early-out before the named-tuple wrap (so skipped iters pay no per-component load); push + break-on-quota after. + var innerBody : Expression? = qmacro_block() { + if ($i(skipsLeftName) > 0_l) { + $i(skipsLeftName) -- + continue + } + $e(tupBind) + $i(bufName) |> push_clone($i(tupName)) + if (int64(length($i(bufName))) >= $i(actualName)) { + break + } + } + var clonedForExpr = clone_expression(bridge.forExpr) + var clonedFor = clonedForExpr as ExprFor + var newForBody = new ExprBlock(at = at) + newForBody.list |> push(innerBody) + clonedFor.body = newForBody + var emission : Expression? = qmacro(invoke($() : array<$t(bufElemType)> { + // Pass 1: arch.size sum — no entity walk, just archetype-header iteration. + var $i(totalName) = 0_l + for_each_archetype($e(bridge.reqHashExpr), $e(bridge.erqExpr), $($i(archName) : Archetype) { + $i(totalName) += $i(archName).size + }) + let $i(takeNName) = $e(takeExpr) + let $i(actualName) = ($i(takeNName) <= 0) ? 0_l : ((int64($i(takeNName)) < $i(totalName)) ? int64($i(takeNName)) : $i(totalName)) + let $i(skipName) = $i(totalName) - $i(actualName) + var $i(bufName) : array<$t(bufElemType)> + if ($i(actualName) == 0_l) { + return <- $i(bufName) + } + $i(bufName) |> reserve(int($i(actualName))) + // Pass 2: skip whole archetypes via size sum; partial archetype uses skip-counter; subsequent archetypes feed directly. 
Returns true once buf reaches actualTake to stop iteration across remaining archetypes. + var $i(seenName) = 0_l + for_each_archetype_find($e(bridge.reqHashExpr), $e(bridge.erqExpr), $($i(archName) : Archetype) : bool { + if ($i(seenName) + $i(archName).size <= $i(skipName)) { + $i(seenName) += $i(archName).size + return false + } + var $i(skipsLeftName) = ($i(skipName) > $i(seenName)) ? ($i(skipName) - $i(seenName)) : 0_l + $e(clonedForExpr) + $i(seenName) += $i(archName).size + return int64(length($i(bufName))) >= $i(actualName) + }) + _::reverse_inplace($i(bufName)) + return <- $i(bufName) + })) + return finalize_decs_emission(emission, at, needIterWrap) + } var pushExpr : Expression? if (projection != null) { pushExpr = qmacro_expr() { diff --git a/tests/linq/test_linq_from_decs.das b/tests/linq/test_linq_from_decs.das index 2ae9239f7..131a8273d 100644 --- a/tests/linq/test_linq_from_decs.das +++ b/tests/linq/test_linq_from_decs.das @@ -3088,3 +3088,135 @@ def test_unroll_take_last_splice_shape(t : T?) { } } +// ───────────────────────────────────────────────────────────────────────────── +// Slice 1: trivial-let elision for `_select(_.userName)` → iter var (skips +// the `let decs_sel_N = car_price` no-op binding when the projection +// is a single field access on decs_tup) +// ───────────────────────────────────────────────────────────────────────────── + +[test] +def test_unroll_select_sum_trivial_let_elision_splice_shape(t : T?) { + // `_select(_.val).sum()` peels to a single ExprField(decs_tup.val) which the flattener would rewrite to a bare iter var anyway — slice 1 elides the synthetic `let decs_sel_N = car_price` entirely and rewrites finalBind to the iter var, leaving body as `acc += ` with no intermediate binding. 
+ ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_unroll_select_sum_fold) + t |> success(func != null, "RTTI must resolve target_unroll_select_sum_fold") + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + t |> equal(describe_count(body_expr, "decs_sel"), 0, "trivial single-field _select elided — no synthetic decs_sel binding") + t |> equal(describe_count(body_expr, "for_each_archetype"), 1, "still exactly one for_each_archetype walk") + } +} + +[test] +def test_unroll_where_select_sum_trivial_let_elision_splice_shape(t : T?) { + // Same elision applies through a where filter: where(_.flag==1).select(_.val).sum() — the select still peels to a single ExprField on decs_tup, so elision fires and the binding is dropped. Asserts the elision is not gated on whether the chain is bare or filtered. + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_unroll_where_select_sum_fold) + t |> success(func != null, "RTTI must resolve target_unroll_where_select_sum_fold") + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + t |> equal(describe_count(body_expr, "decs_sel"), 0, "trivial single-field _select elided after a where_") + } +} + +// ───────────────────────────────────────────────────────────────────────────── +// Slice 2: reverse + take(N) skip-into-tail fast path on decs +// Two-pass: sum arch.size, then for_each_archetype_find skips whole +// archetypes that fit below the threshold + early-exits once the +// takeN-sized buffer is full. Reverses the small N buffer at end. 
+// ───────────────────────────────────────────────────────────────────────────── + +[test] +def test_unroll5d_reverse_take_skip_into_tail_splice_shape(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_unroll5d_reverse_take_fold) + t |> success(func != null, "RTTI must resolve target_unroll5d_reverse_take_fold") + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched && body_expr is ExprInvoke, "expected splice invoke wrapper") + t |> equal(describe_count(body_expr, "for_each_archetype_find"), 1, "skip-into-tail uses _find for early-exit once buf reaches takeN") + t |> equal(describe_count(body_expr, "for_each_archetype"), 2, "two walks total: 1 size-sum + 1 _find (describe_count is substring match)") + // decs_skips is the slice-2 unique skip-counter local (qn(\"decs_skips\", at)). Its presence proves the fast path fired vs. the legacy buffer+reverse_inplace+resize emit. + t |> success(describe_count(body_expr, "decs_skips") >= 1, "decs_skips local present — slice-2 fast path fired") + } +} + +// ── Multi-archetype reverse + take: exercises whole-archetype-skip arm + partial-archetype skip-counter arm ── + +[decs_template(prefix = "rev2_")] +struct RevTakeMultiArchA { + id : int +} + +[decs_template(prefix = "rev2_b_")] +struct RevTakeMultiArchB { + bid : int + bval : int +} + +[test] +def test_reverse_take_multi_archetype_parity(t : T?) { + // Creates two archetypes (A with 4 rows, B with 5 rows). A 9-row sum across archetypes; reverse + take(3) must return 3 rows. The unrelated B archetype is filtered out by the from_decs_template request anyway — but its presence in the decs state ensures for_each_archetype iterates across more than one archetype-class even when the request matches just one. 
+ restart() + for (i in 0..4) { + create_entity() @(eid, cmp) { + cmp.eid := eid + cmp.rev2_id := i + } + } + for (i in 0..5) { + create_entity() @(eid, cmp) { + cmp.eid := eid + cmp.rev2_b_bid := i + 100 + cmp.rev2_b_bval := i * 10 + } + } + commit() + let got <- _fold(from_decs_template(type<RevTakeMultiArchA>).reverse().take(3).to_array()) + t |> equal(got.length(), 3, "reverse+take(3) on multi-archetype world returns 3 rows from the matching archetype only") + var idSet : table<int> + for (r in got) { + idSet |> insert(r.id) + } + // All returned ids must be from the A archetype (0..3); none from B (100..104). + for (r in got) { + t |> success(r.id >= 0 && r.id < 4, "row id {r.id} must be from RevTakeMultiArchA archetype") + } + t |> equal(length(idSet), 3, "all 3 ids are distinct") +} + +[test] +def test_reverse_take_skip_zero_when_take_exceeds_total(t : T?) { + // takeN > totalCount → skip = 0, returns all rows reversed. Exercises the actualTake = totalCount branch and the early-exit not firing (buf never reaches takeN before iteration completes). + restart() + for (i in 0..3) { + create_entity() @(eid, cmp) { + cmp.eid := eid + cmp.rev2_id := i + } + } + commit() + let got <- _fold(from_decs_template(type<RevTakeMultiArchA>).reverse().take(99).to_array()) + t |> equal(got.length(), 3, "take(N) where N > total returns all rows") +} + +[test] +def test_reverse_take_empty_source(t : T?) { + // No matching archetypes → totalCount = 0 → early-return empty buf before for_each_archetype_find. 
+ restart() + commit() + let got <- _fold(from_decs_template(type<RevTakeMultiArchA>).reverse().take(3).to_array()) + t |> success(empty(got), "empty source returns empty") +} + From eec60024f9252c5c5df39ff23996c3643c9942f2 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 10:01:31 -0700 Subject: [PATCH 3/7] tests/linq: bring back test_wave4_all_fields_via_access from #2828 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit #2835 fixed the typer-pass-order #2830 that originally tripped this test on the extended_checks (linux, 64) lane. With master now containing the fix, the test compiles cleanly on all lanes. Re-adding it covers a case the current suite missed: from_decs_template(type)._where(_.a >= 0)._where(_.b >= 0).count() Three chained single-field _where_s — all 3 fields read via field access, no whole-var ref. The splice must keep all 3 get_ros (no slot pruning) but elide the named-tuple bind (no decs_tup in the body, iter vars read directly). Lesson saved to memory: not every CI lane runs every test, so "platform-specific" failures often mean "we only check this on one platform" — not that the bug itself is platform-specific. Co-Authored-By: Claude Opus 4.7 --- tests/linq/test_linq_from_decs.das | 37 ++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/tests/linq/test_linq_from_decs.das b/tests/linq/test_linq_from_decs.das index 131a8273d..8671766de 100644 --- a/tests/linq/test_linq_from_decs.das +++ b/tests/linq/test_linq_from_decs.das @@ -2525,6 +2525,43 @@ def test_wave4_take_count_no_field_ref_parity(t : T?) 
{ t |> equal(target_wave4_take_count_no_field_ref_fold(), 5, "take(5).count() splice must still return 5") } +[export, marker(no_coverage)] +def target_wave4_all_fields_via_access_fold() : int { + // All 3 fields referenced via field access (chained single-field where_s — same all-fields trigger as a compound predicate, but each peeled body has a single field-access node, matching the predicate shape the rest of the suite uses). + return _fold(from_decs_template(type) + ._where(_.brand >= 0) + ._where(_.price >= 0) + ._where(_.year > 0) + .count()) +} + +[test] +def test_wave4_all_fields_via_access_parity(t : T?) { + fixture_wave4(10) + // brand = i%4 (always >= 0), price = i*10 (always >= 0), year = 2000+(i%25) (always > 0) → all 10 rows match. + t |> equal(target_wave4_all_fields_via_access_fold(), 10, "all-fields-access predicate must return all rows") +} + +[test] +def test_wave4_all_fields_via_access_splice_shape(t : T?) { + ast_gc_guard() { + var func = find_module_function_via_rtti(compiling_module(), @@target_wave4_all_fields_via_access_fold) + t |> success(func != null, "RTTI must resolve target") + if (func == null) return + var body_expr : ExpressionPtr + let r = qmatch_function(func) $() { + return <- $e(body_expr) + } + t |> success(r.matched, "qmatch must capture body") + // No slots pruned (all 3 fields read), but the bind is elided and body references iter vars directly. 
+ t |> equal(describe_count(body_expr, "get_ro"), 3, "all 3 get_ros present (no slot pruning)") + t |> equal(describe_count(body_expr, "decs_tup"), 0, "named-tuple bind elided when no whole-var ref") + t |> success(describe_count(body_expr, "wave4_brand") >= 2, "brand iter var read directly (slot + body ref)") + t |> success(describe_count(body_expr, "wave4_price") >= 2, "price iter var read directly") + t |> success(describe_count(body_expr, "wave4_year") >= 2, "year iter var read directly") + } +} + // ── C2: buffer-required planners (order/reverse/distinct) ────────────────────── [export, marker(no_coverage)] From 75df958f06dc6a0066e3a005b069f50b89ebda49 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 10:07:41 -0700 Subject: [PATCH 4/7] PR #2834 Copilot C3: fix multi-archetype reverse_take test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The original test created one A-only and one B-only archetype, but the query `from_decs_template(type)` only matches A — the B archetype never enters for_each_archetype, so the cross-archetype skipping arm wasn't actually exercised. Now creates two MATCHING archetypes: both have `rev2_id` (so both satisfy the query), but the second group also has the rev2_b_* extras, which land it in a separate archetype class. With A1=4 + A2=5 → totalCount=9, take(3) → skip=6: A1 (size 4) skipped via the size-sum arm, A2 enters with skipsLeft=2 → drains 2, pushes 3, returns true. Exercises both the whole-archetype-skip and partial-archetype + early-exit paths. 
Co-Authored-By: Claude Opus 4.7 --- tests/linq/test_linq_from_decs.das | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tests/linq/test_linq_from_decs.das b/tests/linq/test_linq_from_decs.das index 8671766de..2d20b6811 100644 --- a/tests/linq/test_linq_from_decs.das +++ b/tests/linq/test_linq_from_decs.das @@ -3198,13 +3198,13 @@ struct RevTakeMultiArchA { [decs_template(prefix = "rev2_b_")] struct RevTakeMultiArchB { - bid : int + bid : int bval : int } [test] def test_reverse_take_multi_archetype_parity(t : T?) { - // Creates two archetypes (A with 4 rows, B with 5 rows). A 9-row sum across archetypes; reverse + take(3) must return 3 rows. The unrelated B archetype is filtered out by the from_decs_template request anyway — but its presence in the decs state ensures for_each_archetype iterates across more than one archetype-class even when the request matches just one. + // Creates two MATCHING archetypes — both have `rev2_id` so both satisfy `from_decs_template(type)`. Group A1: just rev2_id. Group A2: rev2_id + extra rev2_b_* components — same query matches, but the extra components land it in a separate archetype. With A1=4 + A2=5 → totalCount=9, take(3) → skip=6: A1 (size 4) skipped entirely via the size-sum arm, A2 enters with skipsLeft=2 → drains 2 then pushes 3 then returns true. Exercises both the whole-archetype-skip and partial-archetype skip-counter + early-exit. restart() for (i in 0..4) { create_entity() @(eid, cmp) { @@ -3215,22 +3215,25 @@ def test_reverse_take_multi_archetype_parity(t : T?) 
{ for (i in 0..5) { create_entity() @(eid, cmp) { cmp.eid := eid - cmp.rev2_b_bid := i + 100 + cmp.rev2_id := i + 100 + cmp.rev2_b_bid := i + 1000 cmp.rev2_b_bval := i * 10 } } commit() let got <- _fold(from_decs_template(type).reverse().take(3).to_array()) - t |> equal(got.length(), 3, "reverse+take(3) on multi-archetype world returns 3 rows from the matching archetype only") + t |> equal(got.length(), 3, "reverse+take(3) on two matching archetypes returns 3 rows") var idSet : table for (r in got) { idSet |> insert(r.id) } - // All returned ids must be from the A archetype (0..3); none from B (100..104). + t |> equal(length(idSet), 3, "all 3 ids are distinct") + // All returned ids must be from one of the two matching archetypes: A1 (ids 0..3) or A2 (ids 100..104). The exact subset depends on archetype iteration order (a decs-internal detail), so we just check membership. for (r in got) { - t |> success(r.id >= 0 && r.id < 4, "row id {r.id} must be from RevTakeMultiArchA archetype") + let inA1 = r.id >= 0 && r.id < 4 + let inA2 = r.id >= 100 && r.id < 105 + t |> success(inA1 || inA2, "row id {r.id} must be from one of the two matching archetypes") } - t |> equal(length(idSet), 3, "all 3 ids are distinct") } [test] From 7a7991be373d056df3ed3c20014342025576a962 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 10:26:27 -0700 Subject: [PATCH 5/7] daslib: drop dead `var res` in json_boost.from_JV MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR b42a9524e (daslib: tighten unsafe usage) condensed the enum-name lookup loop to a single `return unsafe(reinterpret<EnumTT>(ef.value)) if (name == ef.name)`, which left `var res : auto(EnumTT) = default<EnumT>` as dead code — only used to bind `EnumTT` (an alias of `EnumT` since `default<EnumT>` has type `EnumT`). Drop the dead var and inline the rename: `reinterpret<EnumTT>` → `reinterpret<EnumT>`. No behavior change. 
Verified by `tests/json/test_json_edge.das::test_enum_json` (128 tests pass; covers both the string-name path and the round-trip path at :630/:634). Co-Authored-By: Claude Opus 4.7 --- daslib/json_boost.das | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/daslib/json_boost.das b/daslib/json_boost.das index 9fab0a3ab..84902d67c 100644 --- a/daslib/json_boost.das +++ b/daslib/json_boost.das @@ -233,10 +233,9 @@ def from_JV(v : JsonValue const explicit?; ent : auto(EnumT); defV : EnumT = def if (v == null || !((v.value is _string) || (v.value is _number) || (v.value is _longint))) return defV if (v.value is _string) { let name = v.value as _string - var res : auto(EnumTT) = default let ti = typeinfo rtti_typeinfo(type) for (ef in *ti.enumType) { - return unsafe(reinterpret(ef.value)) if (name == ef.name) + return unsafe(reinterpret(ef.value)) if (name == ef.name) } panic("not a valid enumeration {name} in {typeinfo typename(type)}") } else { From 3e72e39703cdd71d39995537f5e39d5aa80747ea Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 11:58:55 -0700 Subject: [PATCH 6/7] linq_fold: bounded-heap / streaming-min for plan_decs_order_family MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For `from_decs._order_by(KEY).take(N).to_array()` with an inline-able key, emit a bounded heap of size N maintained during the for_each_archetype walk instead of materializing the full M-element buffer and then partial-sorting. For `from_decs._order_by(KEY).first()` / `first_or_default(d)`, emit a streaming-min: single `best` + `seen` flag instead of buf + min_by. Cuts 100K push_clones (full DecsCar struct + string alloc) down to ~N push_clones (only when the element wins the heap test). 
Bench results (INTERP, 100K rows, ns/op): sort_first_m4 72.0 → 23.9 (3.0×, BEATS m3f 41.3) sort_take_m4 52.1 → 30.6 (1.7×, +7.9 vs m3f 22.7) order_take_desc_m4 52.1 → 30.5 (1.7×, +8.2 vs m3f 22.3) select_where_order_take_m4 35.1 → 25.1 (1.4×, +3.5 vs m3f 21.6) Adds two thin re-exports in linq.das (`spliced_push_heap`, `spliced_pop_heap`) so the splice can call sort_boost::{push,pop}_heap from any user module without requiring sort_boost directly. The bounded-heap less-test uses a new `make_inline_less_call` helper that templates the key body twice with direct operand expressions — no block dispatch on the hot path. Co-Authored-By: Claude Opus 4.7 --- daslib/linq.das | 10 +++ daslib/linq_fold.das | 121 ++++++++++++++++++++++++++++- tests/linq/test_linq_from_decs.das | 16 ++-- 3 files changed, 137 insertions(+), 10 deletions(-) diff --git a/daslib/linq.das b/daslib/linq.das index bb7a43270..5634b95e8 100644 --- a/daslib/linq.das +++ b/daslib/linq.das @@ -601,6 +601,16 @@ def top_n_by_with_cmp(var a : iterator; n : int; cmp : block<(v1 : TT return <- buf } +def public spliced_push_heap(var buf : array; cmp : block<(x, y : TT) : bool>) { + //! Thin re-export of sort_boost::push_heap so plan_decs_order_family's bounded-heap splice can call it via _:: from any user module without requiring sort_boost directly. + sort_boost::push_heap(buf, cmp) +} + +def public spliced_pop_heap(var buf : array; cmp : block<(x, y : TT) : bool>) { + //! Thin re-export of sort_boost::pop_heap for the bounded-heap splice (see spliced_push_heap). + sort_boost::pop_heap(buf, cmp) +} + def unique_key(a) { //! 
generates unique key of workhorse type for the value static_if (typeinfo is_workhorse(a)) { diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index 143a8b37d..f953efea3 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -1200,12 +1200,35 @@ def private try_make_inline_cmp(orderKey : Expression?; orderName : string; } else { cmpExpr = qmacro(_::less($e(b1), $e(b2))) } - // Emit untyped block args (`$(v1, v2) { ... }`). The typer infers v1/v2 types from + // Emit untyped block args (`$(v1, v2) { ... }`); typer infers v1/v2 types from the call site (top_n_by_with_cmp / order_inplace cmp-param signatures). return qmacro($(v1, v2) { return $e(cmpExpr) }) } +def private make_inline_less_call(orderKey : Expression?; orderName : string; + var lhsExpr, rhsExpr : Expression?; at : LineInfo) : Expression? { + // Direct-call mirror of try_make_inline_cmp: builds `less(KEY_BODY[lhsExpr], KEY_BODY[rhsExpr])` (or flipped for descending) for inline use without a block dispatch — needed for the bounded-heap less-test where invoking a block on a 100K-element hot path costs ~5ns/op extra. + if (orderKey == null || !(orderKey is ExprMakeBlock)) return null + var mblk = orderKey as ExprMakeBlock + var blk = mblk._block as ExprBlock + if (blk.arguments |> length != 1 || blk.list |> length != 1 + || !(blk.list[0] is ExprReturn)) return null + var ret = blk.list[0] as ExprReturn + if (ret.subexpr == null || has_sideeffects(ret.subexpr)) return null + let argName = string(blk.arguments[0].name) + var b1 = clone_expression(ret.subexpr) + var b2 = clone_expression(ret.subexpr) + var r1 : Template + r1 |> replaceVariable(argName, lhsExpr) + var r2 : Template + r2 |> replaceVariable(argName, rhsExpr) + apply_template(r1, b1.at, b1) + apply_template(r2, b2.at, b2) + return qmacro(_::less($e(b2), $e(b1))) if (orderName == "order_by_descending") + return qmacro(_::less($e(b1), $e(b2))) +} + [macro_function] def private plan_order_family(var expr : Expression?) : Expression? 
{ var (top, calls) = flatten_linq(expr) @@ -4424,6 +4447,99 @@ def private plan_decs_order_family(var expr : Expression?) : Expression? { if (hasKey) { inlineCmp = try_make_inline_cmp(orderKey, orderName, elemType, at) } + // Bounded-heap / streaming-min fast paths: when the key is inline-able, skip the materialize-all + min_by/top_n pattern in favor of a per-walk state (single best for first[_or_default], heap of size N for take). Slashes 100K push_clones to ~N — the rest of the elements only pay a cmp. + let useBoundedHeap = takeExpr != null && inlineCmp != null && firstName == "" + let useStreamingMin = firstName != "" && inlineCmp != null + let archName = bridge.archName + let needIterWrap = expr._type.isIterator + var emission : Expression? + if (useStreamingMin) { + let bestName = qn("decs_best", at) + let seenName = qn("decs_seen", at) + var lessTest = make_inline_less_call(orderKey, orderName, + qmacro($i(tupName)), qmacro($i(bestName)), at) + var perElement : Expression? = qmacro_expr() { + if (!$i(seenName)) { + $i(bestName) := $i(tupName) + $i(seenName) = true + } elif ($e(lessTest)) { + $i(bestName) := $i(tupName) + } + } + if (whereCond != null) { + perElement = qmacro_expr() { + if ($e(whereCond)) { + $e(perElement) + } + } + } + var forExprNode = build_decs_inner_for_pruned(bridge, tupName, perElement, at) + if (firstName == "first") { + emission = qmacro(invoke($() : $t(elemType) { + var $i(bestName) = default<$t(elemType)> + var $i(seenName) = false + for_each_archetype($e(bridge.reqHashExpr), $e(bridge.erqExpr), $($i(archName) : Archetype) { + $e(forExprNode) + }) + panic("sequence contains no elements") if (!$i(seenName)) + return $i(bestName) + })) + } else { + let dBindName = qn("order_d", at) + emission = qmacro(invoke($() : $t(elemType) { + let $i(dBindName) = $e(firstDefaultExpr) + var $i(bestName) = default<$t(elemType)> + var $i(seenName) = false + for_each_archetype($e(bridge.reqHashExpr), $e(bridge.erqExpr), $($i(archName) : Archetype) { 
+ $e(forExprNode) + }) + return $i(bestName) if ($i(seenName)) + return $i(dBindName) + })) + } + return finalize_decs_emission(emission, at, false) + } + if (useBoundedHeap) { + let takeNName = qn("decs_take_n", at) + // Direct less-test on the hot path: `less(KEY[decs_tup], KEY[buf[0]])` inlined, no block dispatch. + var lessTest = make_inline_less_call(orderKey, orderName, + qmacro($i(tupName)), qmacro($i(bufName)[0]), at) + var perElement : Expression? = qmacro_expr() { + if (length($i(bufName)) < $i(takeNName)) { + $i(bufName) |> push_clone($i(tupName)) + _::spliced_push_heap($i(bufName), $e(inlineCmp)) + } elif ($e(lessTest)) { + _::spliced_pop_heap($i(bufName), $e(inlineCmp)) + $i(bufName)[length($i(bufName)) - 1] := $i(tupName) + _::spliced_push_heap($i(bufName), $e(inlineCmp)) + } + } + if (whereCond != null) { + perElement = qmacro_expr() { + if ($e(whereCond)) { + $e(perElement) + } + } + } + var forExprNode = build_decs_inner_for_pruned(bridge, tupName, perElement, at) + var bhStmts : array + bhStmts |> reserve(7) + bhStmts |> push_from <| qmacro_block_to_array() { + let $i(takeNName) = $e(takeExpr) + var $i(bufName) : array<$t(elemType)> + return <- $i(bufName) if ($i(takeNName) <= 0) + $i(bufName) |> reserve($i(takeNName)) + for_each_archetype($e(bridge.reqHashExpr), $e(bridge.erqExpr), $($i(archName) : Archetype) { + $e(forExprNode) + }) + _::order_inplace($i(bufName), $e(inlineCmp)) + return <- $i(bufName) + } + emission = qmacro(invoke($() : array<$t(elemType)> { + $b(bhStmts) + })) + return finalize_decs_emission(emission, at, needIterWrap) + } var perElement : Expression? = qmacro_expr() { $i(bufName) |> push_clone($i(tupName)) } @@ -4435,7 +4551,6 @@ def private plan_decs_order_family(var expr : Expression?) : Expression? 
{ } } var forExprNode = build_decs_inner_for_pruned(bridge, tupName, perElement, at) - let archName = bridge.archName var bodyStmts : array bodyStmts |> reserve(5) bodyStmts |> push_from <| qmacro_block_to_array() { @@ -4444,8 +4559,6 @@ def private plan_decs_order_family(var expr : Expression?) : Expression? { $e(forExprNode) }) } - let needIterWrap = expr._type.isIterator - var emission : Expression? if (firstName == "first") { // order + first → min/max on buffer. Empty buf must panic to match eager `first()` semantics. bodyStmts |> push <| qmacro_expr() { diff --git a/tests/linq/test_linq_from_decs.das b/tests/linq/test_linq_from_decs.das index 2d20b6811..10e25b6e4 100644 --- a/tests/linq/test_linq_from_decs.das +++ b/tests/linq/test_linq_from_decs.das @@ -1987,8 +1987,10 @@ def test_unroll5d_order_by_take_splice_shape(t : T?) { t |> equal(describe_count(body_expr, "for_each_archetype_find"), 0, "order family uses for_each_archetype (no early-exit needed)") // Buffer hoisted ABOVE for_each_archetype so it survives the archetype walk. t |> success(describe_count(body_expr, "decs_buf") >= 2, "decs_buf declared + populated") - // top_n_by call replaces the array-side to_array / sort sequence. - t |> equal(describe_count(body_expr, "top_n_by"), 1, "splice dispatches to top_n_by") + // Bounded-heap emit: spliced_push_heap appears twice (fill + replace), spliced_pop_heap once (replace). top_n_by is no longer used. + t |> equal(describe_count(body_expr, "spliced_push_heap"), 2, "bounded-heap fill + replace each call spliced_push_heap") + t |> equal(describe_count(body_expr, "spliced_pop_heap"), 1, "bounded-heap replace calls spliced_pop_heap once") + t |> equal(describe_count(body_expr, "top_n_by"), 0, "bounded-heap path does NOT call top_n_by") } } @@ -2005,10 +2007,12 @@ def test_unroll5d_order_by_first_splice_shape(t : T?) 
{ t |> success(r.matched, "qmatch must capture body") t |> equal(describe_count(body_expr, "to_sequence"), 0, "order_by+first splice must NOT fall to tier-2 to_sequence") t |> equal(describe_count(body_expr, "for_each_archetype"), 1, "exactly one for_each_archetype") - // first → min_by on the buffer (NOT top_n_by + index). - t |> equal(describe_count(body_expr, "min_by"), 1, "order_by+first emits min_by call") - t |> equal(describe_count(body_expr, "top_n_by"), 0, "order_by+first should NOT use top_n_by") - // Empty-buffer panic guard present. + // Streaming-min emit: best + seen state vars, no buffer + min_by/top_n. + t |> equal(describe_count(body_expr, "min_by"), 0, "streaming-min path does NOT call min_by") + t |> equal(describe_count(body_expr, "top_n_by"), 0, "streaming-min path does NOT call top_n_by") + t |> success(describe_count(body_expr, "decs_best") >= 2, "best state var declared + updated") + t |> success(describe_count(body_expr, "decs_seen") >= 2, "seen flag declared + updated") + // Empty-source panic guard preserved. t |> equal(describe_count(body_expr, "sequence contains no elements"), 1, "panic-on-empty guard for first()") } } From 761f605371b2f647a93dddc55928c00b7010016c Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Sat, 23 May 2026 12:27:10 -0700 Subject: [PATCH 7/7] PR #2837 Copilot: drop reserve(takeN) on bounded-heap buf MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit reserve(takeN) for a user-supplied take(N) on a decs source of unknown cardinality risks a large upfront allocation when N >> actual source size — same OOM trap that top_n_by_with_cmp's iterator variant already documents (linq.das:482-484). The fill phase grows geometrically to min(N, M) in O(log) reallocs anyway, and at our common N (≤100) the bench delta is in the noise floor (sort_take_m4 30.6 → 29.8, within measurement noise). 
Co-Authored-By: Claude Opus 4.7 --- daslib/linq_fold.das | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/daslib/linq_fold.das b/daslib/linq_fold.das index f953efea3..ac98b43d0 100644 --- a/daslib/linq_fold.das +++ b/daslib/linq_fold.das @@ -4524,11 +4524,11 @@ def private plan_decs_order_family(var expr : Expression?) : Expression? { var forExprNode = build_decs_inner_for_pruned(bridge, tupName, perElement, at) var bhStmts : array bhStmts |> reserve(7) + // No `reserve(takeN)` on the bounded buf — matches the policy in linq.das top_n_by_with_cmp iterator variant. Caller may pass takeN >> actual source size, and the decs cardinality is unknown ahead of the walk; pre-reserving N slots would risk a large upfront allocation for no win (fill phase grows geometrically to min(N, M) in O(log) reallocs anyway). bhStmts |> push_from <| qmacro_block_to_array() { let $i(takeNName) = $e(takeExpr) var $i(bufName) : array<$t(elemType)> return <- $i(bufName) if ($i(takeNName) <= 0) - $i(bufName) |> reserve($i(takeNName)) for_each_archetype($e(bridge.reqHashExpr), $e(bridge.erqExpr), $($i(archName) : Archetype) { $e(forExprNode) })