From fcb6481579b2a964c5cd23c0f005b1d74ca71f72 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 7 May 2026 02:26:49 -0700 Subject: [PATCH 1/4] benchctl: clargs + sqlite_linq rewrite, sql/ benchmarks, where+count fold benchctl - replace hand-rolled flags.das with daslib/clargs (single struct, positional command + files, structured filters) - replace raw sqlite3_* calls in bench_sql.das with [sql_table] Benchmark + with_sqlite + db |> insert(rows) bulk + _sql(... |> select_from |> _where(...)) for query and compare paths - structured filter flags (--commit / --tag / --old-commit / --old-tag / --new-commit / --new-tag) replace user-supplied --select raw-WHERE; flag composition uses _sql || empty-string short-circuit so one call site covers all flag combinations - isolate clargs in bench_args.das (private require) to work around an Option ambiguity bug when daslib/clargs and sqlite/sqlite_boost are in the same module (filed as #2598) - delete bench_sql.das, flags.das; add bench_args.das, bench_table.das - README rewritten for the structured-flag surface benchmarks/sql/ - new comparison suite mirroring tests/dasSQLITE/parity_check_*.das shape but oriented to throughput: _common.das fixture + select_where.das, select_where_order_take.das, count_aggregate.das - 6 modes per file: m1m / m1d (_sql, mem/disk), m2m / m2d (no _sql, select_from materializes, mem/disk), m3 (array LINQ), m3f (_fold fused array LINQ); disk DBs created+deleted outside timed block - benchmarks/README.md adds the sql/ section daslib/linq_boost - new fold_where_count + ["where_", "count"] FoldSequence: emits a single-pass invoke($(source) { var n; for it in source; if pred(it) n++; return n }, top) with the predicate spliced via fold_linq_cond - eliminates intermediate filter array AND block-call overhead - count_aggregate m3f: 25.5 -> 8 ns/op (INTERP), 3 ns/op (JIT); zero alloc; ~5x faster than _sql at 10K rows in memory Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/README.md | 20 + 
benchmarks/sql/_common.das | 46 +++ benchmarks/sql/count_aggregate.das | 118 ++++++ benchmarks/sql/select_where.das | 114 ++++++ benchmarks/sql/select_where_order_take.das | 132 +++++++ daslib/linq_boost.das | 37 ++ utils/benchctl/README.md | 85 ++--- utils/benchctl/bench_args.das | 94 +++++ utils/benchctl/bench_sql.das | 82 ---- utils/benchctl/bench_table.das | 23 ++ utils/benchctl/benchstat.das | 28 +- utils/benchctl/flags.das | 51 --- utils/benchctl/main.das | 416 ++++++++------------- utils/benchctl/table_fmt.das | 2 + utils/benchctl/utils.das | 4 +- 15 files changed, 784 insertions(+), 468 deletions(-) create mode 100644 benchmarks/sql/_common.das create mode 100644 benchmarks/sql/count_aggregate.das create mode 100644 benchmarks/sql/select_where.das create mode 100644 benchmarks/sql/select_where_order_take.das create mode 100644 utils/benchctl/bench_args.das delete mode 100644 utils/benchctl/bench_sql.das create mode 100644 utils/benchctl/bench_table.das delete mode 100644 utils/benchctl/flags.das diff --git a/benchmarks/README.md b/benchmarks/README.md index 754d576ae1..b28edb0ffc 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -57,3 +57,23 @@ Every `.das` benchmark file in this directory tree is listed below, grouped by s | File | Description | |---|---| | `bench_v_ldu.das` | Fusion-engine `Op2At` array-indexed read at sizeof(T) ∈ {4,8,12,16} — int, int64, float3, float4. Used to compare DAS_FUSION=0 vs current `DAS_LDU_WORKHORSE` ladder vs `v_zero+memcpy(sizeof(CTYPE))` | + +## sql/ + +6-mode comparison: `_sql` macro vs `select_from` without macro, across `:memory:` and on-disk SQLite, plus pure in-memory `array` LINQ in materializing and `_fold`-fused forms. Mirrors the `tests/dasSQLITE/parity_check_*.das` pattern but oriented to throughput. Disk DBs are populated and deleted outside the timed block. 
+ +| Mode | Source | Macro | +|---|---|---| +| `m1m` | `:memory:` SQLite | `_sql` — compile-time SQL emission | +| `m1d` | on-disk `.db` file | `_sql` — compile-time SQL emission | +| `m2m` | `:memory:` SQLite | none — `select_from` materializes the full table, then in-memory LINQ | +| `m2d` | on-disk `.db` file | none — same as `m2m` over disk | +| `m3` | pre-populated `array` | none — chain materializes intermediate filter/sort arrays | +| `m3f` | pre-populated `array` | `_fold` from `daslib/linq_boost` — fuses the chain into a single pass, in-place where possible | + +| File | Description | +|---|---| +| `_common.das` | Shared `Car` `[sql_table]` + `fixture_db` / `fixture_array` / `disk_db_setup` / `disk_db_cleanup` (not a benchmark) | +| `select_where.das` | Filter chain — `_where(_.price > 500)` over 10K rows. Modest asymmetry; M2/M3 must walk every row. | +| `select_where_order_take.das` | Filter + sort + limit — `_where \|> _order_by(_.price) \|> take(10)`. SQL ORDER BY + LIMIT bounds work; M2/M3 sort the full filtered set. | +| `count_aggregate.das` | Aggregate — `count()` after `_where`. SQL pushes `COUNT(*)` to the engine returning one row; M2/M3 materialize the full filtered array then count it. Highest-asymmetry chain. 
| diff --git a/benchmarks/sql/_common.das b/benchmarks/sql/_common.das new file mode 100644 index 0000000000..7ee5e6d498 --- /dev/null +++ b/benchmarks/sql/_common.das @@ -0,0 +1,46 @@ +options gen2 +options persistent_heap + +require daslib/sql public +require daslib/linq_boost public +require sqlite/sqlite_boost public +require sqlite/sqlite_linq public +require dastest/testing_boost public +require daslib/fio public + +[sql_table(name = "Cars")] +struct Car { + @sql_primary_key id : int + name : string + price : int +} + +def public fixture_db(db : SqlRunner; n : int) { + db |> create_table(type) + var rows : array + rows |> resize(n) + for (i in range(n)) { + rows[i] = Car(id = i + 1, name = "Car{i}", price = (i * 37) % 1000) + } + db |> insert(rows) +} + +def public fixture_array(n : int) : array { + var arr : array + arr |> resize(n) + for (i in range(n)) { + arr[i] = Car(id = i + 1, name = "Car{i}", price = (i * 37) % 1000) + } + return <- arr +} + +def public disk_db_setup(path : string; n : int) { + remove(path) + with_sqlite(path) $(db) { + fixture_db(db, n) + } +} + +def public disk_db_cleanup(path : string) { + remove(path) +} diff --git a/benchmarks/sql/count_aggregate.das b/benchmarks/sql/count_aggregate.das new file mode 100644 index 0000000000..67926b2505 --- /dev/null +++ b/benchmarks/sql/count_aggregate.das @@ -0,0 +1,118 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 500 + +// SQL pushes COUNT(*) to the engine returning one row. +// M2/M3 must materialize the full filtered array, then walk to count it. +// Highest-asymmetry comparison among the three benchmark chains. 
+ +// --- M1m: _sql over :memory: --- +def run_m1m(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1m_sql/{n}", n) { + let c = _sql(db |> select_from(type) |> _where(_.price > THRESHOLD) |> count()) + if (c == 0) { + b->failNow() + } + } + } +} + +// --- M1d: _sql over disk file --- +def run_m1d(b : B?; n : int) { + let path = "_sql_bench_cnt_m1d_{n}.db" + disk_db_setup(path, n) + with_sqlite(path) $(db) { + b |> run("m1d_sql/{n}", n) { + let c = _sql(db |> select_from(type) |> _where(_.price > THRESHOLD) |> count()) + if (c == 0) { + b->failNow() + } + } + } + disk_db_cleanup(path) +} + +// --- M2m: no _sql, :memory: source --- +def run_m2m(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m2m_db_no_sql/{n}", n) { + let c = db |> select_from(type) |> _where(_.price > THRESHOLD) |> count() + if (c == 0) { + b->failNow() + } + } + } +} + +// --- M2d: no _sql, disk source --- +def run_m2d(b : B?; n : int) { + let path = "_sql_bench_cnt_m2d_{n}.db" + disk_db_setup(path, n) + with_sqlite(path) $(db) { + b |> run("m2d_db_no_sql/{n}", n) { + let c = db |> select_from(type) |> _where(_.price > THRESHOLD) |> count() + if (c == 0) { + b->failNow() + } + } + } + disk_db_cleanup(path) +} + +// --- M3: array LINQ (materializes intermediate filter array) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let c = arr |> _where(_.price > THRESHOLD) |> count() + if (c == 0) { + b->failNow() + } + } +} + +// --- M3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let c = _fold(each(arr)._where(_.price > THRESHOLD).count()) + if (c == 0) { + b->failNow() + } + } +} + +[benchmark] +def count_aggregate_10k_m1m(b : B?) { + run_m1m(b, 10000) +} + +[benchmark] +def count_aggregate_10k_m1d(b : B?) 
{ + run_m1d(b, 10000) +} + +[benchmark] +def count_aggregate_10k_m2m(b : B?) { + run_m2m(b, 10000) +} + +[benchmark] +def count_aggregate_10k_m2d(b : B?) { + run_m2d(b, 10000) +} + +[benchmark] +def count_aggregate_10k_m3(b : B?) { + run_m3(b, 10000) +} + +[benchmark] +def count_aggregate_10k_m3f(b : B?) { + run_m3f(b, 10000) +} diff --git a/benchmarks/sql/select_where.das b/benchmarks/sql/select_where.das new file mode 100644 index 0000000000..2e8def8615 --- /dev/null +++ b/benchmarks/sql/select_where.das @@ -0,0 +1,114 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 500 + +// --- M1m: _sql over :memory: --- +def run_m1m(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1m_sql/{n}", n) { + let rows <- _sql(db |> select_from(type) |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } + } +} + +// --- M1d: _sql over disk file --- +def run_m1d(b : B?; n : int) { + let path = "_sql_bench_m1d_{n}.db" + disk_db_setup(path, n) + with_sqlite(path) $(db) { + b |> run("m1d_sql/{n}", n) { + let rows <- _sql(db |> select_from(type) |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } + } + disk_db_cleanup(path) +} + +// --- M2m: no _sql, :memory: source --- +def run_m2m(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m2m_db_no_sql/{n}", n) { + let rows <- (db |> select_from(type) |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } + } +} + +// --- M2d: no _sql, disk source --- +def run_m2d(b : B?; n : int) { + let path = "_sql_bench_m2d_{n}.db" + disk_db_setup(path, n) + with_sqlite(path) $(db) { + b |> run("m2d_db_no_sql/{n}", n) { + let rows <- (db |> select_from(type) |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } + } + disk_db_cleanup(path) +} + +// --- M3: array LINQ (materializing intermediate arrays) --- +def run_m3(b : B?; n : int) { + 
let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let rows <- (arr |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } +} + +// --- M3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let rows <- _fold(each(arr)._where(_.price > THRESHOLD).to_array()) + if (length(rows) == 0) { + b->failNow() + } + } +} + +[benchmark] +def select_where_10k_m1m(b : B?) { + run_m1m(b, 10000) +} + +[benchmark] +def select_where_10k_m1d(b : B?) { + run_m1d(b, 10000) +} + +[benchmark] +def select_where_10k_m2m(b : B?) { + run_m2m(b, 10000) +} + +[benchmark] +def select_where_10k_m2d(b : B?) { + run_m2d(b, 10000) +} + +[benchmark] +def select_where_10k_m3(b : B?) { + run_m3(b, 10000) +} + +[benchmark] +def select_where_10k_m3f(b : B?) { + run_m3f(b, 10000) +} diff --git a/benchmarks/sql/select_where_order_take.das b/benchmarks/sql/select_where_order_take.das new file mode 100644 index 0000000000..b14fcfd428 --- /dev/null +++ b/benchmarks/sql/select_where_order_take.das @@ -0,0 +1,132 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 500 +let TAKE_N = 10 + +// --- M1m: _sql over :memory: --- +def run_m1m(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1m_sql/{n}", n) { + let rows <- _sql(db |> select_from(type) + |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } + } +} + +// --- M1d: _sql over disk file --- +def run_m1d(b : B?; n : int) { + let path = "_sql_bench_swot_m1d_{n}.db" + disk_db_setup(path, n) + with_sqlite(path) $(db) { + b |> run("m1d_sql/{n}", n) { + let rows <- _sql(db |> select_from(type) + |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } + } + disk_db_cleanup(path) +} + +// --- M2m: no _sql, :memory: source --- 
+def run_m2m(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m2m_db_no_sql/{n}", n) { + let rows <- (db |> select_from(type) + |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } + } +} + +// --- M2d: no _sql, disk source --- +def run_m2d(b : B?; n : int) { + let path = "_sql_bench_swot_m2d_{n}.db" + disk_db_setup(path, n) + with_sqlite(path) $(db) { + b |> run("m2d_db_no_sql/{n}", n) { + let rows <- (db |> select_from(type) + |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } + } + disk_db_cleanup(path) +} + +// --- M3: array LINQ (materializing intermediate arrays) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let rows <- (arr |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } +} + +// --- M3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let rows <- _fold(each(arr)._where(_.price > THRESHOLD) + ._order_by(_.price) + .take(TAKE_N) + .to_array()) + if (length(rows) == 0) { + b->failNow() + } + } +} + +[benchmark] +def select_where_order_take_10k_m1m(b : B?) { + run_m1m(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m1d(b : B?) { + run_m1d(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m2m(b : B?) { + run_m2m(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m2d(b : B?) { + run_m2d(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m3(b : B?) { + run_m3(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m3f(b : B?) 
{ + run_m3f(b, 10000) +} diff --git a/daslib/linq_boost.das b/daslib/linq_boost.das index 8b7ee9866e..cc95f369a5 100644 --- a/daslib/linq_boost.das +++ b/daslib/linq_boost.das @@ -956,6 +956,11 @@ var private g_foldSeq = [ // those are applied in order calls = ["distinct", "order" ], folder = @@fold_order_distinct ), +// where + count (single-pass count, no intermediate filter array) + FoldSequence( + calls = ["where_", "count"], + folder = @@fold_where_count + ), // select and where FoldSequence( calls = ["where_", "select" ], @@ -1030,6 +1035,38 @@ def private fold_where(argIndex : int; var topValue : Expression?; var blk : Exp return append_comprehension(argIndex, topValue, comprehension, blk, calls[0]._0.at) } +[macro_function] +def private fold_where_count(argIndex : int; var topValue : Expression?; var blk : ExprBlock?; var calls : array>) : Expression? { + //! folds `_where(p) |> count()` into a single-pass loop with the predicate inlined — no intermediate filter array, no block-call overhead + var eWhere = calls[0]._0 + let srcName = "`source`{argIndex}`{eWhere.at.line}`{eWhere.at.column}" + let itName = "`it`{argIndex}`{eWhere.at.line}`{eWhere.at.column}" + let nName = "`n`{argIndex}`{eWhere.at.line}`{eWhere.at.column}" + var whereCond = fold_linq_cond(eWhere.arguments[1], itName) + var fusedCall : Expression? 
= qmacro(invoke($($i(srcName) : typedecl($e(topValue)) - const) { + var $i(nName) = 0 + for ($i(itName) in $i(srcName)) { + if ($e(whereCond)) { + $i(nName) ++ + } + } + return $i(nName) + }, $e(topValue))) + fusedCall.force_at(calls[0]._0.at) + fusedCall.force_generated(true) + let newArgName = "pass_{argIndex}" + blk.list |> emplace_new <| qmacro_expr() { + var $i(newArgName) = $e(fusedCall) + } + (blk.list.back() as ExprLet).variables[0].flags |= VariableFlags.can_shadow + if (argIndex != 0) { + blk.list |> emplace_new <| qmacro_expr() { + delete $e(topValue) + } + } + return qmacro($i(newArgName)) +} + [macro_function] def private fold_select(argIndex : int; var topValue : Expression?; var blk : ExprBlock?; var calls : array>) : Expression? { //! folds select into a single comprehension diff --git a/utils/benchctl/README.md b/utils/benchctl/README.md index 851b203c65..55fa4ba1a2 100644 --- a/utils/benchctl/README.md +++ b/utils/benchctl/README.md @@ -13,7 +13,7 @@ benchctl stores benchmark output in a local SQLite database and provides statist Key capabilities: - Insert benchmark JSON output files into a persistent database, tagged by commit hash (and optionally custom tags) -- Query the database with raw SQL conditions (e.g. selecting by name or tags) +- Query the database with structured filter flags (`--commit`, `--tag`) - Compare two sets of results - Compute geometric mean deltas across all benchmarks in a comparison @@ -31,7 +31,7 @@ Key capabilities: daslang utils/benchctl/main.das -- [options...] ``` -All commands accept `--db ` to specify the database file (default: `benchdata.db`) and `--colors false` to disable ANSI color output. +All commands accept `--db ` to specify the database file (default: `benchdata.db`) and `--no-color` to disable ANSI color output. Consider using an explicit name for a long-term database while using the default `benchdata.db` as a scratch db you can reset between the experiments. 
@@ -44,8 +44,6 @@ Initializes (or reinitializes) the benchmark database. > **Warning:** drops all existing data. ``` -# The name is benchdata.db by default, so this explicit parameter -# is only used for demonstrative purposes daslang utils/benchctl/main.das -- reset --db benchdata.db ``` @@ -74,7 +72,7 @@ Options: Input files must contain newline-delimited JSON records as produced by the dastest benchmark runner. Non-JSON lines are silently skipped. ``` -# Adds all samples from result1.txt and result2.txt tagging them with 2 prodived tags, +# Adds all samples from result1.txt and result2.txt tagging them with 2 provided tags, # the commit hash will be "git rev-parse HEAD" (use --commit to override that) daslang utils/benchctl/main.das -- insert --tag example1 --tag foo result1.txt result2.txt ``` @@ -99,10 +97,10 @@ daslang utils/benchctl/main.das -- insert results.txt ### `query` -Displays benchmark records from the database. +Displays benchmark records from the database. Filters compose with AND. ``` -daslang utils/benchctl/main.das -- query [--db benchdata.db] [--select ] +daslang utils/benchctl/main.das -- query [--db benchdata.db] [--commit ] [--tag ] ``` Options: @@ -110,9 +108,10 @@ Options: | Flag | Description | |------|-------------| | `--db ` | Database file path | -| `--select ` | SQL `WHERE` clause to filter records | +| `--commit ` | Filter to records with this exact commit hash | +| `--tag ` | Filter to records carrying this tag (uses LIKE under the hood — empty `--tag` means no filter) | -This is mostly needed to debug the selection queries before using a more useful `compare` command. +This is mostly needed to debug the selection filters before using a more useful `compare` command. 
**Examples:** @@ -121,45 +120,18 @@ This is mostly needed to debug the selection queries before using a more useful daslang utils/benchctl/main.das -- query # Show records for a specific commit -daslang utils/benchctl/main.das -- query --select "commit_hash='abc12345'" +daslang utils/benchctl/main.das -- query --commit abc12345 -# Show only string-allocation-heavy benchmarks -daslang utils/benchctl/main.das -- query --select "string_allocs > 0" -``` - -You can use any columns from the `benchmarks` table to do the filtering. - -> Keep in mind: the table contains the samples, they are joined and analyzed together during the processing. +# Show records with a tag +daslang utils/benchctl/main.das -- query --tag before +# Combine filters (AND) +daslang utils/benchctl/main.das -- query --commit abc12345 --tag before ``` -id INTEGER -- an autoincrement ID of the sample - -commit_hash TEXT -- a git commit hash -tags TEXT -- a list of tags bundled inside a string (see below) -insert_date INTEGER -- the date of the "insert" command being executed for this sample -full_name TEXT -- a full sample's benchmark identifier, "{name}/{sub_name}" -name TEXT -- a benchmark's function name -sub_name TEXT -- a benchmark's "run" argument which specifies the subtest +If you need a one-off filter that the structured flags don't cover (e.g. `WHERE string_allocs > 0`), open the DB directly with the `sqlite3` shell — `benchctl` no longer accepts arbitrary SQL via the CLI. 
-mode TEXT -- the execution mode ("JIT", "INTERP", or "AOT") - -n INTEGER -- how many times the benchmarking function was executed - -time_ns INTEGER -- nanosecs per every operation run (time) -allocs INTEGER -- a number of non-string heap allocs -heap_bytes INTEGER -- a number of heap bytes allocated (excluding the string bytes) -string_allocs INTEGER -- like allocs, but only for heap strings -string_heap_bytes INTEGER -- like heap_bytes, but only for heap strings -``` - -Tags are stored as `[tag1][tag2]` strings, so you can filter by tag with `LIKE`: - -```sh -daslang utils/benchctl/main.das -- query --select "tags LIKE '%[before]%'" -``` - -> "Has no tag" condition can be implemented using the `NOT LIKE` operation. +> Stored columns: `id`, `commit_hash`, `tags`, `insert_date`, `full_name`, `name`, `sub_name`, `mode`, `n`, `time_ns`, `allocs`, `heap_bytes`, `string_allocs`, `string_heap_bytes`. Tags are stored as `[tag1][tag2]` strings. --- @@ -176,27 +148,29 @@ Options: | Flag | Description | |------|-------------| | `--db ` | Database file path | -| `--select_old ` | SQL `WHERE` clause for the baseline (old) results | -| `--select_new ` | SQL `WHERE` clause for the new results | +| `--old-commit ` | Baseline commit filter | +| `--old-tag ` | Baseline tag filter | +| `--new-commit ` | New commit filter | +| `--new-tag ` | New tag filter | | `--s to>` | Regex rename: rewrite old benchmark names to match new names | -| `--colors false` | Disable colored output | +| `--no-color` | Disable colored output | -> Both "select" arguments are abbreviated for convenience to `--old` and `--new`, but you're still encouraged to create an alias/shortcut for the most common use cases you might have. +Each side's filters compose with AND. Empty side filters mean "all rows for that side" — usually you want at least one filter per side. 
**Example - compare two commits:** ```sh daslang utils/benchctl/main.das -- compare \ - --select_old "commit_hash='abc12345'" \ - --select_new "commit_hash='def67890'" + --old-commit abc12345 \ + --new-commit def67890 ``` **Example - compare using tags:** ```sh daslang utils/benchctl/main.das -- compare \ - --select_old "tags LIKE '%[before]%'" \ - --select_new "tags LIKE '%[after]%'" + --old-tag before \ + --new-tag after ``` Both sample sets (old and new) will be compared over the matching `full_name`. This means only the same benchmark results (but across different revisions) can be compared. Unless you use a renaming rule. @@ -207,8 +181,8 @@ If benchmarks have different names, but logically can be compared to one another ```sh daslang utils/benchctl/main.das -- compare \ - --select_old "..." \ - --select_new "..." \ + --old-tag before \ + --new-tag after \ --s "BenchmarkBad=>BenchmarkGood" ``` @@ -273,8 +247,5 @@ daslang dastest/dastest.das -- --test mybench.das --bench --bench-format json | daslang utils/benchctl/main.das -- insert --tag after new.json # 4. 
Compare the two runs -# (note: using the aliased select_old and select_new) -daslang utils/benchctl/main.das -- compare \ - --old "tags LIKE '%[before]%'" \ - --new "tags LIKE '%[after]%'" +daslang utils/benchctl/main.das -- compare --old-tag before --new-tag after ``` diff --git a/utils/benchctl/bench_args.das b/utils/benchctl/bench_args.das new file mode 100644 index 0000000000..f73f75c41f --- /dev/null +++ b/utils/benchctl/bench_args.das @@ -0,0 +1,94 @@ +options gen2 + +require daslib/clargs + +[CommandLineArgs] +struct BenchctlArgs { + @clarg_positional + @clarg_doc = "Subcommand: reset / insert / query / compare / help" + command : Option + + @clarg_positional + @clarg_doc = "Files to insert (insert subcommand only)" + files : array + + @clarg_doc = "SQLite database path" + db : string = "benchdata.db" + + @clarg_name = "no-color" + @clarg_doc = "Disable colored output" + no_color : bool + + @clarg_doc = "Git commit hash for insert (default: git rev-parse HEAD); also filters query" + commit : string + + @clarg_doc = "Tag (insert: repeatable; query: filter)" + tag : array + + @clarg_name = "old-commit" + @clarg_doc = "Baseline commit hash filter (compare)" + old_commit : string + + @clarg_name = "old-tag" + @clarg_doc = "Baseline tag filter (compare)" + old_tag : string + + @clarg_name = "new-commit" + @clarg_doc = "New commit hash filter (compare)" + new_commit : string + + @clarg_name = "new-tag" + @clarg_doc = "New tag filter (compare)" + new_tag : string + + @clarg_doc = "Regex rename old=>new before pairing benchmark names (compare)" + s : string + + @clarg_short = "?" 
+ @clarg_doc = "Show this help and exit" + help : bool +} + +struct public ParsedArgs { + ok : bool + err : string + command : string + files : array + db : string + no_color : bool + commit : string + tags : array + old_commit : string + old_tag : string + new_commit : string + new_tag : string + s : string + help : bool +} + +def public parse_benchctl_args(argv : array) : ParsedArgs { + var parse_r <- parse_args(type, argv) + if (parse_r |> is_err) { + return ParsedArgs(ok = false, err = parse_r |> unwrap_err) + } + var parsed <- parse_r |> move_unwrap + return <- ParsedArgs( + ok = true, + command = parsed.command ?? "", + files <- parsed.files, + db = parsed.db, + no_color = parsed.no_color, + commit = parsed.commit, + tags <- parsed.tag, + old_commit = parsed.old_commit, + old_tag = parsed.old_tag, + new_commit = parsed.new_commit, + new_tag = parsed.new_tag, + s = parsed.s, + help = parsed.help + ) +} + +def public print_benchctl_help() { + print(format_help_with_auto_help(get_command_info(type), "benchctl")) +} diff --git a/utils/benchctl/bench_sql.das b/utils/benchctl/bench_sql.das deleted file mode 100644 index 70b2bb0f82..0000000000 --- a/utils/benchctl/bench_sql.das +++ /dev/null @@ -1,82 +0,0 @@ -require sqlite/sqlite_boost - -require daslib/stringify -require daslib/defer - -def db_exec(db : sqlite3?; sql : string) : string { - var err_msg : string - defer() { - sqlite3_free(err_msg) - } - let rc = sqlite3_exec(db, sql, unsafe(addr(err_msg))) - return (rc != SQLITE_OK) ? 
clone_string(err_msg) : "" -} - -def db_bind_text_param(stmt : sqlite3_stmt?; key : string; v : string) { - let idx = sqlite3_bind_parameter_index(stmt, key) - sqlite3_bind_text(stmt, idx, v) -} - -def db_bind_int64_param(stmt : sqlite3_stmt?; key : string; v : int64) { - let idx = sqlite3_bind_parameter_index(stmt, key) - sqlite3_bind_int64(stmt, idx, v) -} - -let sql_bench_insert = %stringify~ - INSERT INTO benchmarks ( - commit_hash, - tags, - insert_date, - full_name, - name, - sub_name, - mode, - n, - time_ns, - allocs, - heap_bytes, - string_allocs, - string_heap_bytes - ) - VALUES ( - @commit_hash, - @tags, - strftime('%s', 'now'), - @name || '/' || @sub_name, - @name, - @sub_name, - @mode, - @n, - @time_ns, - @allocs, - @heap_bytes, - @string_allocs, - @string_heap_bytes - ); -%% - -let sql_db_init = %stringify~ - DROP TABLE IF EXISTS benchmarks; - - CREATE TABLE benchmarks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - - commit_hash TEXT, - tags TEXT, - insert_date INTEGER, - - full_name TEXT, - name TEXT, - sub_name TEXT, - - mode TEXT, - - n INTEGER, - time_ns INTEGER, - - allocs INTEGER, - heap_bytes INTEGER, - string_allocs INTEGER, - string_heap_bytes INTEGER - ); -%% diff --git a/utils/benchctl/bench_table.das b/utils/benchctl/bench_table.das new file mode 100644 index 0000000000..c0e86763d2 --- /dev/null +++ b/utils/benchctl/bench_table.das @@ -0,0 +1,23 @@ +options gen2 + +require daslib/sql +require sqlite/sqlite_boost +require sqlite/sqlite_linq + +[sql_table(name = "benchmarks")] +struct Benchmark { + @sql_primary_key id : int + commit_hash : string + tags : string + insert_date : int64 + full_name : string + name : string + sub_name : string + mode : string + n : int64 + time_ns : int64 + allocs : int64 + heap_bytes : int64 + string_allocs : int64 + string_heap_bytes : int64 +} diff --git a/utils/benchctl/benchstat.das b/utils/benchctl/benchstat.das index 15fc403987..9966efdeec 100644 --- a/utils/benchctl/benchstat.das +++ 
b/utils/benchctl/benchstat.das @@ -1,33 +1,15 @@ +options gen2 + require dastest/testing_boost require daslib/strings_boost require daslib/json_boost require math -struct BenchmarkEntry { - id : int64 - - commit_hash : string - tags : string - insert_date : int64 - - full_name : string - name : string - sub_name : string - - mode : string - - n : int64 - time_ns : int64 - - allocs : int64 - heap_bytes : int64 - string_allocs : int64 - string_heap_bytes : int64 -} +require bench_table public struct BenchmarkSampleSet { key : string - list : array + list : array // Assigned later, when stats are computed. stats : BenchmarkStats? = null @@ -85,7 +67,7 @@ def parse_bench_output(data : string) : array { return <- entries } -def make_sample_sets(entries : array) : table { +def make_sample_sets(entries : array) : table { var result : table for (e in entries) { let key := e.name + "/" + e.sub_name diff --git a/utils/benchctl/flags.das b/utils/benchctl/flags.das deleted file mode 100644 index 6abda2606d..0000000000 --- a/utils/benchctl/flags.das +++ /dev/null @@ -1,51 +0,0 @@ -require strings -require daslib/strings_boost - -struct Flags { - tags : array - argmap : table - values : array -} - -def get_string_aliased_arg(flags : Flags; key : string; alias : string, default_value : string = "") : string { - return get_string_arg(flags, alias, get_string_arg(flags, key, default_value)) -} - -def get_string_arg(flags : Flags; key : string; default_value : string = "") : string { - return flags.argmap?[key] ?? 
default_value -} - -def parse_flags(args : array) : Flags { - var tags : array - var values : array - var argmap : table - var i = 0 - while (i < length(args)) { - if (args[i] == "--tag") { - i++ - if (i < length(args)) { - let tag = args[i] - i++ - tags |> push(tag) - } - continue - } - if (starts_with(args[i], "--")) { - let key = args[i] - i++ - if (i < length(args)) { - let val = args[i] - i++ - argmap |> insert(slice(key, 2), val) - } - continue - } - values |> push(args[i]) - i++ - } - return Flags( - values <- values, - argmap <- argmap, - tags <- tags, - ) -} diff --git a/utils/benchctl/main.das b/utils/benchctl/main.das index 3b78b5ae81..746d56cf08 100644 --- a/utils/benchctl/main.das +++ b/utils/benchctl/main.das @@ -1,191 +1,212 @@ options gen2 -require sqlite/sqlite_boost - require daslib/fio -require math require daslib/defer require daslib/regex_boost require daslib/strings_boost +require math +require strings -require bench_sql -require utils -require flags +require sqlite/sqlite_boost +require sqlite/sqlite_linq +require bench_table +require bench_args require benchstat +require utils require table_fmt -[export] -def main() { - let err := run_main() - var exit_code = 0 - if (err != "") { - print("[{red_str("error")}] {err}\n") - exit_code = 1 - } - return exit_code +def print_usage() { + print("benchctl - benchmark database management tool\n\n") + print("Usage:\n") + print(" daslang utils/benchctl/main.das -- [options]\n\n") + print("Commands:\n") + print(" reset Initialize or reinitialize the benchmark database (drops all data)\n") + print(" insert Insert benchmark JSON output files into the database\n") + print(" query Display benchmark records (filter by --commit / --tag)\n") + print(" compare Compare two sets of results statistically\n") + print(" help Show this help message\n\n") + print_benchctl_help() } -def run_main() : string { - var args <- get_command_line_arguments() - let arg_index = find_index(args, "--") - if (arg_index == -1) { - 
return "-- separator not found" +[export] +def main : int { + var argv <- get_command_line_arguments() + let dash_idx = find_index(argv, "--") + if (dash_idx != -1) { + argv <- subarray(argv, (dash_idx + 1)..length(argv)) } - var script_args <- subarray(args, (arg_index + 1)..length(args)) - if (length(script_args) == 0) { - return "expected a subcommand name" + + if (empty(argv) || find_index(argv, "--help") != -1 || find_index(argv, "-h") != -1) { + print_usage() + return 0 } - let subcmd_name = script_args[0] - let subcmd_args <- subarray(script_args, 1..length(script_args)) - let flags = parse_flags(subcmd_args) + let parsed <- parse_benchctl_args(argv) + if (!parsed.ok) { + print("[{red_str("error")}] {parsed.err}\n\n") + print_usage() + return 1 + } - if (get_string_arg(flags, "colors", "true") == "false") { + if (parsed.no_color) { colored_output = false } - if (subcmd_name == "help") { - return run_help_cmd() + if (parsed.help) { + print_usage() + return 0 } - let db_filename = get_string_arg(flags, "db", "benchdata.db") - var need_db_init = !file_exists(db_filename) && find_index([ - "insert", - "query", - "compare", - ], subcmd_name) != -1 - - var db : sqlite3? 
- defer() { - sqlite3_close(db) + if (parsed.command == "" || parsed.command == "help") { + print_usage() + return 0 } - var rc = sqlite3_open(db_filename, unsafe(addr(db))) - if (rc != SQLITE_OK) { - return "open database: {sqlite3_errmsg(db)}" + + let err = run_subcommand(parsed) + if (err != "") { + print("[{red_str("error")}] {err}\n") + return 1 } + return 0 +} - if (need_db_init) { - let err = run_reset_cmd(db, flags) - if (err != "") { - return "implicit db creation: {err}" - } +def run_subcommand(parsed : ParsedArgs) : string { + let cmd = parsed.command + let need_init = !file_exists(parsed.db) && (cmd == "insert" || cmd == "query" || cmd == "compare") + var open_r <- try_open_sqlite(parsed.db) + if (open_r |> is_err) { + return "open database: {open_r |> unwrap_err}" } + var inscope db <- open_r._value - if (subcmd_name == "reset") { - return run_reset_cmd(db, flags) - } elif (subcmd_name == "insert") { - return run_insert_cmd(db, flags) - } elif (subcmd_name == "query") { - return run_query_cmd(db, flags) - } elif (subcmd_name == "compare") { - return run_compare_cmd(db, flags) - } else { - return "unknown subcommand name: {subcmd_name}" + if (need_init) { + run_reset_cmd(db) } - return "" + if (cmd == "reset") { + run_reset_cmd(db) + return "" + } elif (cmd == "insert") { + return run_insert_cmd(db, parsed) + } elif (cmd == "query") { + return run_query_cmd(db, parsed) + } elif (cmd == "compare") { + return run_compare_cmd(db, parsed) + } + return "unknown subcommand: {cmd}" } -def run_help_cmd() : string { - // TODO: it would be better if we could get this help command for free - // using a command line library (e.g. 
like "flag" package in Go) - - print("benchctl - benchmark database management tool\n\n") - print("Usage:\n") - print(" daslang benchctl/main.das -- [options]\n\n") - print("Commands:\n") - print(" reset Initialize or reinitialize the benchmark database (drops all data)\n") - print(" insert Insert benchmark results from JSON output files\n") - print(" query Query and display stored benchmark records\n") - print(" compare Compare two sets of benchmark results statistically\n") - print(" help Show this help message\n\n") - print("Common options:\n") - print(" --db SQLite3 database file path (default: benchdata.db)\n") - print(" --colors false Disable colored terminal output\n\n") - print("insert options:\n") - print(" --commit Git commit hash to tag results with (default: git rev-parse HEAD)\n") - print(" --tag Tag label to attach to results (can be repeated)\n") - print(" ... Benchmark JSON output files to insert\n\n") - print("query options:\n") - print(" --select SQL WHERE clause to filter records\n\n") - print("compare options:\n") - print(" --select_old SQL WHERE clause for baseline (old) results\n") - print(" --select_new SQL WHERE clause for new results\n") - print(" --s to> Regex rename: map old benchmark names to new names\n") - print(" --old --select_old alias\n") - print(" --new --select_new alias\n") - return "" +def run_reset_cmd(db : SqlRunner) { + db |> drop_table_if_exists(type) + db |> create_table(type) } -def query_benchmarks(db : sqlite3?; cond : string; exclude : table = default>) : tuple, string> { - var result : array +def run_insert_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + if (length(parsed.files) == 0) { + return "missing targets to insert" + } - var sql_text = "SELECT * FROM benchmarks" - if (cond != "") { - sql_text += " WHERE {cond}" + var commit_hash = parsed.commit + if (commit_hash == "") { + commit_hash = cmd_exec("git rev-parse HEAD") } - var err_msg : string - defer() { - sqlite3_free(err_msg) + var tag_string = "" + for 
(t in parsed.tags) { + tag_string += "[{t}]" // nolint:PERF001 — typically 0-2 tags, not a hot path } - let rc = sqlite3_exec(db, sql_text, unsafe(addr(err_msg))) $(values, columns) { - var entry : BenchmarkEntry - for (v, c in values, columns) { - if (c == "id") { - entry.id = to_int64(v) - } elif (c == "commit_hash") { - entry.commit_hash := v - } elif (c == "tags") { - entry.tags := v - } elif (c == "insert_date") { - entry.insert_date = to_int64(v) - } elif (c == "full_name") { - entry.full_name := v - } elif (c == "name") { - entry.name := v - } elif (c == "sub_name") { - entry.sub_name := v - } elif (c == "mode") { - entry.mode := v - } elif (c == "n") { - entry.n = to_int64(v) - } elif (c == "time_ns") { - entry.time_ns = to_int64(v) - } elif (c == "allocs") { - entry.allocs = to_int64(v) - } elif (c == "heap_bytes") { - entry.heap_bytes = to_int64(v) - } elif (c == "string_allocs") { - entry.string_allocs = to_int64(v) - } elif (c == "string_heap_bytes") { - entry.string_heap_bytes = to_int64(v) - } + + let now = int64(get_clock()) + + for (filename in parsed.files) { + let data = read_file(filename) + if (data == "") { + print("{filename}: ignored\n") + continue + } + let stats_entries = parse_bench_output(data) + var rows : array + rows |> reserve(length(stats_entries)) + for (e in stats_entries) { + rows |> push(Benchmark( + id = 0, + commit_hash = commit_hash, + tags = tag_string, + insert_date = now, + full_name = "{e.name}/{e.sub_name}", + name = e.name, + sub_name = e.sub_name, + mode = e.func_type, + n = int64(e.n), + time_ns = e.time_ns, + allocs = int64(e.allocs), + heap_bytes = int64(e.heap_bytes), + string_allocs = int64(e.string_allocs), + string_heap_bytes = int64(e.string_heap_bytes), + )) } - if (!key_exists(exclude, entry.id)) { - result |> push(entry) + let txn_err = db |> try_insert(rows) + if (txn_err |> is_err) { + return "insert {filename}: {txn_err |> unwrap_err}" } - return SQLITE_OK + print("{filename}: added {length(rows)} 
samples\n") } + return "" +} - var err = "" - if (rc != SQLITE_OK) { - err = clone_string(sqlite3_errmsg(db)) - } +def query_benchmarks(db : SqlRunner; commit_filter : string; tag_filter : string) : array { + let want_tag = (tag_filter == "" ? "" : "[{tag_filter}]") + return <- _sql(db |> select_from(type) + |> _where(commit_filter == "" || _.commit_hash == commit_filter) + |> _where(want_tag == "" || _.tags |> contains(want_tag))) +} - return (<- result, err) +def run_query_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + let tag_filter = length(parsed.tags) > 0 ? parsed.tags[0] : "" + let entries <- query_benchmarks(db, parsed.commit, tag_filter) + if (length(entries) == 0) { + print("no results\n") + return "" + } + var table_rows : array + table_rows |> reserve(length(entries)) + for (e in entries) { + let commit_short = length(e.commit_hash) > 8 ? slice(e.commit_hash, 0, 8) : e.commit_hash + let ns_per_op = e.n > 0l ? double(e.time_ns) / double(e.n) : 0.0lf + table_rows |> push(new TableRow(columns <- [ + e.full_name, + commit_short, + e.tags, + "{e.n}", + "{ns_per_op:.1f} ns/op", + ])) + } + print(" {yellow_str("benchmark")} {yellow_str("commit")} {yellow_str("tags")} {yellow_str("n")} {yellow_str("ns/op")}\n\n") + var tabspec = new TableData( + rows <- table_rows, + col_gap = 2, + indent = " ", + ) + print(format_table(tabspec)) + return "" } -def run_compare_cmd(db : sqlite3?; flags : Flags) : string { - var cond_old = get_string_aliased_arg(flags, "select_old", "old", "") - var cond_new = get_string_aliased_arg(flags, "select_new", "new", "") +def run_compare_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + let old_entries <- query_benchmarks(db, parsed.old_commit, parsed.old_tag) + let new_entries <- query_benchmarks(db, parsed.new_commit, parsed.new_tag) + + if (length(old_entries) == 0) { + return "no rows match --old-commit / --old-tag" + } + if (length(new_entries) == 0) { + return "no rows match --new-commit / --new-tag" + } var key_map = 
@(x : string) : string { return x } - var key_mapper = get_string_arg(flags, "s", "") - if (key_mapper != "") { - let parts <- split(key_mapper, "=>") + if (parsed.s != "") { + let parts <- split(parsed.s, "=>") if (length(parts) != 2) { return "invalid -s argument, expected 'from=>to' format" } @@ -195,20 +216,6 @@ def run_compare_cmd(db : sqlite3?; flags : Flags) : string { } } - let (old_entries, err) = query_benchmarks(db, cond_old) - if (err != "") { - return "query --old: {err}" - } - var old_ids : table - for (e in old_entries) { - old_ids |> insert(e.id) - } - - let (new_entries, err) = query_benchmarks(db, cond_new, old_ids) - if (err != "") { - return "query --new: {err}" - } - var old_samples = make_sample_sets(old_entries) var new_samples = make_sample_sets(new_entries) @@ -383,105 +390,6 @@ def run_compare_cmd(db : sqlite3?; flags : Flags) : string { return "" } -def run_query_cmd(db : sqlite3?; flags : Flags) : string { - var cond = get_string_arg(flags, "select", "") - let (entries, err) = query_benchmarks(db, cond) - if (err != "") { - return "run query: {err}" - } - if (length(entries) == 0) { - print("no results\n") - return "" - } - var table_rows : array - for (e in entries) { - let commit_short = length(e.commit_hash) > 8 ? slice(e.commit_hash, 0, 8) : e.commit_hash - let ns_per_op = e.n > 0l ? 
double(e.time_ns) / double(e.n) : 0.0lf - table_rows |> push(new TableRow(columns <- [ - e.full_name, - commit_short, - e.tags, - "{e.n}", - "{ns_per_op:.1f} ns/op", - ])) - } - print(" {yellow_str("benchmark")} {yellow_str("commit")} {yellow_str("tags")} {yellow_str("n")} {yellow_str("ns/op")}\n\n") - var tabspec = new TableData( - rows <- table_rows, - col_gap = 2, - indent = " ", - ) - print(format_table(tabspec)) - return "" -} - -def run_reset_cmd(db : sqlite3?; flags : Flags) : string { - var err = db_exec(db, sql_db_init) - if (err != "") { - return "run init query: {err}" - } - - return "" -} - -def run_insert_cmd(db : sqlite3?; flags : Flags) : string { - if (length(flags.values) == 0) { - return "missing targets to insert" - } - - var commit_hash = get_string_arg(flags, "commit", "") - if (commit_hash == "") { - commit_hash = cmd_exec("git rev-parse HEAD") - } - - var tag_string = "" - for (t in flags.tags) { - tag_string += "[{t}]" - } - - var stmt : sqlite3_stmt? - let rc = sqlite3_prepare_v2(db, sql_bench_insert, -1, unsafe(addr(stmt)), null) - if (rc != SQLITE_OK) { - return "prepare insertion statement: {sqlite3_errmsg(db)}" - } - defer() { - sqlite3_finalize(stmt) - } - - for (filename in flags.values) { - let data = read_file(filename) - if (data == "") { - print("{filename}: ignored\n") - continue - } - let entries = parse_bench_output(data) - var added = 0 - for (e in entries) { - db_bind_text_param(stmt, "@commit_hash", commit_hash) - db_bind_text_param(stmt, "@tags", tag_string) - db_bind_text_param(stmt, "@name", e.name) - db_bind_text_param(stmt, "@sub_name", e.sub_name) - db_bind_text_param(stmt, "@mode", e.func_type) - db_bind_int64_param(stmt, "@n", int64(e.n)) - db_bind_int64_param(stmt, "@time_ns", e.time_ns) - db_bind_int64_param(stmt, "@allocs", int64(e.allocs)) - db_bind_int64_param(stmt, "@heap_bytes", int64(e.heap_bytes)) - db_bind_int64_param(stmt, "@string_allocs", int64(e.string_allocs)) - db_bind_int64_param(stmt, 
"@string_heap_bytes", int64(e.string_heap_bytes)) - let step_rc = sqlite3_step(stmt) - if (step_rc != SQLITE_DONE) { - return "insert failed: {sqlite3_errmsg(db)}" - } - added++ - sqlite3_reset(stmt); - sqlite3_clear_bindings(stmt); - } - print("{filename}: added {added} samples\n") - } - - return "" -} - def warning(msg : string) { print("{yellow_str("[warning]")} {msg}\n") } diff --git a/utils/benchctl/table_fmt.das b/utils/benchctl/table_fmt.das index 92a79bcee6..703f19fefc 100644 --- a/utils/benchctl/table_fmt.das +++ b/utils/benchctl/table_fmt.das @@ -1,3 +1,5 @@ +options gen2 + require math require strings diff --git a/utils/benchctl/utils.das b/utils/benchctl/utils.das index 6859f7dd2b..2f29eb1552 100644 --- a/utils/benchctl/utils.das +++ b/utils/benchctl/utils.das @@ -1,3 +1,5 @@ +options gen2 + require daslib/fio require strings require daslib/strings_boost @@ -24,7 +26,7 @@ def file_exists(filename : string) : bool { def read_file(filename : string) : string { var result = "" - fopen(filename, "r") $(f) { + fopen(filename, "rb") $(f) { if (f != null) { result = fread(f) } From 3851f96cd2f9dd6f1d61169104950c0c9f0d80ea Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 7 May 2026 03:29:56 -0700 Subject: [PATCH 2/4] benchctl: address review feedback, simplify sql/ to 3 modes Copilot review on PR #2599: - run_reset_cmd now uses try_drop_table_if_exists / try_create_table and returns an error string; the implicit-init path propagates it instead of crashing on a corrupt or locked DB. - run_query_cmd rejects multiple --tag values for query (previously silently used only the first); compare-side scalar flags unchanged. - Insert tag loop skips empty tags and rejects '[' / ']' in tag values (the bracket scheme cannot delimit them safely). - README annotates --tag as single-value for query, bracket-free for insert. 
benchmarks/sql/ simplification (per Boris): drop the m1d / m2m / m2d modes (disk vs memory was a one-shot finding; no-_sql DB modes only re-prove that select_from materializes -- already documented). Each file now compares 3 modes: m1 (_sql over :memory:), m3 (plain array LINQ), m3f (_fold-fused array LINQ). Removed disk_db_setup / cleanup helpers from _common.das; updated benchmarks/README.md mode matrix. count_aggregate bumped to 1M; per-element cost is flat from 100k -> 1M across all three modes (m1 38, m3 12, m3f 3 ns/op). Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/README.md | 19 +++-- benchmarks/sql/_common.das | 11 --- benchmarks/sql/count_aggregate.das | 83 ++++------------------ benchmarks/sql/select_where.das | 72 ++----------------- benchmarks/sql/select_where_order_take.das | 81 ++------------------- utils/benchctl/README.md | 4 +- utils/benchctl/main.das | 30 ++++++-- 7 files changed, 61 insertions(+), 239 deletions(-) diff --git a/benchmarks/README.md b/benchmarks/README.md index b28edb0ffc..5828995da7 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -60,20 +60,17 @@ Every `.das` benchmark file in this directory tree is listed below, grouped by s ## sql/ -6-mode comparison: `_sql` macro vs `select_from` without macro, across `:memory:` and on-disk SQLite, plus pure in-memory `array` LINQ in materializing and `_fold`-fused forms. Mirrors the `tests/dasSQLITE/parity_check_*.das` pattern but oriented to throughput. Disk DBs are populated and deleted outside the timed block. +3-mode comparison: `_sql` macro over `:memory:` SQLite vs pure in-memory `array` LINQ, with the array form measured both in its naive (intermediate-materializing) and `_fold`-fused shapes. Mirrors the `tests/dasSQLITE/parity_check_*.das` pattern but oriented to throughput. 
-| Mode | Source | Macro | +| Mode | Source | Form | |---|---|---| -| `m1m` | `:memory:` SQLite | `_sql` — compile-time SQL emission | -| `m1d` | on-disk `.db` file | `_sql` — compile-time SQL emission | -| `m2m` | `:memory:` SQLite | none — `select_from` materializes the full table, then in-memory LINQ | -| `m2d` | on-disk `.db` file | none — same as `m2m` over disk | -| `m3` | pre-populated `array` | none — chain materializes intermediate filter/sort arrays | +| `m1` | `:memory:` SQLite | `_sql` — compile-time SQL emission, work pushed to the engine | +| `m3` | pre-populated `array` | plain LINQ chain — materializes intermediate filter/sort arrays | | `m3f` | pre-populated `array` | `_fold` from `daslib/linq_boost` — fuses the chain into a single pass, in-place where possible | | File | Description | |---|---| -| `_common.das` | Shared `Car` `[sql_table]` + `fixture_db` / `fixture_array` / `disk_db_setup` / `disk_db_cleanup` (not a benchmark) | -| `select_where.das` | Filter chain — `_where(_.price > 500)` over 10K rows. Modest asymmetry; M2/M3 must walk every row. | -| `select_where_order_take.das` | Filter + sort + limit — `_where \|> _order_by(_.price) \|> take(10)`. SQL ORDER BY + LIMIT bounds work; M2/M3 sort the full filtered set. | -| `count_aggregate.das` | Aggregate — `count()` after `_where`. SQL pushes `COUNT(*)` to the engine returning one row; M2/M3 materialize the full filtered array then count it. Highest-asymmetry chain. | +| `_common.das` | Shared `Car` `[sql_table]` + `fixture_db` / `fixture_array` (not a benchmark) | +| `select_where.das` | Filter chain — `_where(_.price > 500)` over 10K rows. Modest asymmetry; m3 walks every row. | +| `select_where_order_take.das` | Filter + sort + limit — `_where \|> _order_by(_.price) \|> take(10)`. SQL ORDER BY + LIMIT bounds work; m3 sorts the full filtered set. | +| `count_aggregate.das` | Aggregate — `count()` after `_where` over 100K rows. 
SQL pushes `COUNT(*)` to the engine returning one row; m3 materializes the full filtered array then counts it; m3f fuses where+count into one pass. Highest-asymmetry chain. | diff --git a/benchmarks/sql/_common.das b/benchmarks/sql/_common.das index 7ee5e6d498..2952b975f1 100644 --- a/benchmarks/sql/_common.das +++ b/benchmarks/sql/_common.das @@ -33,14 +33,3 @@ def public fixture_array(n : int) : array { } return <- arr } - -def public disk_db_setup(path : string; n : int) { - remove(path) - with_sqlite(path) $(db) { - fixture_db(db, n) - } -} - -def public disk_db_cleanup(path : string) { - remove(path) -} diff --git a/benchmarks/sql/count_aggregate.das b/benchmarks/sql/count_aggregate.das index 67926b2505..728bb11f90 100644 --- a/benchmarks/sql/count_aggregate.das +++ b/benchmarks/sql/count_aggregate.das @@ -6,14 +6,15 @@ require _common public let THRESHOLD = 500 // SQL pushes COUNT(*) to the engine returning one row. -// M2/M3 must materialize the full filtered array, then walk to count it. +// m3 must materialize the full filtered array, then walk to count it. +// m3f folds where+count into a single fused pass (no intermediate array). // Highest-asymmetry comparison among the three benchmark chains. 
-// --- M1m: _sql over :memory: --- -def run_m1m(b : B?; n : int) { +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { with_sqlite(":memory:") $(db) { fixture_db(db, n) - b |> run("m1m_sql/{n}", n) { + b |> run("m1_sql/{n}", n) { let c = _sql(db |> select_from(type) |> _where(_.price > THRESHOLD) |> count()) if (c == 0) { b->failNow() @@ -22,50 +23,7 @@ def run_m1m(b : B?; n : int) { } } -// --- M1d: _sql over disk file --- -def run_m1d(b : B?; n : int) { - let path = "_sql_bench_cnt_m1d_{n}.db" - disk_db_setup(path, n) - with_sqlite(path) $(db) { - b |> run("m1d_sql/{n}", n) { - let c = _sql(db |> select_from(type) |> _where(_.price > THRESHOLD) |> count()) - if (c == 0) { - b->failNow() - } - } - } - disk_db_cleanup(path) -} - -// --- M2m: no _sql, :memory: source --- -def run_m2m(b : B?; n : int) { - with_sqlite(":memory:") $(db) { - fixture_db(db, n) - b |> run("m2m_db_no_sql/{n}", n) { - let c = db |> select_from(type) |> _where(_.price > THRESHOLD) |> count() - if (c == 0) { - b->failNow() - } - } - } -} - -// --- M2d: no _sql, disk source --- -def run_m2d(b : B?; n : int) { - let path = "_sql_bench_cnt_m2d_{n}.db" - disk_db_setup(path, n) - with_sqlite(path) $(db) { - b |> run("m2d_db_no_sql/{n}", n) { - let c = db |> select_from(type) |> _where(_.price > THRESHOLD) |> count() - if (c == 0) { - b->failNow() - } - } - } - disk_db_cleanup(path) -} - -// --- M3: array LINQ (materializes intermediate filter array) --- +// --- m3: array LINQ (materializes intermediate filter array) --- def run_m3(b : B?; n : int) { let arr <- fixture_array(n) b |> run("m3_array/{n}", n) { @@ -76,7 +34,7 @@ def run_m3(b : B?; n : int) { } } -// --- M3f: array LINQ folded into a single fused pass --- +// --- m3f: array LINQ folded into a single fused pass --- def run_m3f(b : B?; n : int) { let arr <- fixture_array(n) b |> run("m3f_array_fold/{n}", n) { @@ -88,31 +46,16 @@ def run_m3f(b : B?; n : int) { } [benchmark] -def count_aggregate_10k_m1m(b : B?) 
{ - run_m1m(b, 10000) -} - -[benchmark] -def count_aggregate_10k_m1d(b : B?) { - run_m1d(b, 10000) -} - -[benchmark] -def count_aggregate_10k_m2m(b : B?) { - run_m2m(b, 10000) -} - -[benchmark] -def count_aggregate_10k_m2d(b : B?) { - run_m2d(b, 10000) +def count_aggregate_1m_m1(b : B?) { + run_m1(b, 1000000) } [benchmark] -def count_aggregate_10k_m3(b : B?) { - run_m3(b, 10000) +def count_aggregate_1m_m3(b : B?) { + run_m3(b, 1000000) } [benchmark] -def count_aggregate_10k_m3f(b : B?) { - run_m3f(b, 10000) +def count_aggregate_1m_m3f(b : B?) { + run_m3f(b, 1000000) } diff --git a/benchmarks/sql/select_where.das b/benchmarks/sql/select_where.das index 2e8def8615..9c1174765f 100644 --- a/benchmarks/sql/select_where.das +++ b/benchmarks/sql/select_where.das @@ -5,11 +5,11 @@ require _common public let THRESHOLD = 500 -// --- M1m: _sql over :memory: --- -def run_m1m(b : B?; n : int) { +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { with_sqlite(":memory:") $(db) { fixture_db(db, n) - b |> run("m1m_sql/{n}", n) { + b |> run("m1_sql/{n}", n) { let rows <- _sql(db |> select_from(type) |> _where(_.price > THRESHOLD)) if (length(rows) == 0) { b->failNow() @@ -18,50 +18,7 @@ def run_m1m(b : B?; n : int) { } } -// --- M1d: _sql over disk file --- -def run_m1d(b : B?; n : int) { - let path = "_sql_bench_m1d_{n}.db" - disk_db_setup(path, n) - with_sqlite(path) $(db) { - b |> run("m1d_sql/{n}", n) { - let rows <- _sql(db |> select_from(type) |> _where(_.price > THRESHOLD)) - if (length(rows) == 0) { - b->failNow() - } - } - } - disk_db_cleanup(path) -} - -// --- M2m: no _sql, :memory: source --- -def run_m2m(b : B?; n : int) { - with_sqlite(":memory:") $(db) { - fixture_db(db, n) - b |> run("m2m_db_no_sql/{n}", n) { - let rows <- (db |> select_from(type) |> _where(_.price > THRESHOLD)) - if (length(rows) == 0) { - b->failNow() - } - } - } -} - -// --- M2d: no _sql, disk source --- -def run_m2d(b : B?; n : int) { - let path = "_sql_bench_m2d_{n}.db" - 
disk_db_setup(path, n) - with_sqlite(path) $(db) { - b |> run("m2d_db_no_sql/{n}", n) { - let rows <- (db |> select_from(type) |> _where(_.price > THRESHOLD)) - if (length(rows) == 0) { - b->failNow() - } - } - } - disk_db_cleanup(path) -} - -// --- M3: array LINQ (materializing intermediate arrays) --- +// --- m3: array LINQ (materializing intermediate arrays) --- def run_m3(b : B?; n : int) { let arr <- fixture_array(n) b |> run("m3_array/{n}", n) { @@ -72,7 +29,7 @@ def run_m3(b : B?; n : int) { } } -// --- M3f: array LINQ folded into a single fused pass --- +// --- m3f: array LINQ folded into a single fused pass --- def run_m3f(b : B?; n : int) { let arr <- fixture_array(n) b |> run("m3f_array_fold/{n}", n) { @@ -84,23 +41,8 @@ def run_m3f(b : B?; n : int) { } [benchmark] -def select_where_10k_m1m(b : B?) { - run_m1m(b, 10000) -} - -[benchmark] -def select_where_10k_m1d(b : B?) { - run_m1d(b, 10000) -} - -[benchmark] -def select_where_10k_m2m(b : B?) { - run_m2m(b, 10000) -} - -[benchmark] -def select_where_10k_m2d(b : B?) { - run_m2d(b, 10000) +def select_where_10k_m1(b : B?) 
{ + run_m1(b, 10000) } [benchmark] diff --git a/benchmarks/sql/select_where_order_take.das b/benchmarks/sql/select_where_order_take.das index b14fcfd428..128d44c285 100644 --- a/benchmarks/sql/select_where_order_take.das +++ b/benchmarks/sql/select_where_order_take.das @@ -6,11 +6,11 @@ require _common public let THRESHOLD = 500 let TAKE_N = 10 -// --- M1m: _sql over :memory: --- -def run_m1m(b : B?; n : int) { +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { with_sqlite(":memory:") $(db) { fixture_db(db, n) - b |> run("m1m_sql/{n}", n) { + b |> run("m1_sql/{n}", n) { let rows <- _sql(db |> select_from(type) |> _where(_.price > THRESHOLD) |> _order_by(_.price) @@ -22,59 +22,7 @@ def run_m1m(b : B?; n : int) { } } -// --- M1d: _sql over disk file --- -def run_m1d(b : B?; n : int) { - let path = "_sql_bench_swot_m1d_{n}.db" - disk_db_setup(path, n) - with_sqlite(path) $(db) { - b |> run("m1d_sql/{n}", n) { - let rows <- _sql(db |> select_from(type) - |> _where(_.price > THRESHOLD) - |> _order_by(_.price) - |> take(TAKE_N)) - if (length(rows) == 0) { - b->failNow() - } - } - } - disk_db_cleanup(path) -} - -// --- M2m: no _sql, :memory: source --- -def run_m2m(b : B?; n : int) { - with_sqlite(":memory:") $(db) { - fixture_db(db, n) - b |> run("m2m_db_no_sql/{n}", n) { - let rows <- (db |> select_from(type) - |> _where(_.price > THRESHOLD) - |> _order_by(_.price) - |> take(TAKE_N)) - if (length(rows) == 0) { - b->failNow() - } - } - } -} - -// --- M2d: no _sql, disk source --- -def run_m2d(b : B?; n : int) { - let path = "_sql_bench_swot_m2d_{n}.db" - disk_db_setup(path, n) - with_sqlite(path) $(db) { - b |> run("m2d_db_no_sql/{n}", n) { - let rows <- (db |> select_from(type) - |> _where(_.price > THRESHOLD) - |> _order_by(_.price) - |> take(TAKE_N)) - if (length(rows) == 0) { - b->failNow() - } - } - } - disk_db_cleanup(path) -} - -// --- M3: array LINQ (materializing intermediate arrays) --- +// --- m3: array LINQ (materializing intermediate arrays) 
--- def run_m3(b : B?; n : int) { let arr <- fixture_array(n) b |> run("m3_array/{n}", n) { @@ -87,7 +35,7 @@ def run_m3(b : B?; n : int) { } } -// --- M3f: array LINQ folded into a single fused pass --- +// --- m3f: array LINQ folded into a single fused pass --- def run_m3f(b : B?; n : int) { let arr <- fixture_array(n) b |> run("m3f_array_fold/{n}", n) { @@ -102,23 +50,8 @@ def run_m3f(b : B?; n : int) { } [benchmark] -def select_where_order_take_10k_m1m(b : B?) { - run_m1m(b, 10000) -} - -[benchmark] -def select_where_order_take_10k_m1d(b : B?) { - run_m1d(b, 10000) -} - -[benchmark] -def select_where_order_take_10k_m2m(b : B?) { - run_m2m(b, 10000) -} - -[benchmark] -def select_where_order_take_10k_m2d(b : B?) { - run_m2d(b, 10000) +def select_where_order_take_10k_m1(b : B?) { + run_m1(b, 10000) } [benchmark] diff --git a/utils/benchctl/README.md b/utils/benchctl/README.md index 55fa4ba1a2..b7b4706c65 100644 --- a/utils/benchctl/README.md +++ b/utils/benchctl/README.md @@ -67,7 +67,7 @@ Options: |------|-------------| | `--db ` | Database file path (default: `benchdata.db`) | | `--commit ` | Git commit hash to tag results with (default: `git rev-parse HEAD`) | -| `--tag ` | Tag label to attach to results - can be repeated | +| `--tag ` | Tag label to attach to results - can be repeated (must not contain `[` or `]`) | Input files must contain newline-delimited JSON records as produced by the dastest benchmark runner. Non-JSON lines are silently skipped. @@ -109,7 +109,7 @@ Options: |------|-------------| | `--db ` | Database file path | | `--commit ` | Filter to records with this exact commit hash | -| `--tag ` | Filter to records carrying this tag (uses LIKE under the hood — empty `--tag` means no filter) | +| `--tag ` | Filter to records carrying this tag (single-value; uses LIKE under the hood — empty `--tag` means no filter) | This is mostly needed to debug the selection filters before using a more useful `compare` command. 
diff --git a/utils/benchctl/main.das b/utils/benchctl/main.das index 746d56cf08..43a9a7e6f2 100644 --- a/utils/benchctl/main.das +++ b/utils/benchctl/main.das @@ -80,12 +80,14 @@ def run_subcommand(parsed : ParsedArgs) : string { var inscope db <- open_r._value if (need_init) { - run_reset_cmd(db) + let init_err = run_reset_cmd(db) + if (init_err != "") { + return init_err + } } if (cmd == "reset") { - run_reset_cmd(db) - return "" + return run_reset_cmd(db) } elif (cmd == "insert") { return run_insert_cmd(db, parsed) } elif (cmd == "query") { @@ -96,9 +98,16 @@ def run_subcommand(parsed : ParsedArgs) : string { return "unknown subcommand: {cmd}" } -def run_reset_cmd(db : SqlRunner) { - db |> drop_table_if_exists(type) - db |> create_table(type) +def run_reset_cmd(db : SqlRunner) : string { + let drop_err = db |> try_drop_table_if_exists(type) + if (drop_err |> is_some) { + return "drop table: {drop_err |> unwrap}" + } + let create_err = db |> try_create_table(type) + if (create_err |> is_some) { + return "create table: {create_err |> unwrap}" + } + return "" } def run_insert_cmd(db : SqlRunner; parsed : ParsedArgs) : string { @@ -113,6 +122,12 @@ def run_insert_cmd(db : SqlRunner; parsed : ParsedArgs) : string { var tag_string = "" for (t in parsed.tags) { + if (t == "") { + continue + } + if (find(t, "[") != -1 || find(t, "]") != -1) { + return "tag '{t}' contains illegal character '[' or ']'" + } tag_string += "[{t}]" // nolint:PERF001 — typically 0-2 tags, not a hot path } @@ -162,6 +177,9 @@ def query_benchmarks(db : SqlRunner; commit_filter : string; tag_filter : string } def run_query_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + if (length(parsed.tags) > 1) { + return "--tag may only be specified once for query" + } let tag_filter = length(parsed.tags) > 0 ? 
parsed.tags[0] : "" let entries <- query_benchmarks(db, parsed.commit, tag_filter) if (length(entries) == 0) { From af9a57129a9143bb41adc323cc2062a0de091ef6 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 7 May 2026 03:54:55 -0700 Subject: [PATCH 3/4] benchctl: round 2 review fixes; +indexed_lookup benchmark; +where+count fold test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Copilot review round 2 on PR #2599: - run_subcommand validates `cmd` against {reset, insert, query, compare} before opening the SQLite DB. Typos (e.g. `qurey`) no longer create an empty benchdata.db as a side effect. - Extracted validate_tag_chars helper; insert/query/compare all reject '[' or ']' in tag values consistently (previously only insert did). - run_compare_cmd restores the overlap-exclusion semantic the old code had: collects --old result IDs into a table and post-filters --new entries against it. Identical/empty filters on both sides can no longer compare a result set against itself. Implemented in daslang (no raw-SQL escape hatch). - benchmarks/README.md table previously claimed `count_aggregate` ran at 100K; updated to 1M to match the shipped benchmark constant. Two adds: - benchmarks/sql/indexed_lookup.das — point-lookup benchmark (`_where(_.id == K)` against PRIMARY KEY) at 1M rows. Inverse-asymmetry pair to count_aggregate: SQLite's b-tree wins by ~1000x (m1 ~3 us, m3 ~9 ms, m3f ~3.4 ms per lookup at JIT). Documents where indexed storage earns its keep. - tests/linq/test_linq_fold.das — regression test for the where+count fold rule added in fcb648157. Covers half-match, zero-match, all-match, and empty-source cases. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- benchmarks/README.md | 3 +- benchmarks/sql/indexed_lookup.das | 62 +++++++++++++++++++++++++++++++ tests/linq/test_linq_fold.das | 30 +++++++++++++++ utils/benchctl/main.das | 50 +++++++++++++++++++++---- 4 files changed, 137 insertions(+), 8 deletions(-) create mode 100644 benchmarks/sql/indexed_lookup.das diff --git a/benchmarks/README.md b/benchmarks/README.md index 5828995da7..323e4fbf60 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -73,4 +73,5 @@ Every `.das` benchmark file in this directory tree is listed below, grouped by s | `_common.das` | Shared `Car` `[sql_table]` + `fixture_db` / `fixture_array` (not a benchmark) | | `select_where.das` | Filter chain — `_where(_.price > 500)` over 10K rows. Modest asymmetry; m3 walks every row. | | `select_where_order_take.das` | Filter + sort + limit — `_where \|> _order_by(_.price) \|> take(10)`. SQL ORDER BY + LIMIT bounds work; m3 sorts the full filtered set. | -| `count_aggregate.das` | Aggregate — `count()` after `_where` over 100K rows. SQL pushes `COUNT(*)` to the engine returning one row; m3 materializes the full filtered array then counts it; m3f fuses where+count into one pass. Highest-asymmetry chain. | +| `count_aggregate.das` | Aggregate — `count()` after `_where` over 1M rows. SQL pushes `COUNT(*)` to the engine returning one row; m3 materializes the full filtered array then counts it; m3f fuses where+count into one pass. Highest-asymmetry chain in daslang's favor. | +| `indexed_lookup.das` | Indexed point lookup — `_where(_.id == K)` against the PRIMARY KEY over 1M rows. SQLite uses the PK b-tree (O(log n)); m3/m3f have no index (O(n) linear scan). Inverse-asymmetry: SQLite wins by ~1000×, illustrating where indexed storage earns its keep. 
| diff --git a/benchmarks/sql/indexed_lookup.das b/benchmarks/sql/indexed_lookup.das new file mode 100644 index 0000000000..40ebe6d987 --- /dev/null +++ b/benchmarks/sql/indexed_lookup.das @@ -0,0 +1,62 @@ +options gen2 +options persistent_heap + +require _common public + +// Indexed point-lookup: _where(_.id == K) where id is the PRIMARY KEY. +// SQLite uses the PK b-tree -> O(log n). +// m3/m3f have no index -> O(n) linear scan over the array. +// Inverse-asymmetry of count_aggregate: m1 wins by complexity-class margin. + +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + let key = n / 2 + b |> run("m1_sql/{n}") { + let c = _sql(db |> select_from(type) |> _where(_.id == key) |> count()) + if (c == 0) { + b->failNow() + } + } + } +} + +// --- m3: array LINQ (linear scan) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + let key = n / 2 + b |> run("m3_array/{n}") { + let c = arr |> _where(_.id == key) |> count() + if (c == 0) { + b->failNow() + } + } +} + +// --- m3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + let key = n / 2 + b |> run("m3f_array_fold/{n}") { + let c = _fold(each(arr)._where(_.id == key).count()) + if (c == 0) { + b->failNow() + } + } +} + +[benchmark] +def indexed_lookup_1m_m1(b : B?) { + run_m1(b, 1000000) +} + +[benchmark] +def indexed_lookup_1m_m3(b : B?) { + run_m3(b, 1000000) +} + +[benchmark] +def indexed_lookup_1m_m3f(b : B?) { + run_m3f(b, 1000000) +} diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das index 1405e9de8e..180e96ceda 100644 --- a/tests/linq/test_linq_fold.das +++ b/tests/linq/test_linq_fold.das @@ -724,3 +724,33 @@ def test_order_distinct(t : T?) { } } +[test] +def test_where_count_fold(t : T?) { + // Guards the where + count fold rule in linq_boost (fold_where_count). 
+ // The fused loop emits a single counter pass with no intermediate filter array. + t |> run("where + count: half match") @(t : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let c = _fold(each(arr)._where(_ > 5).count()) + t |> equal(typeinfo typename(c), "int const") + t |> equal(5, c) + } + + t |> run("where + count: zero matches") @(t : T?) { + let arr <- [1, 2, 3, 4, 5] + let c = _fold(each(arr)._where(_ > 999).count()) + t |> equal(0, c) + } + + t |> run("where + count: all match") @(t : T?) { + let arr <- [1, 2, 3, 4, 5] + let c = _fold(each(arr)._where(_ > 0).count()) + t |> equal(5, c) + } + + t |> run("where + count: empty source") @(t : T?) { + let arr : array + let c = _fold(each(arr)._where(_ > 0).count()) + t |> equal(0, c) + } +} + diff --git a/utils/benchctl/main.das b/utils/benchctl/main.das index 43a9a7e6f2..f8de6dcd54 100644 --- a/utils/benchctl/main.das +++ b/utils/benchctl/main.das @@ -72,6 +72,9 @@ def main : int { def run_subcommand(parsed : ParsedArgs) : string { let cmd = parsed.command + if (cmd != "reset" && cmd != "insert" && cmd != "query" && cmd != "compare") { + return "unknown subcommand: {cmd}" + } let need_init = !file_exists(parsed.db) && (cmd == "insert" || cmd == "query" || cmd == "compare") var open_r <- try_open_sqlite(parsed.db) if (open_r |> is_err) { @@ -92,10 +95,15 @@ def run_subcommand(parsed : ParsedArgs) : string { return run_insert_cmd(db, parsed) } elif (cmd == "query") { return run_query_cmd(db, parsed) - } elif (cmd == "compare") { - return run_compare_cmd(db, parsed) } - return "unknown subcommand: {cmd}" + return run_compare_cmd(db, parsed) +} + +def validate_tag_chars(t : string) : string { + if (find(t, "[") != -1 || find(t, "]") != -1) { + return "tag '{t}' contains illegal character '[' or ']'" + } + return "" } def run_reset_cmd(db : SqlRunner) : string { @@ -125,8 +133,9 @@ def run_insert_cmd(db : SqlRunner; parsed : ParsedArgs) : string { if (t == "") { continue } - if (find(t, "[") != -1 || find(t, 
"]") != -1) { - return "tag '{t}' contains illegal character '[' or ']'" + let tag_err = validate_tag_chars(t) + if (tag_err != "") { + return tag_err } tag_string += "[{t}]" // nolint:PERF001 — typically 0-2 tags, not a hot path } @@ -181,6 +190,10 @@ def run_query_cmd(db : SqlRunner; parsed : ParsedArgs) : string { return "--tag may only be specified once for query" } let tag_filter = length(parsed.tags) > 0 ? parsed.tags[0] : "" + let tag_err = validate_tag_chars(tag_filter) + if (tag_err != "") { + return tag_err + } let entries <- query_benchmarks(db, parsed.commit, tag_filter) if (length(entries) == 0) { print("no results\n") @@ -210,14 +223,37 @@ def run_query_cmd(db : SqlRunner; parsed : ParsedArgs) : string { } def run_compare_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + let old_tag_err = validate_tag_chars(parsed.old_tag) + if (old_tag_err != "") { + return old_tag_err + } + let new_tag_err = validate_tag_chars(parsed.new_tag) + if (new_tag_err != "") { + return new_tag_err + } + let old_entries <- query_benchmarks(db, parsed.old_commit, parsed.old_tag) - let new_entries <- query_benchmarks(db, parsed.new_commit, parsed.new_tag) + let raw_new_entries <- query_benchmarks(db, parsed.new_commit, parsed.new_tag) + + // Exclude rows already in --old from --new so identical / overlapping filters + // (e.g. both empty) cannot compare a result set against itself. 
+ var old_id_set : table + for (e in old_entries) { + old_id_set |> insert(e.id, true) + } + var new_entries : array + new_entries |> reserve(length(raw_new_entries)) + for (e in raw_new_entries) { + if (!key_exists(old_id_set, e.id)) { + new_entries |> push_clone(e) + } + } if (length(old_entries) == 0) { return "no rows match --old-commit / --old-tag" } if (length(new_entries) == 0) { - return "no rows match --new-commit / --new-tag" + return "no rows match --new-commit / --new-tag (after excluding rows already in --old)" } var key_map = @(x : string) : string { From fa13a2ea51f89a8fda9d71d9a9bbf33333e9fc30 Mon Sep 17 00:00:00 2001 From: Boris Batkin Date: Thu, 7 May 2026 08:49:21 -0700 Subject: [PATCH 4/4] use sets --- CLAUDE.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CLAUDE.md b/CLAUDE.md index 2d12047e8e..67982e5731 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -207,6 +207,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for - `table[key]` (read or assign) is **safe** — do NOT wrap in `unsafe(...)`. Some legacy daslib code has `unsafe(tab[k])`; do not propagate that pattern - **Move-assign table literal:** `tab <- { "k" => v }` works for both `var tab <- { ... }` declarations and `tab <- { ... }` reassignment to existing variables - **Table comprehension move-assign:** `tab <- { for(x in range(5)); x => x*x }` — same move-assign rules apply +- **`table<K>` (one type param) is the set type** — value type elided. `var s : table<int>; s |> insert(5); key_exists(s, 5)`. Distinct from `table<K; V>` (the map form); both shapes coexist. ### Iterators and `each`