diff --git a/CLAUDE.md b/CLAUDE.md index 2d12047e8e..67982e5731 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -207,6 +207,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for - `table[key]` (read or assign) is **safe** — do NOT wrap in `unsafe(...)`. Some legacy daslib code has `unsafe(tab[k])`; do not propagate that pattern - **Move-assign table literal:** `tab <- { "k" => v }` works for both `var tab <- { ... }` declarations and `tab <- { ... }` reassignment to existing variables - **Table comprehension move-assign:** `tab <- { for(x in range(5)); x => x*x }` — same move-assign rules apply +- **`table<K>` (one type param) is the set type** — value type elided. `var s : table<int>; s |> insert(5); key_exists(s, 5)`. Distinct from `table<K; V>` (the map form); both shapes coexist. ### Iterators and `each` diff --git a/benchmarks/README.md b/benchmarks/README.md index 754d576ae1..323e4fbf60 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -57,3 +57,21 @@ Every `.das` benchmark file in this directory tree is listed below, grouped by s | File | Description | |---|---| | `bench_v_ldu.das` | Fusion-engine `Op2At` array-indexed read at sizeof(T) ∈ {4,8,12,16} — int, int64, float3, float4. Used to compare DAS_FUSION=0 vs current `DAS_LDU_WORKHORSE` ladder vs `v_zero+memcpy(sizeof(CTYPE))` | + +## sql/ + +3-mode comparison: `_sql` macro over `:memory:` SQLite vs pure in-memory `array` LINQ, with the array form measured both in its naive (intermediate-materializing) and `_fold`-fused shapes. Mirrors the `tests/dasSQLITE/parity_check_*.das` pattern but oriented to throughput. 
+ +| Mode | Source | Form | +|---|---|---| +| `m1` | `:memory:` SQLite | `_sql` — compile-time SQL emission, work pushed to the engine | +| `m3` | pre-populated `array` | plain LINQ chain — materializes intermediate filter/sort arrays | +| `m3f` | pre-populated `array` | `_fold` from `daslib/linq_boost` — fuses the chain into a single pass, in-place where possible | + +| File | Description | +|---|---| +| `_common.das` | Shared `Car` `[sql_table]` + `fixture_db` / `fixture_array` (not a benchmark) | +| `select_where.das` | Filter chain — `_where(_.price > 500)` over 10K rows. Modest asymmetry; m3 walks every row. | +| `select_where_order_take.das` | Filter + sort + limit — `_where \|> _order_by(_.price) \|> take(10)`. SQL ORDER BY + LIMIT bounds work; m3 sorts the full filtered set. | +| `count_aggregate.das` | Aggregate — `count()` after `_where` over 1M rows. SQL pushes `COUNT(*)` to the engine returning one row; m3 materializes the full filtered array then counts it; m3f fuses where+count into one pass. Highest-asymmetry chain in daslang's favor. | +| `indexed_lookup.das` | Indexed point lookup — `_where(_.id == K)` against the PRIMARY KEY over 1M rows. SQLite uses the PK b-tree (O(log n)); m3/m3f have no index (O(n) linear scan). Inverse-asymmetry: SQLite wins by ~1000×, illustrating where indexed storage earns its keep. 
| diff --git a/benchmarks/sql/_common.das b/benchmarks/sql/_common.das new file mode 100644 index 0000000000..2952b975f1 --- /dev/null +++ b/benchmarks/sql/_common.das @@ -0,0 +1,35 @@ +options gen2 +options persistent_heap + +require daslib/sql public +require daslib/linq_boost public +require sqlite/sqlite_boost public +require sqlite/sqlite_linq public +require dastest/testing_boost public +require daslib/fio public + +[sql_table(name = "Cars")] +struct Car { + @sql_primary_key id : int + name : string + price : int +} + +def public fixture_db(db : SqlRunner; n : int) { + db |> create_table(type) + var rows : array + rows |> resize(n) + for (i in range(n)) { + rows[i] = Car(id = i + 1, name = "Car{i}", price = (i * 37) % 1000) + } + db |> insert(rows) +} + +def public fixture_array(n : int) : array { + var arr : array + arr |> resize(n) + for (i in range(n)) { + arr[i] = Car(id = i + 1, name = "Car{i}", price = (i * 37) % 1000) + } + return <- arr +} diff --git a/benchmarks/sql/count_aggregate.das b/benchmarks/sql/count_aggregate.das new file mode 100644 index 0000000000..728bb11f90 --- /dev/null +++ b/benchmarks/sql/count_aggregate.das @@ -0,0 +1,61 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 500 + +// SQL pushes COUNT(*) to the engine returning one row. +// m3 must materialize the full filtered array, then walk to count it. +// m3f folds where+count into a single fused pass (no intermediate array). +// Highest-asymmetry comparison among the three benchmark chains. 
+ +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1_sql/{n}", n) { + let c = _sql(db |> select_from(type) |> _where(_.price > THRESHOLD) |> count()) + if (c == 0) { + b->failNow() + } + } + } +} + +// --- m3: array LINQ (materializes intermediate filter array) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let c = arr |> _where(_.price > THRESHOLD) |> count() + if (c == 0) { + b->failNow() + } + } +} + +// --- m3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let c = _fold(each(arr)._where(_.price > THRESHOLD).count()) + if (c == 0) { + b->failNow() + } + } +} + +[benchmark] +def count_aggregate_1m_m1(b : B?) { + run_m1(b, 1000000) +} + +[benchmark] +def count_aggregate_1m_m3(b : B?) { + run_m3(b, 1000000) +} + +[benchmark] +def count_aggregate_1m_m3f(b : B?) { + run_m3f(b, 1000000) +} diff --git a/benchmarks/sql/indexed_lookup.das b/benchmarks/sql/indexed_lookup.das new file mode 100644 index 0000000000..40ebe6d987 --- /dev/null +++ b/benchmarks/sql/indexed_lookup.das @@ -0,0 +1,62 @@ +options gen2 +options persistent_heap + +require _common public + +// Indexed point-lookup: _where(_.id == K) where id is the PRIMARY KEY. +// SQLite uses the PK b-tree -> O(log n). +// m3/m3f have no index -> O(n) linear scan over the array. +// Inverse-asymmetry of count_aggregate: m1 wins by complexity-class margin. 
+ +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + let key = n / 2 + b |> run("m1_sql/{n}") { + let c = _sql(db |> select_from(type) |> _where(_.id == key) |> count()) + if (c == 0) { + b->failNow() + } + } + } +} + +// --- m3: array LINQ (linear scan) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + let key = n / 2 + b |> run("m3_array/{n}") { + let c = arr |> _where(_.id == key) |> count() + if (c == 0) { + b->failNow() + } + } +} + +// --- m3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + let key = n / 2 + b |> run("m3f_array_fold/{n}") { + let c = _fold(each(arr)._where(_.id == key).count()) + if (c == 0) { + b->failNow() + } + } +} + +[benchmark] +def indexed_lookup_1m_m1(b : B?) { + run_m1(b, 1000000) +} + +[benchmark] +def indexed_lookup_1m_m3(b : B?) { + run_m3(b, 1000000) +} + +[benchmark] +def indexed_lookup_1m_m3f(b : B?) { + run_m3f(b, 1000000) +} diff --git a/benchmarks/sql/select_where.das b/benchmarks/sql/select_where.das new file mode 100644 index 0000000000..9c1174765f --- /dev/null +++ b/benchmarks/sql/select_where.das @@ -0,0 +1,56 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 500 + +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1_sql/{n}", n) { + let rows <- _sql(db |> select_from(type) |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } + } +} + +// --- m3: array LINQ (materializing intermediate arrays) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let rows <- (arr |> _where(_.price > THRESHOLD)) + if (length(rows) == 0) { + b->failNow() + } + } +} + +// --- m3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> 
run("m3f_array_fold/{n}", n) { + let rows <- _fold(each(arr)._where(_.price > THRESHOLD).to_array()) + if (length(rows) == 0) { + b->failNow() + } + } +} + +[benchmark] +def select_where_10k_m1(b : B?) { + run_m1(b, 10000) +} + +[benchmark] +def select_where_10k_m3(b : B?) { + run_m3(b, 10000) +} + +[benchmark] +def select_where_10k_m3f(b : B?) { + run_m3f(b, 10000) +} diff --git a/benchmarks/sql/select_where_order_take.das b/benchmarks/sql/select_where_order_take.das new file mode 100644 index 0000000000..128d44c285 --- /dev/null +++ b/benchmarks/sql/select_where_order_take.das @@ -0,0 +1,65 @@ +options gen2 +options persistent_heap + +require _common public + +let THRESHOLD = 500 +let TAKE_N = 10 + +// --- m1: _sql over :memory: --- +def run_m1(b : B?; n : int) { + with_sqlite(":memory:") $(db) { + fixture_db(db, n) + b |> run("m1_sql/{n}", n) { + let rows <- _sql(db |> select_from(type) + |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } + } +} + +// --- m3: array LINQ (materializing intermediate arrays) --- +def run_m3(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3_array/{n}", n) { + let rows <- (arr |> _where(_.price > THRESHOLD) + |> _order_by(_.price) + |> take(TAKE_N)) + if (length(rows) == 0) { + b->failNow() + } + } +} + +// --- m3f: array LINQ folded into a single fused pass --- +def run_m3f(b : B?; n : int) { + let arr <- fixture_array(n) + b |> run("m3f_array_fold/{n}", n) { + let rows <- _fold(each(arr)._where(_.price > THRESHOLD) + ._order_by(_.price) + .take(TAKE_N) + .to_array()) + if (length(rows) == 0) { + b->failNow() + } + } +} + +[benchmark] +def select_where_order_take_10k_m1(b : B?) { + run_m1(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m3(b : B?) { + run_m3(b, 10000) +} + +[benchmark] +def select_where_order_take_10k_m3f(b : B?) 
{ + run_m3f(b, 10000) +} diff --git a/daslib/linq_boost.das b/daslib/linq_boost.das index 8b7ee9866e..cc95f369a5 100644 --- a/daslib/linq_boost.das +++ b/daslib/linq_boost.das @@ -956,6 +956,11 @@ var private g_foldSeq = [ // those are applied in order calls = ["distinct", "order" ], folder = @@fold_order_distinct ), +// where + count (single-pass count, no intermediate filter array) + FoldSequence( + calls = ["where_", "count"], + folder = @@fold_where_count + ), // select and where FoldSequence( calls = ["where_", "select" ], @@ -1030,6 +1035,38 @@ def private fold_where(argIndex : int; var topValue : Expression?; var blk : Exp return append_comprehension(argIndex, topValue, comprehension, blk, calls[0]._0.at) } +[macro_function] +def private fold_where_count(argIndex : int; var topValue : Expression?; var blk : ExprBlock?; var calls : array>) : Expression? { + //! folds `_where(p) |> count()` into a single-pass loop with the predicate inlined — no intermediate filter array, no block-call overhead + var eWhere = calls[0]._0 + let srcName = "`source`{argIndex}`{eWhere.at.line}`{eWhere.at.column}" + let itName = "`it`{argIndex}`{eWhere.at.line}`{eWhere.at.column}" + let nName = "`n`{argIndex}`{eWhere.at.line}`{eWhere.at.column}" + var whereCond = fold_linq_cond(eWhere.arguments[1], itName) + var fusedCall : Expression? 
= qmacro(invoke($($i(srcName) : typedecl($e(topValue)) - const) { + var $i(nName) = 0 + for ($i(itName) in $i(srcName)) { + if ($e(whereCond)) { + $i(nName) ++ + } + } + return $i(nName) + }, $e(topValue))) + fusedCall.force_at(calls[0]._0.at) + fusedCall.force_generated(true) + let newArgName = "pass_{argIndex}" + blk.list |> emplace_new <| qmacro_expr() { + var $i(newArgName) = $e(fusedCall) + } + (blk.list.back() as ExprLet).variables[0].flags |= VariableFlags.can_shadow + if (argIndex != 0) { + blk.list |> emplace_new <| qmacro_expr() { + delete $e(topValue) + } + } + return qmacro($i(newArgName)) +} + [macro_function] def private fold_select(argIndex : int; var topValue : Expression?; var blk : ExprBlock?; var calls : array>) : Expression? { //! folds select into a single comprehension diff --git a/tests/linq/test_linq_fold.das b/tests/linq/test_linq_fold.das index 1405e9de8e..180e96ceda 100644 --- a/tests/linq/test_linq_fold.das +++ b/tests/linq/test_linq_fold.das @@ -724,3 +724,33 @@ def test_order_distinct(t : T?) { } } +[test] +def test_where_count_fold(t : T?) { + // Guards the where + count fold rule in linq_boost (fold_where_count). + // The fused loop emits a single counter pass with no intermediate filter array. + t |> run("where + count: half match") @(t : T?) { + let arr <- [1, 2, 3, 4, 5, 6, 7, 8, 9, 10] + let c = _fold(each(arr)._where(_ > 5).count()) + t |> equal(typeinfo typename(c), "int const") + t |> equal(5, c) + } + + t |> run("where + count: zero matches") @(t : T?) { + let arr <- [1, 2, 3, 4, 5] + let c = _fold(each(arr)._where(_ > 999).count()) + t |> equal(0, c) + } + + t |> run("where + count: all match") @(t : T?) { + let arr <- [1, 2, 3, 4, 5] + let c = _fold(each(arr)._where(_ > 0).count()) + t |> equal(5, c) + } + + t |> run("where + count: empty source") @(t : T?) 
{ + let arr : array + let c = _fold(each(arr)._where(_ > 0).count()) + t |> equal(0, c) + } +} + diff --git a/utils/benchctl/README.md b/utils/benchctl/README.md index 851b203c65..b7b4706c65 100644 --- a/utils/benchctl/README.md +++ b/utils/benchctl/README.md @@ -13,7 +13,7 @@ benchctl stores benchmark output in a local SQLite database and provides statist Key capabilities: - Insert benchmark JSON output files into a persistent database, tagged by commit hash (and optionally custom tags) -- Query the database with raw SQL conditions (e.g. selecting by name or tags) +- Query the database with structured filter flags (`--commit`, `--tag`) - Compare two sets of results - Compute geometric mean deltas across all benchmarks in a comparison @@ -31,7 +31,7 @@ Key capabilities: daslang utils/benchctl/main.das -- [options...] ``` -All commands accept `--db ` to specify the database file (default: `benchdata.db`) and `--colors false` to disable ANSI color output. +All commands accept `--db ` to specify the database file (default: `benchdata.db`) and `--no-color` to disable ANSI color output. Consider using an explicit name for a long-term database while using the default `benchdata.db` as a scratch db you can reset between the experiments. @@ -44,8 +44,6 @@ Initializes (or reinitializes) the benchmark database. > **Warning:** drops all existing data. ``` -# The name is benchdata.db by default, so this explicit parameter -# is only used for demonstrative purposes daslang utils/benchctl/main.das -- reset --db benchdata.db ``` @@ -69,12 +67,12 @@ Options: |------|-------------| | `--db ` | Database file path (default: `benchdata.db`) | | `--commit ` | Git commit hash to tag results with (default: `git rev-parse HEAD`) | -| `--tag ` | Tag label to attach to results - can be repeated | +| `--tag ` | Tag label to attach to results - can be repeated (must not contain `[` or `]`) | Input files must contain newline-delimited JSON records as produced by the dastest benchmark runner. 
Non-JSON lines are silently skipped. ``` -# Adds all samples from result1.txt and result2.txt tagging them with 2 prodived tags, +# Adds all samples from result1.txt and result2.txt tagging them with 2 provided tags, # the commit hash will be "git rev-parse HEAD" (use --commit to override that) daslang utils/benchctl/main.das -- insert --tag example1 --tag foo result1.txt result2.txt ``` @@ -99,10 +97,10 @@ daslang utils/benchctl/main.das -- insert results.txt ### `query` -Displays benchmark records from the database. +Displays benchmark records from the database. Filters compose with AND. ``` -daslang utils/benchctl/main.das -- query [--db benchdata.db] [--select ] +daslang utils/benchctl/main.das -- query [--db benchdata.db] [--commit ] [--tag ] ``` Options: @@ -110,9 +108,10 @@ Options: | Flag | Description | |------|-------------| | `--db ` | Database file path | -| `--select ` | SQL `WHERE` clause to filter records | +| `--commit ` | Filter to records with this exact commit hash | +| `--tag ` | Filter to records carrying this tag (single-value; uses LIKE under the hood — empty `--tag` means no filter) | -This is mostly needed to debug the selection queries before using a more useful `compare` command. +This is mostly needed to debug the selection filters before using a more useful `compare` command. **Examples:** @@ -121,45 +120,18 @@ This is mostly needed to debug the selection queries before using a more useful daslang utils/benchctl/main.das -- query # Show records for a specific commit -daslang utils/benchctl/main.das -- query --select "commit_hash='abc12345'" +daslang utils/benchctl/main.das -- query --commit abc12345 -# Show only string-allocation-heavy benchmarks -daslang utils/benchctl/main.das -- query --select "string_allocs > 0" -``` - -You can use any columns from the `benchmarks` table to do the filtering. - -> Keep in mind: the table contains the samples, they are joined and analyzed together during the processing. 
+# Show records with a tag +daslang utils/benchctl/main.das -- query --tag before +# Combine filters (AND) +daslang utils/benchctl/main.das -- query --commit abc12345 --tag before ``` -id INTEGER -- an autoincrement ID of the sample - -commit_hash TEXT -- a git commit hash -tags TEXT -- a list of tags bundled inside a string (see below) -insert_date INTEGER -- the date of the "insert" command being executed for this sample -full_name TEXT -- a full sample's benchmark identifier, "{name}/{sub_name}" -name TEXT -- a benchmark's function name -sub_name TEXT -- a benchmark's "run" argument which specifies the subtest +If you need a one-off filter that the structured flags don't cover (e.g. `WHERE string_allocs > 0`), open the DB directly with the `sqlite3` shell — `benchctl` no longer accepts arbitrary SQL via the CLI. -mode TEXT -- the execution mode ("JIT", "INTERP", or "AOT") - -n INTEGER -- how many times the benchmarking function was executed - -time_ns INTEGER -- nanosecs per every operation run (time) -allocs INTEGER -- a number of non-string heap allocs -heap_bytes INTEGER -- a number of heap bytes allocated (excluding the string bytes) -string_allocs INTEGER -- like allocs, but only for heap strings -string_heap_bytes INTEGER -- like heap_bytes, but only for heap strings -``` - -Tags are stored as `[tag1][tag2]` strings, so you can filter by tag with `LIKE`: - -```sh -daslang utils/benchctl/main.das -- query --select "tags LIKE '%[before]%'" -``` - -> "Has no tag" condition can be implemented using the `NOT LIKE` operation. +> Stored columns: `id`, `commit_hash`, `tags`, `insert_date`, `full_name`, `name`, `sub_name`, `mode`, `n`, `time_ns`, `allocs`, `heap_bytes`, `string_allocs`, `string_heap_bytes`. Tags are stored as `[tag1][tag2]` strings. 
--- @@ -176,27 +148,29 @@ Options: | Flag | Description | |------|-------------| | `--db ` | Database file path | -| `--select_old ` | SQL `WHERE` clause for the baseline (old) results | -| `--select_new ` | SQL `WHERE` clause for the new results | +| `--old-commit ` | Baseline commit filter | +| `--old-tag ` | Baseline tag filter | +| `--new-commit ` | New commit filter | +| `--new-tag ` | New tag filter | | `--s to>` | Regex rename: rewrite old benchmark names to match new names | -| `--colors false` | Disable colored output | +| `--no-color` | Disable colored output | -> Both "select" arguments are abbreviated for convenience to `--old` and `--new`, but you're still encouraged to create an alias/shortcut for the most common use cases you might have. +Each side's filters compose with AND. Empty side filters mean "all rows for that side" — usually you want at least one filter per side. **Example - compare two commits:** ```sh daslang utils/benchctl/main.das -- compare \ - --select_old "commit_hash='abc12345'" \ - --select_new "commit_hash='def67890'" + --old-commit abc12345 \ + --new-commit def67890 ``` **Example - compare using tags:** ```sh daslang utils/benchctl/main.das -- compare \ - --select_old "tags LIKE '%[before]%'" \ - --select_new "tags LIKE '%[after]%'" + --old-tag before \ + --new-tag after ``` Both sample sets (old and new) will be compared over the matching `full_name`. This means only the same benchmark results (but across different revisions) can be compared. Unless you use a renaming rule. @@ -207,8 +181,8 @@ If benchmarks have different names, but logically can be compared to one another ```sh daslang utils/benchctl/main.das -- compare \ - --select_old "..." \ - --select_new "..." \ + --old-tag before \ + --new-tag after \ --s "BenchmarkBad=>BenchmarkGood" ``` @@ -273,8 +247,5 @@ daslang dastest/dastest.das -- --test mybench.das --bench --bench-format json | daslang utils/benchctl/main.das -- insert --tag after new.json # 4. 
Compare the two runs -# (note: using the aliased select_old and select_new) -daslang utils/benchctl/main.das -- compare \ - --old "tags LIKE '%[before]%'" \ - --new "tags LIKE '%[after]%'" +daslang utils/benchctl/main.das -- compare --old-tag before --new-tag after ``` diff --git a/utils/benchctl/bench_args.das b/utils/benchctl/bench_args.das new file mode 100644 index 0000000000..f73f75c41f --- /dev/null +++ b/utils/benchctl/bench_args.das @@ -0,0 +1,94 @@ +options gen2 + +require daslib/clargs + +[CommandLineArgs] +struct BenchctlArgs { + @clarg_positional + @clarg_doc = "Subcommand: reset / insert / query / compare / help" + command : Option + + @clarg_positional + @clarg_doc = "Files to insert (insert subcommand only)" + files : array + + @clarg_doc = "SQLite database path" + db : string = "benchdata.db" + + @clarg_name = "no-color" + @clarg_doc = "Disable colored output" + no_color : bool + + @clarg_doc = "Git commit hash for insert (default: git rev-parse HEAD); also filters query" + commit : string + + @clarg_doc = "Tag (insert: repeatable; query: filter)" + tag : array + + @clarg_name = "old-commit" + @clarg_doc = "Baseline commit hash filter (compare)" + old_commit : string + + @clarg_name = "old-tag" + @clarg_doc = "Baseline tag filter (compare)" + old_tag : string + + @clarg_name = "new-commit" + @clarg_doc = "New commit hash filter (compare)" + new_commit : string + + @clarg_name = "new-tag" + @clarg_doc = "New tag filter (compare)" + new_tag : string + + @clarg_doc = "Regex rename old=>new before pairing benchmark names (compare)" + s : string + + @clarg_short = "?" 
+ @clarg_doc = "Show this help and exit" + help : bool +} + +struct public ParsedArgs { + ok : bool + err : string + command : string + files : array + db : string + no_color : bool + commit : string + tags : array + old_commit : string + old_tag : string + new_commit : string + new_tag : string + s : string + help : bool +} + +def public parse_benchctl_args(argv : array) : ParsedArgs { + var parse_r <- parse_args(type, argv) + if (parse_r |> is_err) { + return ParsedArgs(ok = false, err = parse_r |> unwrap_err) + } + var parsed <- parse_r |> move_unwrap + return <- ParsedArgs( + ok = true, + command = parsed.command ?? "", + files <- parsed.files, + db = parsed.db, + no_color = parsed.no_color, + commit = parsed.commit, + tags <- parsed.tag, + old_commit = parsed.old_commit, + old_tag = parsed.old_tag, + new_commit = parsed.new_commit, + new_tag = parsed.new_tag, + s = parsed.s, + help = parsed.help + ) +} + +def public print_benchctl_help() { + print(format_help_with_auto_help(get_command_info(type), "benchctl")) +} diff --git a/utils/benchctl/bench_sql.das b/utils/benchctl/bench_sql.das deleted file mode 100644 index 70b2bb0f82..0000000000 --- a/utils/benchctl/bench_sql.das +++ /dev/null @@ -1,82 +0,0 @@ -require sqlite/sqlite_boost - -require daslib/stringify -require daslib/defer - -def db_exec(db : sqlite3?; sql : string) : string { - var err_msg : string - defer() { - sqlite3_free(err_msg) - } - let rc = sqlite3_exec(db, sql, unsafe(addr(err_msg))) - return (rc != SQLITE_OK) ? 
clone_string(err_msg) : "" -} - -def db_bind_text_param(stmt : sqlite3_stmt?; key : string; v : string) { - let idx = sqlite3_bind_parameter_index(stmt, key) - sqlite3_bind_text(stmt, idx, v) -} - -def db_bind_int64_param(stmt : sqlite3_stmt?; key : string; v : int64) { - let idx = sqlite3_bind_parameter_index(stmt, key) - sqlite3_bind_int64(stmt, idx, v) -} - -let sql_bench_insert = %stringify~ - INSERT INTO benchmarks ( - commit_hash, - tags, - insert_date, - full_name, - name, - sub_name, - mode, - n, - time_ns, - allocs, - heap_bytes, - string_allocs, - string_heap_bytes - ) - VALUES ( - @commit_hash, - @tags, - strftime('%s', 'now'), - @name || '/' || @sub_name, - @name, - @sub_name, - @mode, - @n, - @time_ns, - @allocs, - @heap_bytes, - @string_allocs, - @string_heap_bytes - ); -%% - -let sql_db_init = %stringify~ - DROP TABLE IF EXISTS benchmarks; - - CREATE TABLE benchmarks ( - id INTEGER PRIMARY KEY AUTOINCREMENT, - - commit_hash TEXT, - tags TEXT, - insert_date INTEGER, - - full_name TEXT, - name TEXT, - sub_name TEXT, - - mode TEXT, - - n INTEGER, - time_ns INTEGER, - - allocs INTEGER, - heap_bytes INTEGER, - string_allocs INTEGER, - string_heap_bytes INTEGER - ); -%% diff --git a/utils/benchctl/bench_table.das b/utils/benchctl/bench_table.das new file mode 100644 index 0000000000..c0e86763d2 --- /dev/null +++ b/utils/benchctl/bench_table.das @@ -0,0 +1,23 @@ +options gen2 + +require daslib/sql +require sqlite/sqlite_boost +require sqlite/sqlite_linq + +[sql_table(name = "benchmarks")] +struct Benchmark { + @sql_primary_key id : int + commit_hash : string + tags : string + insert_date : int64 + full_name : string + name : string + sub_name : string + mode : string + n : int64 + time_ns : int64 + allocs : int64 + heap_bytes : int64 + string_allocs : int64 + string_heap_bytes : int64 +} diff --git a/utils/benchctl/benchstat.das b/utils/benchctl/benchstat.das index 15fc403987..9966efdeec 100644 --- a/utils/benchctl/benchstat.das +++ 
b/utils/benchctl/benchstat.das @@ -1,33 +1,15 @@ +options gen2 + require dastest/testing_boost require daslib/strings_boost require daslib/json_boost require math -struct BenchmarkEntry { - id : int64 - - commit_hash : string - tags : string - insert_date : int64 - - full_name : string - name : string - sub_name : string - - mode : string - - n : int64 - time_ns : int64 - - allocs : int64 - heap_bytes : int64 - string_allocs : int64 - string_heap_bytes : int64 -} +require bench_table public struct BenchmarkSampleSet { key : string - list : array + list : array // Assigned later, when stats are computed. stats : BenchmarkStats? = null @@ -85,7 +67,7 @@ def parse_bench_output(data : string) : array { return <- entries } -def make_sample_sets(entries : array) : table { +def make_sample_sets(entries : array) : table { var result : table for (e in entries) { let key := e.name + "/" + e.sub_name diff --git a/utils/benchctl/flags.das b/utils/benchctl/flags.das deleted file mode 100644 index 6abda2606d..0000000000 --- a/utils/benchctl/flags.das +++ /dev/null @@ -1,51 +0,0 @@ -require strings -require daslib/strings_boost - -struct Flags { - tags : array - argmap : table - values : array -} - -def get_string_aliased_arg(flags : Flags; key : string; alias : string, default_value : string = "") : string { - return get_string_arg(flags, alias, get_string_arg(flags, key, default_value)) -} - -def get_string_arg(flags : Flags; key : string; default_value : string = "") : string { - return flags.argmap?[key] ?? 
default_value -} - -def parse_flags(args : array) : Flags { - var tags : array - var values : array - var argmap : table - var i = 0 - while (i < length(args)) { - if (args[i] == "--tag") { - i++ - if (i < length(args)) { - let tag = args[i] - i++ - tags |> push(tag) - } - continue - } - if (starts_with(args[i], "--")) { - let key = args[i] - i++ - if (i < length(args)) { - let val = args[i] - i++ - argmap |> insert(slice(key, 2), val) - } - continue - } - values |> push(args[i]) - i++ - } - return Flags( - values <- values, - argmap <- argmap, - tags <- tags, - ) -} diff --git a/utils/benchctl/main.das b/utils/benchctl/main.das index 3b78b5ae81..f8de6dcd54 100644 --- a/utils/benchctl/main.das +++ b/utils/benchctl/main.das @@ -1,191 +1,266 @@ options gen2 -require sqlite/sqlite_boost - require daslib/fio -require math require daslib/defer require daslib/regex_boost require daslib/strings_boost +require math +require strings -require bench_sql -require utils -require flags +require sqlite/sqlite_boost +require sqlite/sqlite_linq +require bench_table +require bench_args require benchstat +require utils require table_fmt -[export] -def main() { - let err := run_main() - var exit_code = 0 - if (err != "") { - print("[{red_str("error")}] {err}\n") - exit_code = 1 - } - return exit_code +def print_usage() { + print("benchctl - benchmark database management tool\n\n") + print("Usage:\n") + print(" daslang utils/benchctl/main.das -- [options]\n\n") + print("Commands:\n") + print(" reset Initialize or reinitialize the benchmark database (drops all data)\n") + print(" insert Insert benchmark JSON output files into the database\n") + print(" query Display benchmark records (filter by --commit / --tag)\n") + print(" compare Compare two sets of results statistically\n") + print(" help Show this help message\n\n") + print_benchctl_help() } -def run_main() : string { - var args <- get_command_line_arguments() - let arg_index = find_index(args, "--") - if (arg_index == -1) { - 
return "-- separator not found" +[export] +def main : int { + var argv <- get_command_line_arguments() + let dash_idx = find_index(argv, "--") + if (dash_idx != -1) { + argv <- subarray(argv, (dash_idx + 1)..length(argv)) } - var script_args <- subarray(args, (arg_index + 1)..length(args)) - if (length(script_args) == 0) { - return "expected a subcommand name" + + if (empty(argv) || find_index(argv, "--help") != -1 || find_index(argv, "-h") != -1) { + print_usage() + return 0 } - let subcmd_name = script_args[0] - let subcmd_args <- subarray(script_args, 1..length(script_args)) - let flags = parse_flags(subcmd_args) + let parsed <- parse_benchctl_args(argv) + if (!parsed.ok) { + print("[{red_str("error")}] {parsed.err}\n\n") + print_usage() + return 1 + } - if (get_string_arg(flags, "colors", "true") == "false") { + if (parsed.no_color) { colored_output = false } - if (subcmd_name == "help") { - return run_help_cmd() + if (parsed.help) { + print_usage() + return 0 } - let db_filename = get_string_arg(flags, "db", "benchdata.db") - var need_db_init = !file_exists(db_filename) && find_index([ - "insert", - "query", - "compare", - ], subcmd_name) != -1 + if (parsed.command == "" || parsed.command == "help") { + print_usage() + return 0 + } - var db : sqlite3? 
- defer() { - sqlite3_close(db) + let err = run_subcommand(parsed) + if (err != "") { + print("[{red_str("error")}] {err}\n") + return 1 } - var rc = sqlite3_open(db_filename, unsafe(addr(db))) - if (rc != SQLITE_OK) { - return "open database: {sqlite3_errmsg(db)}" + return 0 +} + +def run_subcommand(parsed : ParsedArgs) : string { + let cmd = parsed.command + if (cmd != "reset" && cmd != "insert" && cmd != "query" && cmd != "compare") { + return "unknown subcommand: {cmd}" } + let need_init = !file_exists(parsed.db) && (cmd == "insert" || cmd == "query" || cmd == "compare") + var open_r <- try_open_sqlite(parsed.db) + if (open_r |> is_err) { + return "open database: {open_r |> unwrap_err}" + } + var inscope db <- open_r._value - if (need_db_init) { - let err = run_reset_cmd(db, flags) - if (err != "") { - return "implicit db creation: {err}" + if (need_init) { + let init_err = run_reset_cmd(db) + if (init_err != "") { + return init_err } } - if (subcmd_name == "reset") { - return run_reset_cmd(db, flags) - } elif (subcmd_name == "insert") { - return run_insert_cmd(db, flags) - } elif (subcmd_name == "query") { - return run_query_cmd(db, flags) - } elif (subcmd_name == "compare") { - return run_compare_cmd(db, flags) - } else { - return "unknown subcommand name: {subcmd_name}" + if (cmd == "reset") { + return run_reset_cmd(db) + } elif (cmd == "insert") { + return run_insert_cmd(db, parsed) + } elif (cmd == "query") { + return run_query_cmd(db, parsed) } + return run_compare_cmd(db, parsed) +} +def validate_tag_chars(t : string) : string { + if (find(t, "[") != -1 || find(t, "]") != -1) { + return "tag '{t}' contains illegal character '[' or ']'" + } return "" } -def run_help_cmd() : string { - // TODO: it would be better if we could get this help command for free - // using a command line library (e.g. 
like "flag" package in Go) - - print("benchctl - benchmark database management tool\n\n") - print("Usage:\n") - print(" daslang benchctl/main.das -- [options]\n\n") - print("Commands:\n") - print(" reset Initialize or reinitialize the benchmark database (drops all data)\n") - print(" insert Insert benchmark results from JSON output files\n") - print(" query Query and display stored benchmark records\n") - print(" compare Compare two sets of benchmark results statistically\n") - print(" help Show this help message\n\n") - print("Common options:\n") - print(" --db SQLite3 database file path (default: benchdata.db)\n") - print(" --colors false Disable colored terminal output\n\n") - print("insert options:\n") - print(" --commit Git commit hash to tag results with (default: git rev-parse HEAD)\n") - print(" --tag Tag label to attach to results (can be repeated)\n") - print(" ... Benchmark JSON output files to insert\n\n") - print("query options:\n") - print(" --select SQL WHERE clause to filter records\n\n") - print("compare options:\n") - print(" --select_old SQL WHERE clause for baseline (old) results\n") - print(" --select_new SQL WHERE clause for new results\n") - print(" --s to> Regex rename: map old benchmark names to new names\n") - print(" --old --select_old alias\n") - print(" --new --select_new alias\n") +def run_reset_cmd(db : SqlRunner) : string { + let drop_err = db |> try_drop_table_if_exists(type) + if (drop_err |> is_some) { + return "drop table: {drop_err |> unwrap}" + } + let create_err = db |> try_create_table(type) + if (create_err |> is_some) { + return "create table: {create_err |> unwrap}" + } return "" } -def query_benchmarks(db : sqlite3?; cond : string; exclude : table = default>) : tuple, string> { - var result : array - - var sql_text = "SELECT * FROM benchmarks" - if (cond != "") { - sql_text += " WHERE {cond}" - } - - var err_msg : string - defer() { - sqlite3_free(err_msg) - } - let rc = sqlite3_exec(db, sql_text, unsafe(addr(err_msg))) 
$(values, columns) { - var entry : BenchmarkEntry - for (v, c in values, columns) { - if (c == "id") { - entry.id = to_int64(v) - } elif (c == "commit_hash") { - entry.commit_hash := v - } elif (c == "tags") { - entry.tags := v - } elif (c == "insert_date") { - entry.insert_date = to_int64(v) - } elif (c == "full_name") { - entry.full_name := v - } elif (c == "name") { - entry.name := v - } elif (c == "sub_name") { - entry.sub_name := v - } elif (c == "mode") { - entry.mode := v - } elif (c == "n") { - entry.n = to_int64(v) - } elif (c == "time_ns") { - entry.time_ns = to_int64(v) - } elif (c == "allocs") { - entry.allocs = to_int64(v) - } elif (c == "heap_bytes") { - entry.heap_bytes = to_int64(v) - } elif (c == "string_allocs") { - entry.string_allocs = to_int64(v) - } elif (c == "string_heap_bytes") { - entry.string_heap_bytes = to_int64(v) - } +def run_insert_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + if (length(parsed.files) == 0) { + return "missing targets to insert" + } + + var commit_hash = parsed.commit + if (commit_hash == "") { + commit_hash = cmd_exec("git rev-parse HEAD") + } + + var tag_string = "" + for (t in parsed.tags) { + if (t == "") { + continue } - if (!key_exists(exclude, entry.id)) { - result |> push(entry) + let tag_err = validate_tag_chars(t) + if (tag_err != "") { + return tag_err } - return SQLITE_OK + tag_string += "[{t}]" // nolint:PERF001 — typically 0-2 tags, not a hot path } - var err = "" - if (rc != SQLITE_OK) { - err = clone_string(sqlite3_errmsg(db)) + let now = int64(get_clock()) + + for (filename in parsed.files) { + let data = read_file(filename) + if (data == "") { + print("{filename}: ignored\n") + continue + } + let stats_entries = parse_bench_output(data) + var rows : array + rows |> reserve(length(stats_entries)) + for (e in stats_entries) { + rows |> push(Benchmark( + id = 0, + commit_hash = commit_hash, + tags = tag_string, + insert_date = now, + full_name = "{e.name}/{e.sub_name}", + name = e.name, + 
sub_name = e.sub_name, + mode = e.func_type, + n = int64(e.n), + time_ns = e.time_ns, + allocs = int64(e.allocs), + heap_bytes = int64(e.heap_bytes), + string_allocs = int64(e.string_allocs), + string_heap_bytes = int64(e.string_heap_bytes), + )) + } + let txn_err = db |> try_insert(rows) + if (txn_err |> is_err) { + return "insert {filename}: {txn_err |> unwrap_err}" + } + print("{filename}: added {length(rows)} samples\n") } + return "" +} - return (<- result, err) +def query_benchmarks(db : SqlRunner; commit_filter : string; tag_filter : string) : array { + let want_tag = (tag_filter == "" ? "" : "[{tag_filter}]") + return <- _sql(db |> select_from(type) + |> _where(commit_filter == "" || _.commit_hash == commit_filter) + |> _where(want_tag == "" || _.tags |> contains(want_tag))) } -def run_compare_cmd(db : sqlite3?; flags : Flags) : string { - var cond_old = get_string_aliased_arg(flags, "select_old", "old", "") - var cond_new = get_string_aliased_arg(flags, "select_new", "new", "") +def run_query_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + if (length(parsed.tags) > 1) { + return "--tag may only be specified once for query" + } + let tag_filter = length(parsed.tags) > 0 ? parsed.tags[0] : "" + let tag_err = validate_tag_chars(tag_filter) + if (tag_err != "") { + return tag_err + } + let entries <- query_benchmarks(db, parsed.commit, tag_filter) + if (length(entries) == 0) { + print("no results\n") + return "" + } + var table_rows : array + table_rows |> reserve(length(entries)) + for (e in entries) { + let commit_short = length(e.commit_hash) > 8 ? slice(e.commit_hash, 0, 8) : e.commit_hash + let ns_per_op = e.n > 0l ? 
double(e.time_ns) / double(e.n) : 0.0lf + table_rows |> push(new TableRow(columns <- [ + e.full_name, + commit_short, + e.tags, + "{e.n}", + "{ns_per_op:.1f} ns/op", + ])) + } + print(" {yellow_str("benchmark")} {yellow_str("commit")} {yellow_str("tags")} {yellow_str("n")} {yellow_str("ns/op")}\n\n") + var tabspec = new TableData( + rows <- table_rows, + col_gap = 2, + indent = " ", + ) + print(format_table(tabspec)) + return "" +} + +def run_compare_cmd(db : SqlRunner; parsed : ParsedArgs) : string { + let old_tag_err = validate_tag_chars(parsed.old_tag) + if (old_tag_err != "") { + return old_tag_err + } + let new_tag_err = validate_tag_chars(parsed.new_tag) + if (new_tag_err != "") { + return new_tag_err + } + + let old_entries <- query_benchmarks(db, parsed.old_commit, parsed.old_tag) + let raw_new_entries <- query_benchmarks(db, parsed.new_commit, parsed.new_tag) + + // Exclude rows already in --old from --new so identical / overlapping filters + // (e.g. both empty) cannot compare a result set against itself. 
+ var old_id_set : table + for (e in old_entries) { + old_id_set |> insert(e.id, true) + } + var new_entries : array + new_entries |> reserve(length(raw_new_entries)) + for (e in raw_new_entries) { + if (!key_exists(old_id_set, e.id)) { + new_entries |> push_clone(e) + } + } + + if (length(old_entries) == 0) { + return "no rows match --old-commit / --old-tag" + } + if (length(new_entries) == 0) { + return "no rows match --new-commit / --new-tag (after excluding rows already in --old)" + } var key_map = @(x : string) : string { return x } - var key_mapper = get_string_arg(flags, "s", "") - if (key_mapper != "") { - let parts <- split(key_mapper, "=>") + if (parsed.s != "") { + let parts <- split(parsed.s, "=>") if (length(parts) != 2) { return "invalid -s argument, expected 'from=>to' format" } @@ -195,20 +270,6 @@ def run_compare_cmd(db : sqlite3?; flags : Flags) : string { } } - let (old_entries, err) = query_benchmarks(db, cond_old) - if (err != "") { - return "query --old: {err}" - } - var old_ids : table - for (e in old_entries) { - old_ids |> insert(e.id) - } - - let (new_entries, err) = query_benchmarks(db, cond_new, old_ids) - if (err != "") { - return "query --new: {err}" - } - var old_samples = make_sample_sets(old_entries) var new_samples = make_sample_sets(new_entries) @@ -383,105 +444,6 @@ def run_compare_cmd(db : sqlite3?; flags : Flags) : string { return "" } -def run_query_cmd(db : sqlite3?; flags : Flags) : string { - var cond = get_string_arg(flags, "select", "") - let (entries, err) = query_benchmarks(db, cond) - if (err != "") { - return "run query: {err}" - } - if (length(entries) == 0) { - print("no results\n") - return "" - } - var table_rows : array - for (e in entries) { - let commit_short = length(e.commit_hash) > 8 ? slice(e.commit_hash, 0, 8) : e.commit_hash - let ns_per_op = e.n > 0l ? 
double(e.time_ns) / double(e.n) : 0.0lf - table_rows |> push(new TableRow(columns <- [ - e.full_name, - commit_short, - e.tags, - "{e.n}", - "{ns_per_op:.1f} ns/op", - ])) - } - print(" {yellow_str("benchmark")} {yellow_str("commit")} {yellow_str("tags")} {yellow_str("n")} {yellow_str("ns/op")}\n\n") - var tabspec = new TableData( - rows <- table_rows, - col_gap = 2, - indent = " ", - ) - print(format_table(tabspec)) - return "" -} - -def run_reset_cmd(db : sqlite3?; flags : Flags) : string { - var err = db_exec(db, sql_db_init) - if (err != "") { - return "run init query: {err}" - } - - return "" -} - -def run_insert_cmd(db : sqlite3?; flags : Flags) : string { - if (length(flags.values) == 0) { - return "missing targets to insert" - } - - var commit_hash = get_string_arg(flags, "commit", "") - if (commit_hash == "") { - commit_hash = cmd_exec("git rev-parse HEAD") - } - - var tag_string = "" - for (t in flags.tags) { - tag_string += "[{t}]" - } - - var stmt : sqlite3_stmt? - let rc = sqlite3_prepare_v2(db, sql_bench_insert, -1, unsafe(addr(stmt)), null) - if (rc != SQLITE_OK) { - return "prepare insertion statement: {sqlite3_errmsg(db)}" - } - defer() { - sqlite3_finalize(stmt) - } - - for (filename in flags.values) { - let data = read_file(filename) - if (data == "") { - print("{filename}: ignored\n") - continue - } - let entries = parse_bench_output(data) - var added = 0 - for (e in entries) { - db_bind_text_param(stmt, "@commit_hash", commit_hash) - db_bind_text_param(stmt, "@tags", tag_string) - db_bind_text_param(stmt, "@name", e.name) - db_bind_text_param(stmt, "@sub_name", e.sub_name) - db_bind_text_param(stmt, "@mode", e.func_type) - db_bind_int64_param(stmt, "@n", int64(e.n)) - db_bind_int64_param(stmt, "@time_ns", e.time_ns) - db_bind_int64_param(stmt, "@allocs", int64(e.allocs)) - db_bind_int64_param(stmt, "@heap_bytes", int64(e.heap_bytes)) - db_bind_int64_param(stmt, "@string_allocs", int64(e.string_allocs)) - db_bind_int64_param(stmt, 
"@string_heap_bytes", int64(e.string_heap_bytes)) - let step_rc = sqlite3_step(stmt) - if (step_rc != SQLITE_DONE) { - return "insert failed: {sqlite3_errmsg(db)}" - } - added++ - sqlite3_reset(stmt); - sqlite3_clear_bindings(stmt); - } - print("{filename}: added {added} samples\n") - } - - return "" -} - def warning(msg : string) { print("{yellow_str("[warning]")} {msg}\n") } diff --git a/utils/benchctl/table_fmt.das b/utils/benchctl/table_fmt.das index 92a79bcee6..703f19fefc 100644 --- a/utils/benchctl/table_fmt.das +++ b/utils/benchctl/table_fmt.das @@ -1,3 +1,5 @@ +options gen2 + require math require strings diff --git a/utils/benchctl/utils.das b/utils/benchctl/utils.das index 6859f7dd2b..2f29eb1552 100644 --- a/utils/benchctl/utils.das +++ b/utils/benchctl/utils.das @@ -1,3 +1,5 @@ +options gen2 + require daslib/fio require strings require daslib/strings_boost @@ -24,7 +26,7 @@ def file_exists(filename : string) : bool { def read_file(filename : string) : string { var result = "" - fopen(filename, "r") $(f) { + fopen(filename, "rb") $(f) { if (f != null) { result = fread(f) }