diff --git a/CLAUDE.md b/CLAUDE.md index d1ccee315b..080603edf7 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -214,7 +214,7 @@ Full migration table (when reading older docs that say `var inscope` or `<-` for - `table[key]` (read or assign) is **safe** — do NOT wrap in `unsafe(...)`. Some legacy daslib code has `unsafe(tab[k])`; do not propagate that pattern - **Move-assign table literal:** `tab <- { "k" => v }` works for both `var tab <- { ... }` declarations and `tab <- { ... }` reassignment to existing variables - **Table comprehension move-assign:** `tab <- { for(x in range(5)); x => x*x }` — same move-assign rules apply -- **`table` (one type param) is the set type** — value type elided. `var s : table; s |> insert(5); key_exists(s, 5)`. Distinct from `table` (the map form); both shapes coexist. +- **`table` (one type param) is the set type** — value type elided. `var s : table; s |> insert(5); key_exists(s, 5)`. Distinct from `table` (the map form); both shapes coexist. Set-literal init: `let STOP_WORDS : table <- { "a", "an", "the" }` — value-less braces, comma-separated. Use this instead of declaring `var X : table` and populating in an `[init]` function. ### Iterators and `each` diff --git a/CMakeLists.txt b/CMakeLists.txt index 873a09a406..c548603734 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1573,6 +1573,15 @@ install(DIRECTORY ${PROJECT_SOURCE_DIR}/utils/mouse/tests/ FILES_MATCHING PATTERN "*.das" PATTERN "*.md" ) +# Install utils/common (git-aware staleness signature shared between +# utils/mcp/tools/cpp_common and utils/mouse/index). 
+file(GLOB DAS_UTILS_COMMON_FILES ${PROJECT_SOURCE_DIR}/utils/common/*.das) +install(FILES ${DAS_UTILS_COMMON_FILES} DESTINATION utils/common) +install(DIRECTORY ${PROJECT_SOURCE_DIR}/utils/common/tests/ + DESTINATION utils/common/tests + FILES_MATCHING PATTERN "*.das" +) + # Install daspkg (package manager) file(GLOB DAS_DASPKG_FILES ${PROJECT_SOURCE_DIR}/utils/daspkg/*.das) install(FILES ${DAS_DASPKG_FILES} DESTINATION utils/daspkg) diff --git a/daslib/fio.das b/daslib/fio.das index 0ec17bd74a..3eb8307bbe 100644 --- a/daslib/fio.das +++ b/daslib/fio.das @@ -704,3 +704,26 @@ def rmdir_rec_result(path : string) : fs_result_bool { } return fs_result_bool(value = res) } + +def run_and_capture(args : array; var output : string&; timeout_sec : float = 0.0) : int { + //! Run an external command and capture its stdout+stderr (merged into one + //! pipe by the underlying ``popen_argv``). Returns the process exit code; + //! ``output`` is filled with whatever the child wrote to either stream. + //! + //! ``args[0]`` is the executable; remaining elements are positional arguments. + //! ``timeout_sec > 0`` kills the process tree after that many seconds (returns + //! ``popen_timed_out``); ``timeout_sec <= 0`` means no timeout. + //! + //! Argv-based: bypasses the shell entirely (Windows: CreateProcess; Unix: + //! fork+execvp). No ``cmd.exe`` quote-stripping, no ``/bin/sh`` ``$()``/backtick + //! expansion — every argv element reaches the child verbatim. Callers don't + //! (and shouldn't) quote arguments themselves. + var captured : string + let exit_code = unsafe(popen_argv(args, timeout_sec, $(f) { + if (f != null) { + captured := unsafe(fread_to_eof(f)) + } + })) + output := captured + return exit_code +} diff --git a/daslib/json_boost.das b/daslib/json_boost.das index 3d2a22d169..2cc3bc9304 100644 --- a/daslib/json_boost.das +++ b/daslib/json_boost.das @@ -155,20 +155,11 @@ def operator ?. value(var a : JsonValue? ==const) : JsValue? 
{ [macro_function] def private is_json_ptr_value(td : TypeDeclPtr) { //! Checks if the type is a pointer to json::JsonValue - if (td.baseType != Type.tPointer) { - return false - } - if (td.firstType == null) { - return false - } - if (td.firstType.baseType != Type.tStructure) { + if (td.baseType != Type.tPointer || td.firstType == null || td.firstType.baseType != Type.tStructure) { return false } let st = td.firstType.structType - if (st.name != "JsonValue" && st._module.name != "json") { - return false - } - return true + return st.name == "JsonValue" && st._module.name == "json" } @@ -438,7 +429,7 @@ def public parse_json_annotation(name : string; annotation : array 0 && first_character(name) == '_') { + } elif (ann.data is tBool && !empty(name) && first_character(name) == '_') { fieldState.argName = slice(name, 1) } } elif (ann.name == "enum_as_int" && ann.data is tBool) { @@ -602,7 +593,13 @@ def JV(value : auto(TT)) : JsonValue? { if (field == null) { return ; } } else { static_if (typeinfo is_workhorse(field)) { - if (field == default) { return ; } + // bool path uses `!field` (avoids STYLE018 on `bool == false` after + // generic instantiation); numeric workhorses keep the generic-zero compare. + static_if (typeinfo stripped_typename(field) == "bool") { + if (!field) { return ; } + } else { + if (field == default) { return ; } + } } } } diff --git a/daslib/strings_boost.das b/daslib/strings_boost.das index 5afefb212f..202b79194b 100644 --- a/daslib/strings_boost.das +++ b/daslib/strings_boost.das @@ -205,6 +205,37 @@ def levenshtein_distance_fast(s, t : string implicit) : int { return v0[tLen] } +def public jaccard(a : table; b : table) : float { + //! Jaccard similarity over two string-sets: ``|intersection| / |union|`` in 0..1. + //! Empty either side returns 0.0. Use ``table`` (the set form) so + //! the intersect lookup is O(1). 
+ if (empty(a) || empty(b)) { + return 0.0f + } + var intersect = 0 + for (k in keys(a)) { + if (key_exists(b, k)) { + intersect ++ + } + } + let unionSize = length(a) + length(b) - intersect + return float(intersect) / float(unionSize) +} + +def public jaccard(a, b : array) : float { + //! Jaccard similarity over two string arrays. Builds two ``table`` + //! sets and delegates — convenient when callers don't already have sets. + var sa : table + for (x in a) { + sa |> insert(x) + } + var sb : table + for (x in b) { + sb |> insert(x) + } + return jaccard(sa, sb) +} + def replace_multiple(source : string; replaces : array>) { //! replaces occurances of multiple strings in a string. does not support overlap if (empty(source) || empty(replaces)) { diff --git a/doc/reflections/das2rst.das b/doc/reflections/das2rst.das index 22e27adc33..53a28c3af6 100644 --- a/doc/reflections/das2rst.das +++ b/doc/reflections/das2rst.das @@ -193,7 +193,7 @@ def document_module_fio(root : string) { group_by_regex("Directory manipulation", mod, %regex~(dir|dir_rec|mkdir|mkdir_rec|mkdir_result|rmdir|rmdir_rec|rmdir_rec_result|rmdir_result|chdir|getcwd)$%%), group_by_regex("Glob and pattern matching", mod, %regex~(match_glob|glob|glob_filtered|is_glob_pattern|expand_glob|parse_file_list)$%%), group_by_regex("Filesystem queries", mod, %regex~(temp_directory|temp_directory_result|create_temp_file|create_temp_file_result|create_temp_directory|create_temp_directory_result|disk_space)$%%), - group_by_regex("OS specific routines", mod, %regex~(sleep|exit|system|popen|popen_binary|popen_timeout|popen_argv|popen_timed_out|get_env_variable|sanitize_command_line|has_env_variable)$%%), + group_by_regex("OS specific routines", mod, %regex~(sleep|exit|system|popen|popen_binary|popen_timeout|popen_argv|popen_timed_out|run_and_capture|get_env_variable|sanitize_command_line|has_env_variable)$%%), group_by_regex("Dynamic modules", mod, %regex~(register_dynamic_module|register_native_path)$%%) ) 
documents("File input output library", mod, "fio.rst", groups) @@ -376,7 +376,7 @@ def document_module_strings_boost(root : string) { group_by_regex("Search and match", mod, %regex~(last_index_of|glob_match|text_match)$%%), group_by_regex("Replace", mod, %regex~(replace_multiple)$%%), group_by_regex("Prefix and suffix", mod, %regex~(trim_prefix|trim_suffix)$%%), - group_by_regex("Levenshtein distance", mod, %regex~(levenshtein_distance|levenshtein_distance_fast)$%%), + group_by_regex("String similarity", mod, %regex~(levenshtein_distance|levenshtein_distance_fast|jaccard)$%%), group_by_regex("Character traits", mod, %regex~(is_hex|is_tab_or_space)$%%)) document("Boost package for string manipulation library", mod, "strings_boost.rst", groups) } diff --git a/doc/source/stdlib/handmade/function-strings_boost-jaccard-0x4338ed9b289792ae.rst b/doc/source/stdlib/handmade/function-strings_boost-jaccard-0x4338ed9b289792ae.rst new file mode 100644 index 0000000000..00555454f1 --- /dev/null +++ b/doc/source/stdlib/handmade/function-strings_boost-jaccard-0x4338ed9b289792ae.rst @@ -0,0 +1 @@ +Jaccard similarity over two string-sets, returning ``|intersection| / |union|`` in 0..1. Empty either side returns 0. Pass two ``table`` (the set form) for O(1) intersect lookup, or two ``array`` and the array overload will build the sets internally. 
diff --git a/mouse-data/docs/das2rst-emits-a-stub-for-my-new-public-daslib-function-even-though-i-added-a-doc-comment-what-s-the-right-fix.md b/mouse-data/docs/das2rst-emits-a-stub-for-my-new-public-daslib-function-even-though-i-added-a-doc-comment-what-s-the-right-fix.md new file mode 100644 index 0000000000..843713b703 --- /dev/null +++ b/mouse-data/docs/das2rst-emits-a-stub-for-my-new-public-daslib-function-even-though-i-added-a-doc-comment-what-s-the-right-fix.md @@ -0,0 +1,37 @@ +--- +slug: das2rst-emits-a-stub-for-my-new-public-daslib-function-even-though-i-added-a-doc-comment-what-s-the-right-fix +title: das2rst emits a // stub for my new public daslib function even though I added a //! doc-comment — what's the right fix? +created: 2026-05-09 +last_verified: 2026-05-09 +links: [] +--- + +Two things are going on; the `documentation_rst.md` skill rolls them together as "add `//!` instead of filling the stub" but the real story has a wrinkle. + +**1. `//!` placement — must be INSIDE the function body, not before `def`.** +For pure-daslang functions in `daslib/*.das`, `rst_comment.das` extracts `//!` comments only when they appear as the first line(s) of the function body: + +```daslang +def public foo(x : int) : int { + //! Docs go HERE — first lines of the body. + //! Multi-line continues like this. + return x + 1 +} +``` + +`//!` placed *before* `def` is silently ignored. Symptom: regen still produces a `// stub` placeholder under `doc/source/stdlib/handmade/function-<module>-<name>-<hash>.rst`. The fix is to move the doc-comment inside the body and re-run `das2rst`. + +**2. Some daslib modules expect BOTH a `//!` body comment AND a per-symbol `handmade/*.rst`.** +Modules like `strings_boost`, `fio`, and other long-established ones have a per-symbol `handmade/*.rst` for *every* function — see e.g. `function-strings_boost-levenshtein_distance-0xbb5a4a3017b240a5.rst`. 
When you add a new public function to one of those modules, `das2rst` will emit a fresh `// stub` even with correctly-placed `//!`. The convention there is: keep the `//!` (it lands in `detail/`) **and** fill the stub with a 1-2 sentence handmade description. Don't delete the stub — re-running `das2rst` recreates it. + +Newer modules like `archive`, `json_boost`, `command_line` don't have per-symbol handmade entries; for those, `//!` alone is enough. + +**How to apply:** +- New daslib public function → add `//!` inside body first. +- Run `bin/daslang doc/reflections/das2rst.das`. +- `grep -rln "// stub" doc/source/stdlib/handmade/` — if your function appears, fill that file with a short description (plain text, no RST directives). If your function doesn't appear, you're done. +- Re-run `das2rst` to confirm clean. +- Verify `grep -c Uncategorized doc/source/stdlib/generated/*.rst | grep -v ':0$'` is empty (means the function is in a `group_by_regex` group in `das2rst.das`). + +## Questions +- das2rst emits a // stub for my new public daslib function even though I added a //! doc-comment — what's the right fix? diff --git a/mouse-data/docs/how-do-i-capture-a-function-param-var-state-t-struct-ref-by-reference-into-a-daslang-lambda.md b/mouse-data/docs/how-do-i-capture-a-function-param-var-state-t-struct-ref-by-reference-into-a-daslang-lambda.md new file mode 100644 index 0000000000..62d4c73b5c --- /dev/null +++ b/mouse-data/docs/how-do-i-capture-a-function-param-var-state-t-struct-ref-by-reference-into-a-daslang-lambda.md @@ -0,0 +1,42 @@ +--- +slug: how-do-i-capture-a-function-param-var-state-t-struct-ref-by-reference-into-a-daslang-lambda +title: How do I capture a function param (var state : T struct ref) by reference into a daslang lambda so the lambda can mutate the underlying module global from outside the function? 
+created: 2026-05-09 +last_verified: 2026-05-09 +links: [] +--- + +## Pattern + +The capture clause goes BEFORE `<|`, the `&` marks each ref-captured name, and the WHOLE move into the storage requires `unsafe`: + +```das +def register_widget(var state : State; ident : string) { + unsafe { + var d <- @ capture(& state) (payload : int) { + state.pending = true + state.value = payload + } + g_dispatchers[ident] <- d + } +} +``` + +## Three syntax landmines + +1. **`@ <| capture(& x) (args) { ... }` is a parse error.** Capture clause attaches to `@`, not to `<|`. Correct form: `@ capture(& x) (args) { ... }` — drop the `<|`. +2. **`g_dispatchers[ident] <- @capture(...) (args) {...}` errors as `error[30941]: can't move from a constant value`.** The lambda literal is const-typed; you can't move directly into a table-indexing lvalue. Move into a temp `var d <- ...` first, then `<- d` into the table. +3. **`error[31003]: capture by reference requires unsafe`.** Wrap the move-and-store in `unsafe { ... }`. Capture-by-reference is unsafe because the lambda outlives the function frame; daslang requires you to opt in. + +## Why this works for module-global state + +When the caller passes a module global (`register_widget(G, "G")`), the function param `var state : State` is a stable ref to G. `capture(& state)` smuggles that ref into the closure. Since G lives forever (module-global), the captured ref stays valid for the lambda's lifetime — invoking the lambda from anywhere mutates G in place. + +Verified end-to-end (2026-05-09, daslang Opus 4.7 session): module-global state mutation through a lambda stored in a module-global `table>`, invoked from outside the registering function. 
+ +## Source + +Canonical patterns in `dastest/dastest.das:306` (`new_thread <| @capture(& res, & mainCtx) {...}`) and `dastest/suite.das:451-475` (multiple `testing.onFail <- @ capture(& testing, & failed, ...)` shapes — note `<-` works for struct-field assignment without unsafe; only table-indexing assignment needs the `unsafe` wrapper). + +## Questions +- How do I capture a function param (var state : T struct ref) by reference into a daslang lambda so the lambda can mutate the underlying module global from outside the function? diff --git a/mouse-data/docs/how-do-i-make-an-mcp-tool-spawn-a-fresh-daslang-subprocess-so-each-call-gets-clean-macro-state.md b/mouse-data/docs/how-do-i-make-an-mcp-tool-spawn-a-fresh-daslang-subprocess-so-each-call-gets-clean-macro-state.md new file mode 100644 index 0000000000..f832e4b1d9 --- /dev/null +++ b/mouse-data/docs/how-do-i-make-an-mcp-tool-spawn-a-fresh-daslang-subprocess-so-each-call-gets-clean-macro-state.md @@ -0,0 +1,98 @@ +--- +slug: how-do-i-make-an-mcp-tool-spawn-a-fresh-daslang-subprocess-so-each-call-gets-clean-macro-state +title: How do I make an MCP tool spawn a fresh daslang subprocess so each call gets a clean macro state, and what is the cold-start cost? +created: 2026-05-09 +last_verified: 2026-05-09 +links: [] +--- + +When an MCP tool runs `compile_file()` on user code that registers `[function_macro]` / `[call_macro]` annotations, the C++-side Annotation pointers persist for the rest of the daslang process's lifetime. In a long-lived MCP server, this means **subsequent edits to the macro source are invisible** until the server restarts. Symptoms: identical input that worked an hour ago now hits a stale macro version; restarting the MCP fixes it. + +The fix is to make each MCP tool that compiles user code shell out to a fresh `daslang.exe` subprocess. Macro state is process-local, so every call starts clean. 
+ +## Pattern + +`utils/mcp/tools/<tool>.das` becomes a thin wrapper: + +```das +options gen2 +require common public + +def do_compile_check(file : string; project : string = ""; json : bool = false) : string { + return run_mcp_subtool("compile_check", [file, project, json ? "true" : "false"]) +} +``` + +The real logic lives at `utils/mcp/subtools/<tool>.das`: + +```das +options gen2 +require ../tools/common.das public +// ... real work ... + +[export] +def main { + let raw <- get_command_line_arguments() + let args <- subtool_user_args(raw) + if (length(args) < 3) { + print(make_tool_result("compile_check subtool: expected 3 args, got {length(args)}", true)) + return + } + let file = string(args[0]) + let project = string(args[1]) + let json = string(args[2]) == "true" + print(run_compile_check(file, project, json)) +} +``` + +`run_mcp_subtool` (in `utils/mcp/tools/common.das`) handles the popen, the `--` separator, timeout, and exit-code translation: + +```das +def run_mcp_subtool(subtool_name : string; args : array<string>; timeout_sec : float = 120.0) : string { + let exe = get_daslang_exe() + let subtool_path = path_join(get_das_root(), "utils/mcp/subtools/{subtool_name}.das") + var argv <- [exe, subtool_path, "--"] // `--` is critical + argv |> reserve(length(argv) + length(args)) + for (a in args) { argv |> push(a) } + var output : string + let exit_code = run_and_capture(argv, output, timeout_sec) + if (exit_code == popen_timed_out) { + return make_tool_result("MCP subtool '{subtool_name}' timed out after {timeout_sec}s:\n{output}", true) + } + if (exit_code != 0) { + return make_tool_result("MCP subtool '{subtool_name}' failed (exit {exit_code}):\n{output}", true) + } + return output +} +``` + +## The `--` separator gotcha + +Without `--`, daslang treats positional argv past the script path as additional `.das` files to load AND auto-runs each. With `--`, daslang stops parsing its own options and exposes the rest via `get_command_line_arguments()`. 
The subtool then uses `subtool_user_args(raw)` (also in `tools/common.das`) to skip past the interpreter+script-path prefix and the `--` to get just the user args. + +## Cold-start cost + +Every subprocess pays the daslang.exe boot + subtool-script compile + require-chain compile cost. Measured on Windows (Release): + +| Subtool | Fast-fail wall time | +|---|---| +| `daslang.exe --version` | ~35 ms | +| `compile_check.das` / `find_symbol.das` / `list_module_api.das` | ~0.7 s | +| `aot.das` | ~1.9 s (extra ~1.2 s for `daslib/aot_cpp`) | + +So per-MCP-call overhead is 0.7–1.9 s before the actual work. For a 270-test MCP test suite, totalling ~89 s. Acceptable for offline runs. The dasImgui PR #2620 has the full benchmark + the deferred-speedup backlog. + +## When to apply + +- Any MCP tool that calls `compile_file()` on user code (i.e., the user's macros may register). +- Any tool that walks RTTI of compiled programs (the same `compile_file` underlies it). + +When NOT to apply: +- Tools that only walk the C++ AST index (`cpp_*` family) — no daslang macro state involved. +- `live_*` tools (talk to live HTTP host). +- Tools that already shell out to external processes (run_script, run_test, format_file, ast-grep wrappers). + +Verified 2026-05-09 in PR #2620. + +## Questions +- How do I make an MCP tool spawn a fresh daslang subprocess so each call gets a clean macro state, and what is the cold-start cost? 
diff --git a/mouse-data/docs/how-do-i-narrow-cpp_search_dirs-per-call-to-avoid-the-multi-second-full-tree-ast-grep-scan.md b/mouse-data/docs/how-do-i-narrow-cpp_search_dirs-per-call-to-avoid-the-multi-second-full-tree-ast-grep-scan.md new file mode 100644 index 0000000000..cde090b85c --- /dev/null +++ b/mouse-data/docs/how-do-i-narrow-cpp_search_dirs-per-call-to-avoid-the-multi-second-full-tree-ast-grep-scan.md @@ -0,0 +1,56 @@ +--- +slug: how-do-i-narrow-cpp_search_dirs-per-call-to-avoid-the-multi-second-full-tree-ast-grep-scan +title: How do I narrow CPP_SEARCH_DIRS per call to avoid the multi-second full-tree ast-grep scan when looking up a single C++ symbol? +created: 2026-05-09 +last_verified: 2026-05-09 +links: [] +--- + +The MCP `with_cpp_source` redirect (used by `find_symbol` and `goto_definition` to surface a builtin's C++ source location) walks a global C++ index built from `CPP_SEARCH_DIRS = [src, include, modules]` — about 947 files in daslang. First call costs ~2 s; subsequent calls reuse the cached index with a cheap signature recheck. But for an MCP tool that runs in a fresh subprocess per call, **every call rebuilds**, so each `with_cpp_source=true` lookup pays the full 2 s. + +Fix: pass an optional `cpp_dirs` arg that scopes a fresh narrow scan, bypassing the global cache. + +## API + +`do_find_symbol` and `do_goto_definition` have an optional last param: + +```das +def do_find_symbol(query : string; kind : string = ""; file : string = ""; + project : string = ""; with_cpp_source : bool = false; + cpp_dirs : string = "") : string + +def public do_goto_definition(file : string; line_str, col_str : string; + no_opt_str : string = ""; project : string = ""; + with_cpp_source : bool = false; + cpp_dirs : string = "") : string +``` + +`cpp_dirs` is a comma-separated list of repo-relative paths. Empty -> use the cached global index (today's behavior, slow for cold subprocess). 
Non-empty -> fresh scoped scan via `cpp_lookup_by_name_scoped()` (in `utils/mcp/tools/cpp_common.das`), no global cache touched. + +## Example + +`print` is a builtin defined in `src/builtin/`. Narrowing the scan from full default (947 files) to just `src/builtin` (33 files): + +```das +do_find_symbol("=print", "function", "", "", true, "src/builtin") +``` + +Test timing improved from 19 s to 1.7 s (10×) for `test_goto_definition_with_cpp_source`, and from 19 s to 3.4 s (5.7×) for `test_find_symbol_with_cpp_source`. Total `test_tools.das` suite: 132 s -> 89 s (33% faster). + +## How the helper works + +`cpp_lookup_by_name_scoped(cppName, dirs, var match)` does a fresh `cpp_run_scan(dirs, "", err)` over only the requested subdirs, then linear-scans the entries. No caching, but the scan is small enough it doesn't matter at this scale. + +## When to use + +- Tests that look up a known builtin where the source-file location is predictable (e.g. `print` in `src/builtin`, `addFunction` in `src/ast`). +- Production MCP calls where the user is searching for a specific known scope. + +When NOT to use: +- General-purpose user queries — they want the full search scope. +- The first `with_cpp_source=true` call in a fresh subprocess gets the same per-call cost regardless; narrowing only pays off for repeated lookups in the same scope. + +Verified 2026-05-09 in PR #2620. + +## Questions +- How do I narrow CPP_SEARCH_DIRS per call to avoid the multi-second full-tree ast-grep scan when looking up a single C++ symbol? 
diff --git a/mouse-data/docs/how-do-i-scope-a-git-staleness-signature-to-a-subdirectory-at-head.md b/mouse-data/docs/how-do-i-scope-a-git-staleness-signature-to-a-subdirectory-at-head.md new file mode 100644 index 0000000000..43b548d00c --- /dev/null +++ b/mouse-data/docs/how-do-i-scope-a-git-staleness-signature-to-a-subdirectory-at-head.md @@ -0,0 +1,46 @@ +--- +slug: how-do-i-scope-a-git-staleness-signature-to-a-subdirectory-at-head +title: How do I scope a git-tracked staleness signature to a single subdirectory so commits outside it don't invalidate the cache? +created: 2026-05-10 +last_verified: 2026-05-10 +links: [] +--- + +`git rev-parse HEAD:<subdir>` returns the **tree object hash** for that subdirectory at HEAD. Fold it into your staleness signature instead of the global `git rev-parse HEAD`: + +- **Stable** when only files outside the subtree change at HEAD (commits/branch switches that don't touch your indexed paths). +- **Changes** when any tracked file under the subtree changes at HEAD. + +For the signature to also pick up pending uncommitted changes, combine with filtered `git status --porcelain --untracked-files=normal` — keep only lines whose extracted path is under one of your search_dirs. + +Why per-tree HEAD matters: global `git rev-parse HEAD` flips on every commit anywhere in the repo. In a monorepo, every `git pull` or branch switch invalidates every cache that uses it, even when nothing under the cache's scope changed. Per-tree HEAD avoids that whole class of spurious rebuilds. + +**The pattern lives in `utils/common/git_signature.das:compute_signature`** (added in PR #2621). 
Both `utils/mouse/index.das` (markdown docs index) and `utils/mcp/tools/cpp_common.das` (C++ source-search index) use it: + +```daslang +require ../../common/git_signature.das + +let core = compute_signature( + root, // any dir inside the repo + @(p : string) => ends_with(p, ".md"), // file-class predicate + [docs_abs]) // indexed dirs (absolute) +``` + +Edge cases handled in the shared module: +- `git rev-parse HEAD:<subdir>` errors when `<subdir>` isn't tracked at HEAD (e.g., newly added) → push a placeholder; the next `git status` line catches new files. +- Empty `<subdir>` (search_dir == toplevel) → fall back to bare `HEAD`. +- Caller-passed `search_dirs_abs` order matters for the hash if not normalized → `dirs_rel` is sorted before folding. +- No git checkout at all → fall back to `compute_filesystem_signature` (recursive walk, sorted by path before hashing for cross-platform determinism). +- `git status --porcelain` paths are pre-narrowed to those under any search_dir, so the consumer's predicate only needs the file-class filter. + +If you're building a NEW indexed cache under `utils/`, plug into the shared module rather than copying the git plumbing — the predicate is the only consumer-specific bit. + +## Questions +- How do I scope a git-tracked staleness signature to a single subdirectory at HEAD? +- How do I avoid spurious rebuilds on `git pull` for caches scoped to a subtree? +- What does `git rev-parse HEAD:<subdir>` actually return? +- Where does the shared staleness-signature code live in this repo? +- How do both mouse and the cpp-search MCP tool detect index staleness? + +## Questions +- How do I scope a git-tracked staleness signature to a single subdirectory so commits outside it don't invalidate the cache? 
diff --git a/mouse-data/docs/how-do-i-special-case-bool-or-any-specific-type-inside-a-generic-static_if-at-compile-time.md b/mouse-data/docs/how-do-i-special-case-bool-or-any-specific-type-inside-a-generic-static_if-at-compile-time.md new file mode 100644 index 0000000000..a629f048af --- /dev/null +++ b/mouse-data/docs/how-do-i-special-case-bool-or-any-specific-type-inside-a-generic-static_if-at-compile-time.md @@ -0,0 +1,53 @@ +--- +slug: how-do-i-special-case-bool-or-any-specific-type-inside-a-generic-static_if-at-compile-time +title: How do I special-case `bool` (or any specific concrete type) inside a generic / static_if at compile time in daslang? +created: 2026-05-09 +last_verified: 2026-05-09 +links: [] +--- + +When you need to discriminate one workhorse type from the others inside a generic — typically because the lint forbids one shape on one type but the generic must work for all — use `static_if (typeinfo stripped_typename(field) == "bool")`. + +## Why `stripped_typename`, not `typename` + +`typeinfo typename(x)` returns the FULL type name including const/ref decorations. For a struct field accessed in a loop, `typename(field)` may yield `"bool#"` or `"bool const&"`, not `"bool"`. `stripped_typename` returns the clean canonical name without decorations — matches what you'd write in source. + +Used widely in the daslang test suite: +```das +static_if (typeinfo stripped_typename(field) == "int") { ... } +static_elif (typeinfo stripped_typename(field) == "string") { ... } +``` +(see `tests/apply/test_apply.das`). + +## Real example: avoiding STYLE018 in a generic + +`daslib/json_boost.das` originally had this generic-zero compare, which works for `int`/`float`/etc. 
but lowers to `if (field == false)` for `bool` and trips the new STYLE018 lint: + +```das +static_if (typeinfo is_workhorse(field)) { + if (field == default) { return ; } +} +``` + +The fix splits the workhorse path: + +```das +static_if (typeinfo is_workhorse(field)) { + static_if (typeinfo stripped_typename(field) == "bool") { + if (!field) { return ; } + } else { + if (field == default) { return ; } + } +} +``` + +`bool` uses the idiomatic `!field`; numeric workhorses keep the generic-zero compare. The STYLE018 false positive disappears, semantics are unchanged. + +## What `is_*` typeinfo traits exist + +There IS no `typeinfo is_bool` in daslang core (as of 2026-05-09). The full set in `src/ast/ast_infer_type.cpp` is roughly: `is_dim`, `is_struct`, `is_tuple`, `is_variant`, `is_class`, `is_lambda`, `is_enum`, `is_bitfield`, `is_string`, `is_handle`, `is_ref`, `is_ref_type`, `is_ref_value`, `is_const`, `is_temp`, `is_temp_type`, `is_pointer`, `is_smart_ptr`, `is_iterator`, `is_iterable`, `is_vector`, `is_array`, `is_table`, `is_numeric`, `is_numeric_comparable`, `is_local`, `is_function`, `is_void`, `is_void_pointer`, `is_workhorse`, `is_pod`, `is_raw`, plus `can_*` and `need_*` family. For anything not in the list (bool, specific enum, specific struct), use `stripped_typename(x) == "name"`. + +Verified 2026-05-09 in PR #2620 (`daslib/json_boost.das:597-611`). + +## Questions +- How do I special-case `bool` (or any specific concrete type) inside a generic / static_if at compile time in daslang? 
diff --git a/mouse-data/docs/what-s-the-difference-between-at-and-double-at-lambda-syntax-in-daslang.md b/mouse-data/docs/what-s-the-difference-between-at-and-double-at-lambda-syntax-in-daslang.md new file mode 100644 index 0000000000..e85264dcfc --- /dev/null +++ b/mouse-data/docs/what-s-the-difference-between-at-and-double-at-lambda-syntax-in-daslang.md @@ -0,0 +1,47 @@ +--- +slug: what-s-the-difference-between-at-and-double-at-lambda-syntax-in-daslang +title: What's the difference between `@(args) => expr` and `@@(args) => expr` in daslang, and when does the distinction matter? +created: 2026-05-10 +last_verified: 2026-05-10 +links: [] +--- + +`@(args) { body }` / `@(args) => expr` is a **lambda** — a closure with captures. Its type is `lambda<(arg_types) : ret_type>`. + +`@@(args) { body }` / `@@(args) => expr` is a **function pointer** — no captures allowed (it's a code-pointer + signature, with nothing to close over). Its type is `function<(arg_types) : ret_type>`. + +`lambda<...>` and `function<...>` are NOT interchangeable. Passing a function pointer where a lambda is expected fails type-check: + +``` +no matching functions or generics: compute_signature(string const&, + function<(p:string const):bool const>, array) +... invalid argument 'predicate' (1). expecting + 'lambda<(repo_rel_path:string const):bool> const', + passing 'function<(p:string const):bool const>' +``` + +**Rule of thumb:** default to `@(...)`. Use `@@(...)` only when the API explicitly takes a `function<...>` (FFI callbacks, certain dispatch tables, places where the runtime needs a no-allocation pointer). Going from `@` to `@@` is a silent narrowing — looks fine until you later need a capture and hit "no closures allowed". + +If you DO need to capture into a lambda but get `error 31003: implicit capture by move requires unsafe` (or `... 
by reference requires unsafe`), the explicit form is: + +``` +unsafe(@ capture(& var) (args) => expr) // by reference +unsafe(@ capture(:= var) (args) => expr) // by clone (use `<- var` to move) +``` + +Live examples: +- `utils/mcp/tools/cpp_common.das:ensure_cpp_index` — `unsafe(@ capture(& prune_dirs) (p : string) => ...)` +- `utils/find-dupe/main.das:432` — `@ capture(ref(jobs), ref(sources_by_id)) () { ... }` +- `utils/das-fmt/dasfmt.das:145` — `@ capture(& verified, := args) { ... }` +- `utils/benchctl/main.das:267` — `@capture(:= re, := parts) (x : string) : string { ... }` + +The `unsafe(...)` wrap is required because both reference and move capture touch lifetime invariants the type-checker can't otherwise prove safe. + +## Questions +- What's the difference between `@(args) => expr` and `@@(args) => expr` in daslang? +- When should I use `@@(...)` instead of `@(...)`? +- Why does my predicate fail with "expecting lambda, passing function"? +- How do I capture a non-copyable variable into a daslang lambda? + +## Questions +- What's the difference between `@(args) => expr` and `@@(args) => expr` in daslang, and when does the distinction matter? diff --git a/mouse-data/docs/why-does-require-shared-foo-das-fail-to-parse-with-unexpected-shared.md b/mouse-data/docs/why-does-require-shared-foo-das-fail-to-parse-with-unexpected-shared.md new file mode 100644 index 0000000000..6eedd43d1e --- /dev/null +++ b/mouse-data/docs/why-does-require-shared-foo-das-fail-to-parse-with-unexpected-shared.md @@ -0,0 +1,29 @@ +--- +slug: why-does-require-shared-foo-das-fail-to-parse-with-unexpected-shared +title: Why does `require ../path/shared/foo.das` fail with `syntax error, unexpected shared, expecting .. or name or '%' or '.'`? +created: 2026-05-10 +last_verified: 2026-05-10 +links: [] +--- + +daslang's parser tokenizes `shared` as the `DAS_SHARED` keyword (used for shared smart pointers and `module foo shared`). 
When `shared` appears as a path component in a relative `require`, the parser sees the keyword instead of an identifier and reports: + +``` +error[30151]: syntax error, unexpected shared, expecting .. or name or '%' or '.' +``` + +The require grammar at this position accepts only `..`, an identifier, `%`, or `.`. `shared` matches none — it's a hard token, not a name. + +**Fix**: rename the directory. Any non-keyword name works — `common/`, `_shared/`, `lib/`, `internal/`. To pre-check a candidate, grep `src/parser/ds_lexer.lpp` for `"<name>"`; if it's there, it's a reserved token. Same trap applies to any future cross-tool sharing under `utils/` or `tests/`. + +Concrete: PR #2621 first tried `utils/shared/git_signature.das`, hit this error immediately, and the directory was renamed to `utils/common/`. The rename is purely cosmetic — the require path becomes `require ../../common/git_signature.das` and everything else is unchanged. + +Worth knowing because the error message is cryptic if you don't already suspect the parser is treating your dir name as a keyword: the diagnostic says "unexpected shared" but doesn't hint that `shared` is reserved. + +## Questions +- Why does `require ../path/shared/foo.das` fail to parse with "unexpected shared"? +- Can a daslang require path use any directory name? +- Which directory names break daslang's relative-require parser? + +## Questions +- Why does `require ../path/shared/foo.das` fail with `syntax error, unexpected shared, expecting .. or name or '%' or '.'`? 
diff --git a/tests/macro_call/_widget_dispatch_helper.das b/tests/macro_call/_widget_dispatch_helper.das new file mode 100644 index 0000000000..ac5e5662a1 --- /dev/null +++ b/tests/macro_call/_widget_dispatch_helper.das @@ -0,0 +1,142 @@ +// Helper module replicating the dasImgui [widget] macro pattern in +// isolation — no imgui binding, no GLFW, just enough to exercise: +// - function_macro injects a `widget_ident : string` param at index 1 +// - call_macro intercepts `(IDENT, args...)`, auto-emits IDENT as +// a module global of the state struct's type, and rewrites the call +// to `(IDENT, "IDENT", args...)` +// +// If this helper's [widget] dispatch fires correctly while dasImgui's +// equivalent doesn't, the bug is imgui-specific (require chain, +// build order, something about the C++ module). If it ALSO fails here, +// the bug is in the macro logic itself. + +options gen2 +options indenting = 4 +options no_unused_block_arguments = false +options no_unused_function_arguments = false + +module _widget_dispatch_helper shared + +require daslib/ast public +require daslib/ast_boost +require daslib/templates_boost + +struct StubState { + value : int + pending : bool +} + +def widget_prelude(widget_ident : string) { + // No-op — real dasImgui uses PushID. For dispatch testing we just need + // a name to call so the macro-injected `widget_prelude(widget_ident)` + // resolves at typer time. 
+} + +def widget_finalize(widget_ident : string; kind : string; var state : StubState) { + state.pending = true +} + +[function_macro(name = "widget")] +class WidgetFunctionMacro : AstFunctionAnnotation { + def override apply(var func : FunctionPtr; var group : ModuleGroup; + args : AnnotationArgumentList; var errors : das_string) : bool { + if (empty(func.arguments)) { + errors := "[widget] requires at least one parameter (state struct)" + return false + } + let kind = string(func.name) + let stateArg = func.arguments[0] + let stateTypeName = describe(stateArg._type) + // Capture the resolved structure pointer so the call_macro can build + // a TypeDecl for auto-emitted globals from the actual struct type + // (cross-module-safe). Type.alias with a qualified name doesn't + // resolve at the caller's scope. + if (stateArg._type == null || stateArg._type.structType == null) { + errors := "[widget] first parameter must be a struct (state)" + return false + } + let stateStruct = stateArg._type.structType + func.flags |= FunctionFlags.generated + var widgetIdentVar = new Variable(at = func.at, + name := "widget_ident", + _type <- new TypeDecl(baseType = Type.tString, at = func.at)) + widgetIdentVar.flags |= VariableFlags.generated + func.arguments |> emplace(widgetIdentVar, 1) + var fblk = new ExprBlock(at = func.body.at) + fblk.list |> push(qmacro(widget_prelude($i("widget_ident")))) + let oldBody = func.body as ExprBlock + for (el in oldBody.list) { + fblk.list |> push(clone_expression(el)) + } + for (ef in oldBody.finalList) { + fblk.finalList |> push(clone_expression(ef)) + } + func.body = fblk + var inst = new WidgetCallMacro( + kind_name = kind, + state_type_name = stateTypeName, + state_struct = stateStruct, + render_fn_name = kind) + compiling_module() |> add_call_macro(make_call_macro(kind, inst)) + return true + } +} + +class WidgetCallMacro : AstCallMacro { + kind_name : string + state_type_name : string + state_struct : Structure const? 
+ render_fn_name : string + + def override canVisitArgument(expr : ExprCallMacro?; argIndex : int) : bool { + return argIndex != 0 + } + + def override visit(prog : ProgramPtr; mod : Module?; var expr : ExprCallMacro?) : ExpressionPtr { + if (empty(expr.arguments)) { + macro_error(prog, expr.at, "{kind_name}(IDENT, ...): missing widget identifier") + return <- default + } + let identExpr = expr.arguments[0] + if (!(identExpr is ExprVar)) { + macro_error(prog, expr.at, "{kind_name}(IDENT, ...): first argument must be an identifier") + return <- default + } + let identName = string((identExpr as ExprVar).name) + if (find_variable(mod, identName) == null) { + // Build the type from the captured Structure pointer so the + // emitted global resolves cross-module. Type.alias with a + // qualified name like "_other_mod::Foo" doesn't resolve at the + // caller's scope. + var v = new Variable(at = expr.at, + name := identName, + _type <- new TypeDecl(baseType = Type.tStructure, + structType = unsafe(reinterpret(state_struct)), + at = expr.at)) + v.flags |= VariableFlags.generated | VariableFlags.private_variable + mod |> add_variable(v) + } + var newCall = new ExprCall(at = expr.at, name := render_fn_name) + newCall.arguments |> push(new ExprVar(at = expr.at, name := identName)) + newCall.arguments |> push(new ExprConstString(at = expr.at, value := identName)) + // Walk remaining args. Named-tuple literals `(name = val, ...)` carry + // the call's named-arg ergonomic; daslang's square-bracket form + // `[name = val]` builds an ExprNamedCall and bypasses call_macro + // dispatch entirely. Destructure named tuples into positional args + // (in source order — matches the user-facing parameter order). 
+ for (i in range(1, length(expr.arguments))) { + let a = expr.arguments[i] + if (a is ExprMakeTuple) { + let mt = a as ExprMakeTuple + if (length(mt.recordNames) == length(mt.values) && !empty(mt.recordNames)) { + for (v in mt.values) { + newCall.arguments |> push(clone_expression(v)) + } + continue + } + } + newCall.arguments |> push(clone_expression(a)) + } + return <- newCall + } +} diff --git a/tests/macro_call/test_call_macro_cross_expr_emit.das b/tests/macro_call/test_call_macro_cross_expr_emit.das new file mode 100644 index 0000000000..cc29eef46d --- /dev/null +++ b/tests/macro_call/test_call_macro_cross_expr_emit.das @@ -0,0 +1,39 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require _call_macro_emit_gate_helper + +// Pre-declared global to anchor the right-hand side of the +// "fresh + existing" mix in test_mixed_fresh_and_existing. +var _cx_existing : int = 7 + +[test] +def test_two_fresh_in_one_expression(t : T?) { + //! Two emit_or_reuse calls in ONE expression both emit fresh globals. + //! Each macro returns ExprVar(name) for a global it just add_variable()'d. + //! If the typer's reportAstChanged() invariant ever regresses, the + //! second ExprVar fails to resolve in the same infer pass and this + //! errors at compile time. + t |> equal(emit_or_reuse_int("_cx_two_fresh_a") + emit_or_reuse_int("_cx_two_fresh_b"), 0, + "two fresh globals emitted in same expression must both resolve, summing to 0+0") +} + +[test] +def test_mixed_fresh_and_existing(t : T?) { + //! Mix: left side is a fresh emit (_cx_mixed = 0), right side is the + //! pre-declared _cx_existing = 7. Confirms find_variable still sees + //! pre-existing module globals after a sibling emit in the same + //! expression node. + t |> equal(emit_or_reuse_int("_cx_mixed") + emit_or_reuse_int("_cx_existing"), 7, + "fresh+existing in same expression must resolve to 0+7") +} + +[test] +def test_repeated_same_name_in_one_expression(t : T?) { + //! 
Same fresh name twice in one expression: first call emits, second + //! call must find_variable() the just-emitted global and reuse it + //! (NOT add_variable() a duplicate). Duplicate would error at typer. + t |> equal(emit_or_reuse_int("_cx_repeat") + emit_or_reuse_int("_cx_repeat"), 0, + "repeated name in one expression must reuse, not duplicate") +} diff --git a/tests/macro_call/test_widget_dispatch.das b/tests/macro_call/test_widget_dispatch.das new file mode 100644 index 0000000000..febb11a3aa --- /dev/null +++ b/tests/macro_call/test_widget_dispatch.das @@ -0,0 +1,41 @@ +options gen2 +options indenting = 4 + +require dastest/testing_boost public +require _widget_dispatch_helper + +[widget] +def my_button(var state : StubState; text : string) : int { + return state.value +} + +[test] +def test_widget_call_macro_fires(t : T?) { + //! `my_button(BTN_A, "click me")` should be intercepted by the [widget] + //! call_macro: it auto-emits BTN_A as a module global of type StubState, + //! then rewrites the call to `my_button(BTN_A, "BTN_A", "click me")`, + //! matching the macro-expanded function signature + //! `def my_button(state, widget_ident, text)`. + //! + //! If the call_macro never fires, the typer sees a 2-arg call against + //! a 3-arg function and errors with "no matching functions or generics". + //! + //! If the auto-emit never happens, BTN_A is unresolved and the typer + //! errors with "can't locate variable 'BTN_A'". + let v = my_button(BTN_A, (text = "click me")) + t |> equal(v, 0, "auto-emitted BTN_A's StubState defaults to value=0") +} + +[test] +def test_widget_global_visible_outside_call(t : T?) { + //! After `my_button(BTN_B, ...)` auto-emits BTN_B, a separate + //! ExprVar(BTN_B) elsewhere in the module must resolve to that global. + //! This is the cross-expression resolution path (gap #3 in the + //! dasImgui plan) — guaranteed by reportAstChanged() in + //! ast_infer_type.cpp:5220. 
+ let v = my_button(BTN_B, (text = "x")) + t |> equal(v, 0, "fresh BTN_B reads as value=0") + BTN_B.value = 42 + let v2 = my_button(BTN_B, (text = "x")) + t |> equal(v2, 42, "second call sees the mutated module global") +} diff --git a/tests/strings/strings_jaccard.das b/tests/strings/strings_jaccard.das new file mode 100644 index 0000000000..42a2378047 --- /dev/null +++ b/tests/strings/strings_jaccard.das @@ -0,0 +1,72 @@ +options gen2 +require dastest/testing_boost +require daslib/strings_boost + +def make_set(items : array) : table { + var s : table + for (x in items) { + s |> insert(x) + } + return <- s +} + +[test] +def test_jaccard_table(t : T?) { + t |> run("equal sets") @@(t : T?) { + let a <- make_set(["foo", "bar", "baz"]) + let b <- make_set(["foo", "bar", "baz"]) + t |> equal(jaccard(a, b), 1.0f) + } + + t |> run("disjoint sets") @@(t : T?) { + let a <- make_set(["foo", "bar"]) + let b <- make_set(["baz", "qux"]) + t |> equal(jaccard(a, b), 0.0f) + } + + t |> run("partial overlap") @@(t : T?) { + let a <- make_set(["foo", "bar", "baz"]) + let b <- make_set(["foo", "bar", "qux"]) + t |> equal(jaccard(a, b), 0.5f) + } + + t |> run("subset") @@(t : T?) { + let a <- make_set(["foo", "bar"]) + let b <- make_set(["foo", "bar", "baz", "qux"]) + t |> equal(jaccard(a, b), 0.5f) + } + + t |> run("empty either side returns 0") @@(t : T?) { + let a <- make_set(["foo"]) + let b : table + t |> equal(jaccard(a, b), 0.0f) + t |> equal(jaccard(b, a), 0.0f) + } + + t |> run("both empty returns 0") @@(t : T?) { + let a : table + let b : table + t |> equal(jaccard(a, b), 0.0f) + } +} + +[test] +def test_jaccard_array(t : T?) { + t |> run("array overload matches table form") @@(t : T?) { + let a <- ["foo", "bar", "baz"] + let b <- ["foo", "bar", "qux"] + t |> equal(jaccard(a, b), 0.5f) + } + + t |> run("array dedupes via set conversion") @@(t : T?) 
{ + let a <- ["foo", "foo", "bar"] // dedupes to {foo, bar} + let b <- ["foo", "bar"] + t |> equal(jaccard(a, b), 1.0f) + } + + t |> run("array empty returns 0") @@(t : T?) { + let a <- ["foo"] + let b : array + t |> equal(jaccard(a, b), 0.0f) + } +} diff --git a/utils/common/git_signature.das b/utils/common/git_signature.das new file mode 100644 index 0000000000..510763657c --- /dev/null +++ b/utils/common/git_signature.das @@ -0,0 +1,194 @@ +// Shared git-aware staleness signature for utils/* index caches. +// +// Used by both `utils/mcp/tools/cpp_common.das` (cpp source-search index) +// and `utils/mouse/index.das` (markdown docs index). Each consumer plugs +// in its own path predicate and search-dirs scope; this module owns the +// git plumbing, the filesystem fallback, and the determinism guarantees +// (sorted fs walk, per-tree HEAD hash). + +options gen2 + +require strings public +require daslib/fio public +require daslib/strings_boost public + +// Repo-relative path with `/`-separators; falls back to `path` on error +// (mirrors `make_relative_path` from utils/mcp/tools/common.das, kept +// local so this module has no cross-tool dependencies). +def private to_relative(path, base : string) : string { + var err : string + let rel = relative(path, base, err) + return to_generic_path(empty(rel) ? path : rel) +} + +// Strip `git status --porcelain` 3-char status prefix; return the new +// name on rename lines (`R old -> new`). +def public extract_status_path(line : string) : string { + if (length(line) <= 3) { + return "" + } + var path = slice(line, 3) + let arrow = find(path, " -> ") + if (arrow >= 0) { + path = slice(path, arrow + 4) + } + return path +} + +// stat() wrapper that returns 0 on missing/error so signature compute +// doesn't crash on a deleted/inaccessible file. +def public stat_mtime_or_zero(path : string) : int64 { + let st = stat(path) + return st.is_valid ? 
int64(st.mtime) : 0l +} + +def private is_directory(path : string) : bool { + let s = stat(path) + return s.is_valid && s.is_dir +} + +// Filesystem-only fallback signature. Walks each `search_dirs_abs` +// recursively, collects (repo_rel_path, mtime) for files passing +// `predicate`, **sorts the collection by path**, then hashes. The sort +// makes the signature deterministic across platforms — `_findfirst` on +// Windows and `readdir()` on Unix don't guarantee stable order, so the +// raw walk order would otherwise produce different signatures for the +// same on-disk state. +// +// Returns "fs:HASH" on success, "" if every search_dir is missing. +def public compute_filesystem_signature( + root : string; + predicate : lambda<(repo_rel_path : string) : bool>; + search_dirs_abs : array + ) : string { + var collected : array> + var any_dir = false + for (sd in search_dirs_abs) { + if (!is_directory(sd)) { + continue + } + any_dir = true + var sd_rel = to_relative(sd, root) + if (sd_rel == ".") { + sd_rel = "" + } + dir_rec(sd) $(filename, is_dir) { + if (is_dir) { + return + } + let leaf = to_generic_path(filename) + let rel = empty(sd_rel) ? leaf : "{sd_rel}/{leaf}" + if (!invoke(predicate, rel)) { + return + } + collected |> push((rel, stat_mtime_or_zero(path_join(sd, filename)))) + } + } + if (!any_dir) { + return "" + } + sort(collected) <| $(a, b) => a._0 < b._0 + let combined = build_string() $(var w) { + for (item in collected) { + w |> write("{item._0}:{item._1}\n") + } + } + return "fs:{hash(combined)}" +} + +// Compute a git-aware staleness signature for the index covering +// `search_dirs_abs`, considering only paths matching `predicate`. +// +// `root` is any directory inside the repo; the module discovers the git +// toplevel via `git -C root rev-parse --show-toplevel`. +// +// `predicate(repo_rel_path)` decides whether a path is part of the index +// (e.g. ends_with(".md") for docs, ends_with(".cpp"|".h"|".hpp") for cpp). 
+// Status output is pre-narrowed to paths under any `search_dirs_abs` +// entry, so the predicate only needs the file-class filter. +// +// Strategy: +// 1. Discover toplevel; on failure → compute_filesystem_signature. +// 2. For each search_dir, `git rev-parse HEAD:` → tree hash. +// Per-tree hashing fixes the global-HEAD over-invalidation: only +// commits that change a tracked tree under an indexed dir trigger +// a rebuild. (Branch switches that don't touch indexed trees +// become no-ops; `git pull` of unrelated subtrees stops re-churning +// the index.) On rev-parse error for a dir (untracked at HEAD), use +// a placeholder so the next status line catches new files. +// 3. `git status --porcelain --untracked-files=normal`. Per line: +// extract path; reject if not under any search_dir; reject if +// predicate is false; otherwise fold ":" into hash. +// 4. Return "git:HASH". Any git step error → fall back to fs. +// +// Returns "" only if both git and the filesystem walk fail (caller +// should treat empty as "do not invalidate blindly"). +def public compute_signature( + root : string; + predicate : lambda<(repo_rel_path : string) : bool>; + search_dirs_abs : array + ) : string { + var top_buf : string + if (run_and_capture(["git", "-C", root, "rev-parse", "--show-toplevel"], top_buf, 5.0) != 0 + || empty(strip(top_buf))) { + return compute_filesystem_signature(root, predicate, search_dirs_abs) + } + let toplevel = strip(top_buf) + + var dirs_rel : array + dirs_rel |> reserve(length(search_dirs_abs)) + for (sd in search_dirs_abs) { + var rel = to_relative(sd, toplevel) + if (rel == ".") { + rel = "" + } + dirs_rel |> push(rel) + } + // Sort so the tree-hash fold below produces an order-independent + // signature regardless of how the caller ordered `search_dirs_abs`. 
+ sort(dirs_rel) <| $(a, b) => a < b + + var tree_parts : array + for (rel in dirs_rel) { + var tb : string + // Empty rel means search_dir == toplevel; rev-parse HEAD: errors, + // so use bare HEAD in that case. + let spec = empty(rel) ? "HEAD" : "HEAD:{rel}" + if (run_and_capture(["git", "-C", toplevel, "rev-parse", spec], tb, 5.0) == 0 + && !empty(strip(tb))) { + tree_parts |> push("{rel}={strip(tb)}") + } else { + tree_parts |> push("{rel}=untracked") + } + } + + var status_buf : string + if (run_and_capture(["git", "-C", toplevel, "status", "--porcelain", "--untracked-files=normal"], + status_buf, 5.0) != 0) { + return compute_filesystem_signature(root, predicate, search_dirs_abs) + } + let lines <- split(status_buf, "\n") + let combined = build_string() $(var w) { + for (t in tree_parts) { + w |> write("{t}\n") + } + for (line in lines) { + let p = extract_status_path(line) + if (empty(p)) { + continue + } + var under = false + for (rel in dirs_rel) { + if (empty(rel) || p == rel || starts_with(p, "{rel}/")) { + under = true + break + } + } + if (!under || !invoke(predicate, p)) { + continue + } + w |> write("{line}:{stat_mtime_or_zero(path_join(toplevel, p))}\n") + } + } + return "git:{hash(combined)}" +} diff --git a/utils/common/tests/test_git_signature.das b/utils/common/tests/test_git_signature.das new file mode 100644 index 0000000000..fb07272236 --- /dev/null +++ b/utils/common/tests/test_git_signature.das @@ -0,0 +1,151 @@ +options gen2 + +require dastest/testing_boost public +require strings +require daslib/fio +require ../git_signature.das + +// ─── helpers ───────────────────────────────────────────────────────── + +def make_temp_root(t : T?) 
: string { + let r = create_temp_directory_result("gitsig_test") + if (!(r is value)) { + t |> failure("could not create temp dir: {unsafe(r.error)}") + return "" + } + return unsafe(r.value) +} + +def cleanup_root(root : string) { + if (!empty(root)) { + rmdir_rec(root) + } +} + +def md_predicate(p : string) : bool { + return ends_with(p, ".md") +} + +// ─── extract_status_path ───────────────────────────────────────────── + +[test] +def test_extract_status_path_basic(t : T?) { + t |> run("modified file: ' M file.txt' -> 'file.txt'") <| @(t : T?) { + t |> equal(extract_status_path(" M file.txt"), "file.txt") + } + t |> run("untracked file: '?? new.md' -> 'new.md'") <| @(t : T?) { + t |> equal(extract_status_path("?? new.md"), "new.md") + } +} + +[test] +def test_extract_status_path_rename(t : T?) { + t |> run("rename: 'R old.txt -> new.txt' returns the new name") <| @(t : T?) { + t |> equal(extract_status_path("R old.txt -> new.txt"), "new.txt") + } +} + +[test] +def test_extract_status_path_short(t : T?) { + t |> run("line of length <= 3 returns empty") <| @(t : T?) { + t |> equal(extract_status_path(""), "") + t |> equal(extract_status_path("M"), "") + t |> equal(extract_status_path(" M "), "") + } +} + +// ─── compute_filesystem_signature ──────────────────────────────────── + +[test] +def test_filesystem_signature_deterministic(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + t |> success(fwrite("{root}/a.md", "alpha")) + t |> success(fwrite("{root}/b.md", "beta")) + t |> success(fwrite("{root}/c.md", "gamma")) + t |> run("two invocations on the same on-disk state produce the same signature") <| @(t : T?) 
{ + let sig1 = compute_filesystem_signature(root, @(p : string) => md_predicate(p), [root]) + let sig2 = compute_filesystem_signature(root, @(p : string) => md_predicate(p), [root]) + t |> success(!empty(sig1), "first signature should not be empty") + t |> equal(sig1, sig2) + } + cleanup_root(root) +} + +[test] +def test_filesystem_signature_predicate_filters(t : T?) { + let root_a = make_temp_root(t) + let root_b = make_temp_root(t) + if (empty(root_a) || empty(root_b)) { + return + } + // root_a: just .md files. root_b: same .md files plus .txt that should + // be filtered by the predicate. Signatures must be equal. + t |> success(fwrite("{root_a}/a.md", "alpha")) + t |> success(fwrite("{root_a}/b.md", "beta")) + t |> success(fwrite("{root_b}/a.md", "alpha")) + t |> success(fwrite("{root_b}/b.md", "beta")) + t |> success(fwrite("{root_b}/skip.txt", "should be filtered")) + t |> success(fwrite("{root_b}/skip.cmake", "also filtered")) + t |> run("predicate(.md only) makes .txt/.cmake invisible to the signature") <| @(t : T?) { + // Signatures hash relative paths; since temp dirs differ, we can't + // compare across roots. Compute against a SINGLE root with and + // without the txt files. + let with_txt = compute_filesystem_signature(root_b, @(p : string) => md_predicate(p), [root_b]) + // Delete .txt files; signature should not change. + var rm_err : string + let _drop_txt = remove(path_join(root_b, "skip.txt"), rm_err) + let _drop_cmake = remove(path_join(root_b, "skip.cmake"), rm_err) + let after_drop = compute_filesystem_signature(root_b, @(p : string) => md_predicate(p), [root_b]) + t |> success(!empty(with_txt), "should produce a non-empty sig") + t |> equal(with_txt, after_drop) + } + cleanup_root(root_a) + cleanup_root(root_b) +} + +[test] +def test_filesystem_signature_returns_empty_when_dirs_missing(t : T?) { + t |> run("all search_dirs missing -> empty signature") <| @(t : T?) 
{ + let sig = compute_filesystem_signature("/", @(p : string) => md_predicate(p), ["/__definitely_does_not_exist_42__"]) + t |> equal(sig, "") + } +} + +// ─── compute_signature ─────────────────────────────────────────────── + +[test] +def test_compute_signature_returns_non_empty(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + t |> success(fwrite("{root}/a.md", "alpha")) + t |> run("returns a non-empty signature with either git: or fs: prefix") <| @(t : T?) { + let sig = compute_signature(root, @(p : string) => md_predicate(p), [root]) + t |> success(!empty(sig), "compute_signature should not return empty for a populated dir") + let is_git = starts_with(sig, "git:") + let is_fs = starts_with(sig, "fs:") + t |> success(is_git || is_fs, "expected 'git:' or 'fs:' prefix, got: {sig}") + } + cleanup_root(root) +} + +[test] +def test_compute_signature_no_git_falls_back(t : T?) { + // create_temp_directory creates dirs in the system temp area (e.g. + // /var/folders/... on macOS, /tmp on Linux), which is not inside a + // git checkout. compute_signature should fall back to filesystem-only. + let root = make_temp_root(t) + if (empty(root)) { + return + } + t |> success(fwrite("{root}/a.md", "alpha")) + t |> run("system temp dir (not in git) -> fs: prefix") <| @(t : T?) 
{ + let sig = compute_signature(root, @(p : string) => md_predicate(p), [root]) + t |> success(starts_with(sig, "fs:"), "expected fs: prefix outside git, got: {sig}") + } + cleanup_root(root) +} diff --git a/utils/mcp/protocol.das b/utils/mcp/protocol.das index 28fe6d68e7..b4dd1ccc7a 100644 --- a/utils/mcp/protocol.das +++ b/utils/mcp/protocol.das @@ -315,7 +315,8 @@ def handle_tools_list(id_json : string) : string { "kind" => PropertySchema(_type = "string", description = "Limit to kind: 'function', 'generic', 'struct', 'handled', 'field', 'enum', 'global'"), "file" => PropertySchema(_type = "string", description = "Optional .das file - if provided, searches all modules loaded by that file (including daslib)"), "project" => PROJECT_PROP, - "with_cpp_source" => PropertySchema(_type = "string", description = "If 'true', for each result with a C++ implementation (builtin functions, handled types), append the resolved C++ source location via the cpp index. Adds ~2s on first call (lazy index build).") + "with_cpp_source" => PropertySchema(_type = "string", description = "If 'true', for each result with a C++ implementation (builtin functions, handled types), append the resolved C++ source location via the cpp index. Adds ~2s on first call (lazy index build)."), + "cpp_dirs" => PropertySchema(_type = "string", description = "Optional comma-separated repo-relative paths to scope the C++ source-redirect lookup to (e.g. 'src/builtin'). Only consulted when with_cpp_source='true'. Empty -> use the cached global C++ index over CPP_SEARCH_DIRS. Non-empty -> fresh scoped scan, no global cache touched. 
Useful when you know which subtree the symbol lives in and want to skip the multi-hundred-file index build.") }, ["query"] )) @@ -347,7 +348,8 @@ def handle_tools_list(id_json : string) : string { "column" => PropertySchema(_type = "string", description = "Column number (1-based)"), "no_opt" => PropertySchema(_type = "string", description = "If 'true', disable optimizations to preserve original AST (constant folding, inlining)"), "project" => PROJECT_PROP, - "with_cpp_source" => PropertySchema(_type = "string", description = "If 'true' and the resolved symbol is a builtin function or a handled type, append the resolved C++ source location. Adds ~2s on first call (lazy index build).") + "with_cpp_source" => PropertySchema(_type = "string", description = "If 'true' and the resolved symbol is a builtin function or a handled type, append the resolved C++ source location. Adds ~2s on first call (lazy index build)."), + "cpp_dirs" => PropertySchema(_type = "string", description = "Optional comma-separated repo-relative paths to scope the C++ source-redirect lookup to (e.g. 'src/builtin'). Only consulted when with_cpp_source='true'. Empty -> cached global C++ index. 
Non-empty -> fresh scoped scan, no global cache touched.") }, ["file", "line", "column"] )) @@ -644,7 +646,7 @@ def dispatch_tool(tool_name, arg1, arg2, arg3, arg4, arg5, arg6, project : strin } elif (tool_name == "convert_to_gen2") { return do_convert_to_gen2(arg1, arg2 == "true") } elif (tool_name == "goto_definition") { - return do_goto_definition(arg1, arg2, arg3, arg4, project, arg5 == "true") + return do_goto_definition(arg1, arg2, arg3, arg4, project, arg5 == "true", arg6) } elif (tool_name == "type_of") { return do_type_of(arg1, arg2, arg3, arg4, project) } elif (tool_name == "find_references") { @@ -654,7 +656,7 @@ def dispatch_tool(tool_name, arg1, arg2, arg3, arg4, arg5, arg6, project : strin } elif (tool_name == "list_modules") { return do_list_modules(arg1 == "true") } elif (tool_name == "find_symbol") { - return do_find_symbol(arg1, arg2, arg3, project, arg4 == "true") + return do_find_symbol(arg1, arg2, arg3, project, arg4 == "true", arg5) } elif (tool_name == "list_module_api") { return do_list_module_api(arg1, arg2, arg3, arg4, project) } elif (tool_name == "grep_usage") { @@ -782,6 +784,7 @@ def handle_tools_call(id_json : string; params : JsonValue?) : string { arg3 = get_string_arg(args, "column") arg4 = get_string_arg(args, "no_opt") arg5 = get_string_arg(args, "with_cpp_source") + arg6 = get_string_arg(args, "cpp_dirs") } elif (name == "type_of") { arg1 = get_string_arg(args, "file") if (empty(arg1)) { @@ -830,6 +833,7 @@ def handle_tools_call(id_json : string; params : JsonValue?) 
: string { arg2 = get_string_arg(args, "kind") arg3 = get_string_arg(args, "file") arg4 = get_string_arg(args, "with_cpp_source") + arg5 = get_string_arg(args, "cpp_dirs") } elif (name == "list_module_api") { arg1 = get_string_arg(args, "module") if (empty(arg1)) { diff --git a/utils/mcp/subtools/_test_panic.das b/utils/mcp/subtools/_test_panic.das new file mode 100644 index 0000000000..94439c18ab --- /dev/null +++ b/utils/mcp/subtools/_test_panic.das @@ -0,0 +1,14 @@ +options gen2 +options no_unused_function_arguments = false +options no_unused_block_arguments = false + +require ../tools/common.das public + +//! Test fixture for run_mcp_subtool. Prints a marker to stdout, then panics +//! to drive a non-zero exit code through the wrapper. + +[export] +def main { + print("STDOUT_BEFORE_PANIC") + panic("intentional panic from _test_panic subtool") +} diff --git a/utils/mcp/subtools/_test_passthrough.das b/utils/mcp/subtools/_test_passthrough.das new file mode 100644 index 0000000000..c44c2519fe --- /dev/null +++ b/utils/mcp/subtools/_test_passthrough.das @@ -0,0 +1,21 @@ +options gen2 +options no_unused_function_arguments = false +options no_unused_block_arguments = false + +require ../tools/common.das public + +//! Test fixture for run_mcp_subtool. Echoes its `--`-separated args back as +//! a `make_tool_result` payload so the wrapper test can assert on the round-trip. 
+ +[export] +def main { + let raw <- get_command_line_arguments() + let args <- subtool_user_args(raw) + let payload = build_string() $(var w) { + w |> write("PASSTHROUGH:") + for (a in args) { + w |> write(" {a}") + } + } + print(make_tool_result(payload)) +} diff --git a/utils/mcp/subtools/_test_sleep.das b/utils/mcp/subtools/_test_sleep.das new file mode 100644 index 0000000000..2c2008fab0 --- /dev/null +++ b/utils/mcp/subtools/_test_sleep.das @@ -0,0 +1,22 @@ +options gen2 +options no_unused_function_arguments = false +options no_unused_block_arguments = false + +require ../tools/common.das public +require daslib/jobque + +//! Test fixture for run_mcp_subtool. Sleeps for N milliseconds (argv[0]) +//! and exits clean. Used to drive the timeout path in the wrapper. + +[export] +def main { + let raw <- get_command_line_arguments() + let args <- subtool_user_args(raw) + if (empty(args)) { + print(make_tool_result("_test_sleep: expected 1 arg (milliseconds)", true)) + return + } + let ms = to_int(string(args[0])) + sleep(uint(ms)) + print(make_tool_result("DONE_AFTER_{ms}ms")) +} diff --git a/utils/mcp/subtools/aot.das b/utils/mcp/subtools/aot.das new file mode 100644 index 0000000000..f765e334a4 --- /dev/null +++ b/utils/mcp/subtools/aot.das @@ -0,0 +1,158 @@ +options gen2 +options rtti +options indenting = 4 +options no_unused_function_arguments = false +options no_unused_block_arguments = false + +require ../tools/common.das public +require daslib/aot_cpp +require daslib/ast_boost +require strings + +//! Subprocess form of `aot`. Argv: . 
+
+// One AOT-eligible function of the compiled module.
+struct AotFuncInfo {
+    name : string       // daslang function name
+    aot_name : string   // result of aotFuncName(fn)
+    signature : string  // "name(arg : type; ...) : result", built in collect_aot_functions
+}
+
+def collect_aot_functions(program : smart_ptr<Program>) : array<AotFuncInfo> {
+    //! Walks the functions of the program's own module and returns one
+    //! ``AotFuncInfo`` per function that is used and is not noAot, template,
+    //! built-in or generated.
+    var result : array<AotFuncInfo>
+    let this_mod = program.getThisModule
+    this_mod |> for_each_module_function($(fn) {
+        if (fn.flags.noAot || fn.moreFlags.isTemplate || !fn.flags.used || fn.flags.builtIn || fn.flags.generated) {
+            return
+        }
+        let sig = build_string() $(var w) {
+            write(w, fn.name)
+            write(w, "(")
+            var first = true
+            for (i in range(length(fn.arguments))) {
+                // fakeContext / fakeLineInfo arguments are left out of the printed signature
+                if (fn.arguments[i]._type.baseType == Type.fakeContext || fn.arguments[i]._type.baseType == Type.fakeLineInfo) {
+                    continue
+                }
+                if (!first) {
+                    write(w, "; ")
+                }
+                first = false
+                write(w, "{string(fn.arguments[i].name)} : {describe(fn.arguments[i]._type)}")
+            }
+            write(w, ")")
+            if (fn.result != null && !fn.result.isVoid) {
+                write(w, " : {describe(fn.result)}")
+            }
+        }
+        result |> emplace(AotFuncInfo(
+            name = string(fn.name),
+            aot_name = aotFuncName(fn),
+            signature = sig
+        ))
+    })
+    return <- result
+}
+
+def match_function_name(fn_name, query : string) : bool {
+    //! True when ``fn_name`` equals ``query``, or when the part of ``fn_name``
+    //! after its first backtick equals ``query``.
+    if (fn_name == query) {
+        return true
+    }
+    let bt = find(fn_name, "`")
+    return bt >= 0 && slice(fn_name, bt + 1) == query
+}
+
+def match_function_generic(fn_name, query : string; program : smart_ptr<Program>) : bool {
+    //! True when a module function named ``fn_name`` was instantiated from a
+    //! generic whose name matches ``query`` (per ``match_function_name``).
+    var found = false
+    let this_mod = program.getThisModule
+    this_mod |> for_each_module_function($(fn) {
+        if (found || fn.name != fn_name) {
+            return
+        }
+        if (fn.fromGeneric != null && match_function_name(string(fn.fromGeneric.name), query)) {
+            found = true
+        }
+    })
+    return found
+}
+
+def run_aot(file, func_name : string; project : string = "") : string {
+    //! Produces AOT output for ``file``.
+    //!
+    //! With an empty ``func_name`` the whole module is emitted (prefixed by
+    //! any compile warnings). Otherwise candidates are looked up in three
+    //! passes, stopping at the first pass that yields a match: exact name,
+    //! exact AOT name, then suffix-after-backtick / generic-origin name.
+    //! No match lists the available signatures; several matches list the
+    //! candidates with their AOT names for disambiguation.
+    if (empty(file)) {
+        return make_tool_result("missing 'file' argument", true)
+    }
+    if (empty(func_name)) {
+        return compile_and_simulate_ctx(file, project) $(program : smart_ptr<Program>; var ctx : smart_ptr<Context>; issues : string) {
+            var warnings = ""
+            if (!empty(issues)) {
+                warnings = "Warnings:\n{issues}\n"
+            }
+            var result : string
+            using() $(var cop : CodeOfPolicies) {
+                cop.aot = false
+                cop.aot_module = true
+                result = "{warnings}{_::run_aot(program.get_ptr(), ctx.get_ptr(), cop)}"
+            }
+            return result
+        }
+    }
+    return compile_and_simulate_ctx(file, project) $(program : smart_ptr<Program>; var ctx : smart_ptr<Context>; issues : string) {
+        let all_funcs <- collect_aot_functions(program)
+        // indices into all_funcs
+        var matches : array<int>
+        for (i in range(length(all_funcs))) {
+            if (all_funcs[i].name == func_name) {
+                matches |> push(i)
+            }
+        }
+        if (empty(matches)) {
+            for (i in range(length(all_funcs))) {
+                if (all_funcs[i].aot_name == func_name) {
+                    matches |> push(i)
+                }
+            }
+        }
+        if (empty(matches)) {
+            for (i in range(length(all_funcs))) {
+                if (match_function_name(all_funcs[i].name, func_name) || match_function_generic(all_funcs[i].name, func_name, program)) {
+                    matches |> push(i)
+                }
+            }
+        }
+        if (empty(matches)) {
+            var sigs : array<string>
+            for (fi in all_funcs) {
+                sigs |> push(fi.signature)
+            }
+            return "No function matching '{func_name}' found.\nAvailable functions:\n" + join(sigs, "\n")
+        }
+        if (length(matches) == 1) {
+            let matched_name = all_funcs[matches[0]].aot_name
+            var result : string
+            using() $(var cop : CodeOfPolicies) {
+                cop.aot = false
+                cop.aot_module = true
+                result = run_aot_function(program.get_ptr(), ctx.get_ptr(), cop, matched_name)
+            }
+            if (empty(result)) {
+                return "Function '{matched_name}' produced no AOT output (marked noAot or no_aot)."
+            }
+            return result
+        }
+        var lines : array<string>
+        for (idx in matches) {
+            lines |> push("{all_funcs[idx].signature} [{all_funcs[idx].aot_name}]")
+        }
+        return "Multiple functions match '{func_name}':\n" + join(lines, "\n") + "\nUse the bracketed name for exact selection."
+    }
+}
+
+[export]
+def main {
+    //! Entry point: expects three user arguments (file, func_name, project)
+    //! and prints the result of ``run_aot``.
+    let raw <- get_command_line_arguments()
+    let args <- subtool_user_args(raw)
+    if (length(args) < 3) {
+        print(make_tool_result("aot subtool: expected 3 args (file, func_name, project), got {length(args)}", true))
+        return
+    }
+    let file = string(args[0])
+    let func_name = string(args[1])
+    let project = string(args[2])
+    print(run_aot(file, func_name, project))
+}
diff --git a/utils/mcp/subtools/compile_check.das b/utils/mcp/subtools/compile_check.das
new file mode 100644
index 0000000000..1b1e417190
--- /dev/null
+++ b/utils/mcp/subtools/compile_check.das
@@ -0,0 +1,119 @@
+options gen2
+options rtti
+options indenting = 4
+options no_unused_function_arguments = false
+options no_unused_block_arguments = false
+
+require ../tools/common.das public
+require daslib/json_boost
+
+//! Subprocess form of compile_check. Argv (after daslang exe + script path):
+//!     <file> <project> <json>
+//! where <json> is the literal "true" or "false". Prints the make_tool_result
+//! envelope to stdout. Exits 0 on success (including tool-level "compile
+//! failed" — those come back as isError=true inside the envelope).
+//!
+//! Lives here (not in tools/) because the in-process logic must NOT be
+//! reachable from tools/compile_check.das, which is now a thin popen wrapper.
+//! Putting it here breaks any circular import path through the wrapper.
+
+// Per-file outcome reported by the JSON form of compile_check.
+struct CompileResult {
+    file : string
+    success : bool
+    errors : string
+    warnings : string
+}
+
+def compile_check_single(file : string; project : string = "") : string {
+    //! Text form for one file: on success returns any warnings followed by
+    //! "Compilation OK." and the program's function list.
+    return compile_and_simulate(file, project) $(program; issues) {
+        var warnings = ""
+        if (!empty(issues)) {
+            warnings = "Warnings:\n{issues}\n"
+        }
+        let funcs = build_string() $(var writer) {
+            write_function_list(writer, program)
+        }
+        return "{warnings}Compilation OK.\n{funcs}"
+    }
+}
+
+def private compile_check_single_json(file : string; project : string = "") : CompileResult {
+    //! Compiles and simulates ``file`` and records the outcome in a
+    //! ``CompileResult``: compile or simulate errors go to ``errors``;
+    //! ``success`` is set only when both steps pass, with compile issues
+    //! kept as ``warnings``.
+    var result = CompileResult(file = file)
+    var inscope access <- make_file_access(project)
+    using() $(var mg : ModuleGroup) {
+        using() $(var cop : CodeOfPolicies) {
+            cop.threadlock_context = true
+            cop.ignore_shared_modules = true
+            compile_file(file, access, unsafe(addr(mg)), cop) $(ok; program; issues) {
+                if (!ok) {
+                    result.errors = string(issues)
+                } else {
+                    simulate(program) $(sok; ctx; serrors) {
+                        if (!sok) {
+                            result.errors = string(serrors)
+                        } else {
+                            result.success = true
+                            if (!empty(issues)) {
+                                result.warnings = string(issues)
+                            }
+                        }
+                    }
+                }
+            }
+        }
+    }
+    return result
+}
+
+def run_compile_check(file : string; project : string = ""; json : bool = false) : string {
+    //! Expands ``file`` with ``parse_file_list`` and checks every match.
+    //! Text mode: a single file returns ``compile_check_single`` directly;
+    //! several files produce PASS/FAIL lines plus a summary, flagged as an
+    //! error when any file failed. JSON mode: returns the serialized array of
+    //! ``CompileResult``.
+    var files : array
+    parse_file_list(file, files)
+    if (empty(files)) {
+        if (json) {
+            return make_tool_result("[]", true)
+        }
+        return make_tool_result("no files matched: {file}", true)
+    }
+    if (!json) {
+        if (length(files) == 1) {
+            return compile_check_single(files[0], project)
+        }
+        var passed = 0
+        var failed = 0
+        let output = build_string() $(var w) {
+            for (f in files) {
+                let result = compile_check_single(f, project)
+                // failure is detected by looking for the isError marker in the returned envelope text
+                let is_error = find(result, "\"isError\":true") >= 0
+                if (is_error) {
+                    failed++
+                    write(w, "FAIL {f}\n")
+                } else {
+                    passed++
+                    write(w, "PASS {f}\n")
+                }
+            }
+            write(w, "\n{passed + failed} files, {passed} passed, {failed} failed\n")
+        }
+        return make_tool_result(output, failed > 0)
+    }
+    var results : array
+    results |> reserve(length(files))
+    for (f in files) {
+        results |> emplace(compile_check_single_json(f, project))
+    }
+    return make_tool_result(sprint_json(results, false))
+}
+
+[export]
+def main {
+    //! Entry point: expects three user arguments (file, project, json);
+    //! ``json`` is enabled only by the literal "true".
+    let raw <- get_command_line_arguments()
+    let args <- subtool_user_args(raw)
+    if (length(args) < 3) {
+        print(make_tool_result("compile_check subtool: expected 3 args (file, project, json), got {length(args)}", true))
+        return
+    }
+    let file = string(args[0])
+    let project = string(args[1])
+    let json = string(args[2]) == "true"
+    print(run_compile_check(file, project, json))
+}
diff --git a/utils/mcp/subtools/describe_type.das b/utils/mcp/subtools/describe_type.das
new file mode 100644
index 0000000000..abdeb3fc0b
--- /dev/null
+++ b/utils/mcp/subtools/describe_type.das
@@ -0,0 +1,262 @@
+options gen2
+options rtti
+options indenting = 4
+options no_unused_function_arguments = false
+options no_unused_block_arguments = false
+
+require ../tools/common.das public
+
+//! Subprocess form of `describe_type`. Argv: <name> <module_name> <project>.
+
+def write_struct_detail(var w : StringBuilderWriter; value; mod : Module?) {
+    //! Writes a struct/class header (parent, module, line), its fields, and
+    //! the class methods in ``mod`` whose first argument is this structure.
+    if (value.flags.isClass) {
+        write(w, "class {value.name}")
+    } else {
+        write(w, "struct {value.name}")
+    }
+    if (value.parent != null) {
+        write(w, " : {value.parent.name}")
+    }
+    write(w, " // module {mod.name}")
+    if (int(value.at.line) > 0) {
+        write(w, ", line {int(value.at.line)}")
+    }
+    write(w, "\n")
+    if (!empty(value.fields)) {
+        write(w, "Fields:\n")
+        for (fld in value.fields) {
+            write(w, " {fld.name} : {describe(fld._type)}")
+            if (fld.init != null) {
+                write(w, " // has default")
+            }
+            write(w, "\n")
+        }
+    }
+    var methods : array
+    for_each_function(mod, "") $(func) {
+        if (!func.flags.isClassMethod || empty(func.arguments)) {
+            return
+        }
+        let self_type = func.arguments[0]._type
+        // generated functions are kept only when they come from a generic
+        if (self_type == null || self_type.structType == null ||
+            self_type.structType.name != value.name ||
+            (func.flags.generated && func.fromGeneric == null)) {
+            return
+        }
+        let entry = build_string() $(var ew) {
+            write(ew, " def {func.name}")
+            write_func_signature(ew, func)
+        }
+        methods |> push(entry)
+    }
+    if (!empty(methods)) {
+        write(w, "Methods:\n")
+        for (m in methods) {
+            write(w, "{m}\n")
+        }
+    }
+}
+
+def write_handled_type_detail(var w : StringBuilderWriter; value : Annotation const; mod : Module?) {
+    //! Writes a handled (C++-bound) type: header with parents, module and C++
+    //! name, its fields, and the non-private functions of ``mod`` whose first
+    //! argument is a handle of this type.
+    write(w, "handled {value.name}")
+    // NOTE(review): the target type of this reinterpret looks stripped in this copy — confirm against the repository.
+    var ann = unsafe(reinterpret value)
+    basic_struct_for_each_parent(*ann) $(parent) {
+        write(w, " : {parent.name}")
+    }
+    write(w, " // module {mod.name}")
+    if (!empty(value.cppName)) {
+        write(w, ", C++ {value.cppName}")
+    }
+    write(w, "\n")
+    var field_count = 0
+    for_each_field(*ann) $(fname, cppName, xtype, offset) {
+        // the "Fields:" heading is emitted lazily, only once a field exists
+        if (field_count == 0) {
+            write(w, "Fields:\n")
+        }
+        field_count++
+        write(w, " {fname} : {describe(xtype)}")
+        if (!empty(cppName) && cppName != fname) {
+            write(w, " // C++ {cppName}")
+        }
+        write(w, "\n")
+    }
+    var methods : array
+    for_each_function(mod, "") $(func) {
+        if (func.flags.privateFunction || func.flags._lambda || func.flags.generated || empty(func.arguments)) {
+            return
+        }
+        let first_type = func.arguments[0]._type
+        if (first_type == null) {
+            return
+        }
+        if (first_type.baseType == Type.tHandle) {
+            if (first_type.annotation != null && first_type.annotation.name == value.name) {
+                let entry = build_string() $(var ew) {
+                    write(ew, " def {func.name}")
+                    write_func_signature(ew, func)
+                }
+                methods |> push(entry)
+            }
+        }
+    }
+    if (!empty(methods)) {
+        write(w, "Functions (first arg is {value.name}):\n")
+        for (m in methods) {
+            write(w, "{m}\n")
+        }
+    }
+}
+
+def write_enum_detail(var w : StringBuilderWriter; value; mod : Module?) {
+    //! Writes an enumeration header and each entry, with its value expression when present.
+    write(w, "enum {value.name} // module {mod.name}")
+    if (int(value.at.line) > 0) {
+        write(w, ", line {int(value.at.line)}")
+    }
+    write(w, "\n")
+    write(w, "Values:\n")
+    for (en in value.list) {
+        write(w, " {en.name}")
+        if (en.value != null) {
+            write(w, " = {describe(en.value)}")
+        }
+        write(w, "\n")
+    }
+}
+
+def write_typedef_detail(var w : StringBuilderWriter; name : string; td; mod : Module?) {
+    //! Writes a typedef; bitfields, variants and tuples are expanded into
+    //! their flags / options / fields, anything else is printed as an alias.
+    let typ = td
+    if (typ.baseType == Type.tBitfield) {
+        write(w, "bitfield {name} // module {mod.name}\n")
+        write(w, "Flags:\n")
+        for (an in typ.argNames) {
+            write(w, " {an}\n")
+        }
+    } elif (typ.baseType == Type.tVariant) {
+        write(w, "variant {name} // module {mod.name}\n")
+        write(w, "Options:\n")
+        for (an, at in typ.argNames, typ.argTypes) {
+            write(w, " {an} : {describe(at)}\n")
+        }
+    } elif (typ.baseType == Type.tTuple) {
+        write(w, "tuple {name} // module {mod.name}\n")
+        write(w, "Fields:\n")
+        for (at, idx in typ.argTypes, count()) {
+            // unnamed tuple fields fall back to positional names _0, _1, ...
+            var fname = "_{idx}"
+            if (idx < length(typ.argNames)) {
+                let n = string(typ.argNames[idx])
+                if (!empty(n)) {
+                    fname = n
+                }
+            }
+            write(w, " {fname} : {describe(at)}\n")
+        }
+    } else {
+        write(w, "typedef {name} = {describe(td)} // module {mod.name}\n")
+    }
+}
+
+def private try_describe_type(name, module_name, preamble, project : string) : TryResult {
+    //! Writes a temporary stub program that requires ``module_name``, compiles
+    //! it, and searches every module of the program for ``name`` — structures
+    //! first, then handled types, enumerations and typedefs. The stub file is
+    //! removed afterwards. ``ok`` is set only if the compile block ran.
+    let stub_path = make_temp_das_file()
+    var stub = "options gen2\n"
+    if (!empty(module_name)) {
+        stub = "{stub}require {module_name}\n"
+    }
+    stub = "{stub}[export]\ndef main() \{\}\n"
+    var write_ok = false
+    fopen(stub_path, "w") $(f) {
+        if (f != null) {
+            fwrite(f, stub)
+            write_ok = true
+        }
+    }
+    if (!write_ok) {
+        remove(stub_path)
+        return TryResult(text = make_tool_result("Cannot write temp file: {stub_path}", true), ok = false, retryable = false)
+    }
+    var ok = false
+    let res = compile_and_simulate(stub_path, project) $(program; issues) {
+        ok = true
+        var result = ""
+        program_for_each_module(program) $(mod) {
+            // first hit wins: skip remaining modules once something was found
+            if (!empty(result)) {
+                return
+            }
+            for_each_structure(mod) $(value) {
+                if (!empty(result) || value.name != name ||
+                    value.flags.isLambda || value.flags._generator || value.flags.generated) {
+                    return
+                }
+                result = build_string() $(var w) {
+                    write_struct_detail(w, value, mod)
+                }
+            }
+            if (empty(result)) {
+                module_for_each_annotation(mod) $(value) {
+                    if (!empty(result) || !value.isBasicStructureAnnotation || value.name != name) {
+                        return
+                    }
+                    result = build_string() $(var w) {
+                        write_handled_type_detail(w, value, mod)
+                    }
+                }
+            }
+            if (empty(result)) {
+                for_each_enumeration(mod) $(value) {
+                    if (!empty(result) || value.name != name) {
+                        return
+                    }
+                    result = build_string() $(var w) {
+                        write_enum_detail(w, value, mod)
+                    }
+                }
+            }
+            if (empty(result)) {
+                for_each_typedef(mod) $(tname, td) {
+                    if (!empty(result) || tname != name) {
+                        return
+                    }
+                    result = build_string() $(var w) {
+                        write_typedef_detail(w, string(tname), td, mod)
+                    }
+                }
+            }
+        }
+        if (empty(result)) {
+            return "{preamble}Type '{name}' not found"
+        }
+        return "{preamble}{result}"
+    }
+    remove(stub_path)
+    return TryResult(text = res, ok = ok, retryable = true)
+}
+
+def run_describe_type(name, module_name : string; project : string = "") : string {
+    //! Describes type ``name``. If the first attempt fails in a retryable way
+    //! and ``module_name`` is a bare (unqualified) name, retries once with
+    //! ``daslib/<module_name>``; when the retry also fails the first
+    //! attempt's text is returned.
+    if (empty(name)) {
+        return make_tool_result("missing 'name' argument", true)
+    }
+    let first = try_describe_type(name, module_name, "", project)
+    if (first.ok || !first.retryable || empty(module_name) || is_qualified_module_name(module_name)) {
+        return first.text
+    }
+    let resolved = "daslib/{module_name}"
+    let preamble = "[resolved '{module_name}' as '{resolved}']\n"
+    let second = try_describe_type(name, resolved, preamble, project)
+    if (second.ok) {
+        return second.text
+    }
+    return first.text
+}
+
+[export]
+def main {
+    //! Entry point: expects three user arguments (name, module_name, project).
+    let raw <- get_command_line_arguments()
+    let args <- subtool_user_args(raw)
+    if (length(args) < 3) {
+        print(make_tool_result("describe_type subtool: expected 3 args (name, module_name, project), got {length(args)}", true))
+        return
+    }
+    let name = string(args[0])
+    let module_name = string(args[1])
+    let project = string(args[2])
+    print(run_describe_type(name, module_name, project))
+}
diff --git a/utils/mcp/subtools/find_symbol.das b/utils/mcp/subtools/find_symbol.das
new file mode 100644
index 0000000000..e692e87586
--- /dev/null
+++ b/utils/mcp/subtools/find_symbol.das
@@ -0,0 +1,336 @@
+options gen2
+options rtti
+options indenting = 4
+options no_unused_function_arguments = false
+options no_unused_block_arguments = false
+
+require ../tools/common.das public
+require ../tools/cpp_common.das public
+require daslib/ast_boost
+
+//! Subprocess form of `find_symbol`. Argv: <query> <kind> <file> <project> <with_cpp_source> [<cpp_dirs>].
+//! `cpp_dirs` is an optional comma-separated list of repo-relative paths that scopes the
+//! C++ source-redirect lookup. Empty -> use the cached global index over CPP_SEARCH_DIRS.
+
+// Upper bound on the number of symbols collected per query.
+let MAX_FIND_RESULTS = 50
+
+def module_display_name(mname : string) : string {
+    //! Maps the module name "$" to "builtin"; other names pass through.
+    return mname == "$" ? "builtin" : mname
+}
+
+def write_location(var w : StringBuilderWriter; at) {
+    //! Appends a " // line N" suffix when the location has a positive line number.
+    if (int(at.line) > 0) {
+        write(w, " // line {int(at.line)}")
+    }
+}
+
+def write_cpp_name(var w : StringBuilderWriter; cppName : string) {
+    //! Appends a " // C++ <name>" suffix when a C++ name is known.
+    if (!empty(cppName)) {
+        write(w, " // C++ {cppName}")
+    }
+}
+
+def write_cpp_redirect(var w : StringBuilderWriter; cppName : string; with_cpp_source : bool; cpp_dirs : array; cpp_entries : array) {
+    //! When ``with_cpp_source`` is set and ``cppName`` is non-empty, appends
+    //! a "→ cpp:" line with the located file:line, or a note explaining why
+    //! the name was not located.
+    if (!with_cpp_source || empty(cppName)) {
+        return
+    }
+    var match : CppMatch
+    // empty cpp_dirs → use the cached global C++ index (CPP_SEARCH_DIRS).
+    // non-empty → look up against the pre-scanned `cpp_entries` (built once
+    // per run_find_symbol call to avoid N rescans for N matched symbols).
+    let found = empty(cpp_dirs) ? cpp_lookup_by_name(cppName, match) : cpp_lookup_by_name_in_entries(cppName, cpp_entries, match)
+    if (found) {
+        write(w, "\n → cpp: {match.file}:{match.line}")
+    } elif (empty(cpp_dirs)) {
+        let why = cpp_index_status()
+        if (!empty(why)) {
+            write(w, "\n → cpp: {cppName} (index unavailable: {why})")
+        } else {
+            write(w, "\n → cpp: {cppName} (not located)")
+        }
+    } else {
+        let dirs_csv = join(cpp_dirs, ",")
+        write(w, "\n → cpp: {cppName} (not located in {dirs_csv})")
+    }
+}
+
+def get_func_cpp_name(func : Function?) : string {
+    //! Returns the C++ name of a built-in function, or "" for anything else.
+    if (func.flags.builtIn) {
+        let bfn = func as BuiltInFunction
+        if (bfn != null) {
+            return string(bfn.cppName)
+        }
+    }
+    return ""
+}
+
+def struct_has_matching_field(value; query : string) : bool {
+    //! True when any field name of the structure matches ``query``.
+    for (fld in value.fields) {
+        if (symbol_matches(string(fld.name), query)) {
+            return true
+        }
+    }
+    return false
+}
+
+def handled_has_matching_field(value : Annotation const; query : string) : bool {
+    //! True when any field of the handled type matches ``query``.
+    var found = false
+    // NOTE(review): the target type of this reinterpret looks stripped in this copy — confirm against the repository.
+    var ann = unsafe(reinterpret value)
+    for_each_field(*ann) $(name, cppName, xtype, offset) {
+        if (symbol_matches(name, query)) {
+            found = true
+        }
+    }
+    return found
+}
+
+def write_struct_fields_highlighted(var w : StringBuilderWriter; value; query : string; field_search : bool) {
+    //! Writes every field; with ``field_search`` set, matching fields get a ">" marker.
+    for (fld in value.fields) {
+        if (field_search && symbol_matches(string(fld.name), query)) {
+            write(w, " > {fld.name} : {describe(fld._type)}\n")
+        } else {
+            write(w, " {fld.name} : {describe(fld._type)}\n")
+        }
+    }
+}
+
+def write_handled_fields_highlighted(var w : StringBuilderWriter; value : Annotation const; query : string; field_search : bool) {
+    //! Handled-type counterpart of ``write_struct_fields_highlighted``; also
+    //! notes the C++ field name when it differs from the daslang one.
+    var ann = unsafe(reinterpret value)
+    for_each_field(*ann) $(name, cppName, xtype, offset) {
+        let prefix = field_search && symbol_matches(name, query) ? " > " : " "
+        if (!empty(cppName) && cppName != name) {
+            write(w, "{prefix}{name} : {describe(xtype)} // C++ {cppName}\n")
+        } else {
+            write(w, "{prefix}{name} : {describe(xtype)}\n")
+        }
+    }
+}
+
+def search_module(var results : array; var total_found : int&; mname : string; mod : Module?; query : string; kind : string; with_cpp_source : bool; cpp_dirs : array; cpp_entries : array) {
+    //! Appends to ``results`` one formatted entry per symbol of ``mod`` that
+    //! matches ``query``, limited to the category selected by ``kind`` (empty
+    //! = all). Categories are visited in a fixed order: functions, generics,
+    //! structures, handled types, enumerations, typedefs, globals. Every
+    //! section stops once ``total_found`` reaches MAX_FIND_RESULTS.
+    let dname = module_display_name(mname)
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "function")) {
+        for_each_function(mod, "") $(func) {
+            if (total_found >= MAX_FIND_RESULTS || func.flags.privateFunction || func.flags._lambda || func.flags.generated) {
+                return
+            }
+            let fname = string(func.name)
+            if (!symbol_matches(fname, query)) {
+                return
+            }
+            let cppName = get_func_cpp_name(func)
+            let entry = build_string() $(var w) {
+                write(w, " [{dname}] def {fname}")
+                write_func_signature(w, func)
+                write_cpp_name(w, cppName)
+                write_cpp_redirect(w, cppName, with_cpp_source, cpp_dirs, cpp_entries)
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "generic")) {
+        for_each_generic(mod) $(func) {
+            if (total_found >= MAX_FIND_RESULTS || func.flags.privateFunction || func.flags.generated) {
+                return
+            }
+            let fname = string(func.name)
+            // names starting with three backticks are skipped
+            if (fname |> starts_with("```") || !symbol_matches(fname, query)) {
+                return
+            }
+            let cppName = get_func_cpp_name(func)
+            let entry = build_string() $(var w) {
+                write(w, " [{dname}] def {fname}")
+                write_func_signature(w, func)
+                write_cpp_name(w, cppName)
+                write_cpp_redirect(w, cppName, with_cpp_source, cpp_dirs, cpp_entries)
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "struct" || kind == "field")) {
+        for_each_structure(mod) $(value) {
+            if (total_found >= MAX_FIND_RESULTS || value.flags.isLambda || value.flags._generator || value.flags.generated || value.flags.privateStructure) {
+                return
+            }
+            let name_match = symbol_matches(string(value.name), query)
+            let field_match = struct_has_matching_field(value, query)
+            // kind "field" requires a field hit; other kinds accept a name hit or a field hit
+            if ((kind == "field" && !field_match) || (kind != "field" && !name_match && !field_match)) {
+                return
+            }
+            let entry = build_string() $(var w) {
+                if (value.flags.isClass) {
+                    write(w, " [{dname}] class {value.name}")
+                } else {
+                    write(w, " [{dname}] struct {value.name}")
+                }
+                write_location(w, value.at)
+                write(w, "\n")
+                // fields are highlighted only when the structure matched through a field, not its name
+                write_struct_fields_highlighted(w, value, query, field_match && !name_match)
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "handled" || kind == "struct" || kind == "field")) {
+        module_for_each_annotation(mod) $(value) {
+            if (total_found >= MAX_FIND_RESULTS) {
+                return
+            }
+            let is_basic = value.isBasicStructureAnnotation
+            let name_match = symbol_matches(string(value.name), query)
+            let field_match = is_basic && handled_has_matching_field(value, query)
+            if ((kind == "field" && !field_match) || (kind != "field" && !name_match && !field_match)) {
+                return
+            }
+            let cppName = string(value.cppName)
+            let entry = build_string() $(var w) {
+                write(w, " [{dname}] handled {value.name}")
+                write_cpp_name(w, cppName)
+                write_cpp_redirect(w, cppName, with_cpp_source, cpp_dirs, cpp_entries)
+                write(w, "\n")
+                if (is_basic) {
+                    write_handled_fields_highlighted(w, value, query, field_match && !name_match)
+                }
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "enum")) {
+        for_each_enumeration(mod) $(value) {
+            if (total_found >= MAX_FIND_RESULTS || value.isPrivate || !symbol_matches(string(value.name), query)) {
+                return
+            }
+            let entry = build_string() $(var w) {
+                write(w, " [{dname}] enum {value.name}")
+                write_location(w, value.at)
+                write(w, "\n")
+                for (en in value.list) {
+                    write(w, " {en.name}\n")
+                }
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "typedef" || kind == "alias")) {
+        for_each_typedef(mod) $(name : string#; var value : TypeDeclPtr) {
+            if (total_found >= MAX_FIND_RESULTS) {
+                return
+            }
+            let td = value
+            if (td.flags.isPrivateAlias) {
+                return
+            }
+            let sname = string(name)
+            if (!symbol_matches(sname, query)) {
+                return
+            }
+            let entry = build_string() $(var w) {
+                write(w, " [{dname}] typedef {sname} = {describe(value)}")
+                write_location(w, td.at)
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+    if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "global")) {
+        for_each_global(mod) $(value) {
+            if (total_found >= MAX_FIND_RESULTS || value.flags.private_variable || !symbol_matches(string(value.name), query)) {
+                return
+            }
+            let entry = build_string() $(var w) {
+                write(w, " [{dname}] {value.name} : {describe(value._type)}")
+                write_location(w, value.at)
+            }
+            results |> push(entry)
+            total_found++
+        }
+    }
+}
+
+def format_find_results(results : array; total_found : int; query : string; kind : string) : string {
+    //! Renders the collected entries, or a "No symbols found" message.
+    //! The truncation note is added whenever ``total_found`` has reached
+    //! MAX_FIND_RESULTS (which includes the case of exactly that many hits).
+    if (empty(results)) {
+        return "No symbols found matching '{query}'" + (empty(kind) ? "" : " (kind={kind})")
+    }
+    let truncated = total_found >= MAX_FIND_RESULTS ? "\n(truncated at {MAX_FIND_RESULTS} results)" : ""
+    return build_string() $(var writer) {
+        write(writer, "Found {length(results)} symbol(s) matching '{query}':\n")
+        for (r in results) {
+            write(writer, "{r}\n")
+        }
+        write(writer, truncated)
+    }
+}
+
+def run_find_symbol(query : string; kind : string = ""; file : string = ""; project : string = ""; with_cpp_source : bool = false; cpp_dirs : array) : string {
+    //! Searches for ``query``. With ``file`` given, the file is compiled and
+    //! the modules of that program are searched; otherwise all registered
+    //! modules are searched. Modules named "__main__" or with an empty name
+    //! are skipped in both modes.
+    if (empty(query)) {
+        return make_tool_result("'query' must not be empty", true)
+    }
+    // When cpp_dirs is non-empty, scan ONCE per call and reuse the entries
+    // for every per-symbol redirect lookup. The previous code re-scanned
+    // inside cpp_lookup_by_name_scoped on every match — up to MAX_FIND_RESULTS
+    // (50) rescans of the same dirs per query.
+    var cpp_entries : array
+    if (with_cpp_source && !empty(cpp_dirs)) {
+        // NOTE(review): `err` is filled by the scan but never read afterwards — confirm that dropping it is intended.
+        var err : string
+        cpp_entries <- cpp_run_scan(cpp_dirs, "", err)
+    }
+    if (!empty(file)) {
+        return compile_and_simulate(file, project) $(program; issues) {
+            var results : array
+            var total_found = 0
+            program_for_each_module(program) $(mod) {
+                let mname = string(mod.name)
+                if (mname == "__main__" || empty(mname)) {
+                    return
+                }
+                search_module(results, total_found, mname, mod, query, kind, with_cpp_source, cpp_dirs, cpp_entries)
+            }
+            return format_find_results(results, total_found, query, kind)
+        }
+    } else {
+        var results : array
+        var total_found = 0
+        program_for_each_registered_module() $(mod) {
+            let mname = string(mod.name)
+            if (mname == "__main__" || empty(mname)) {
+                return
+            }
+            search_module(results, total_found, mname, mod, query, kind, with_cpp_source, cpp_dirs, cpp_entries)
+        }
+        return make_tool_result(format_find_results(results, total_found, query, kind))
+    }
+}
+
+def parse_cpp_dirs(s : string) : array {
+    //! Splits a comma-separated list, strips each part and drops empty parts.
+    var out : array
+    if (empty(s)) {
+        return <- out
+    }
+    for (part in split(s, ",")) {
+        let trimmed = strip(part)
+        if (!empty(trimmed)) {
+            out |> push(trimmed)
+        }
+    }
+    return <- out
+}
+
+[export]
+def main {
+    //! Entry point: expects at least five user arguments (query, kind, file,
+    //! project, with_cpp_source) and an optional sixth (cpp_dirs).
+    let raw <- get_command_line_arguments()
+    let args <- subtool_user_args(raw)
+    if (length(args) < 5) {
+        print(make_tool_result("find_symbol subtool: expected 5+ args (query, kind, file, project, with_cpp_source[, cpp_dirs]), got {length(args)}", true))
+        return
+    }
+    let query = string(args[0])
+    let kind = string(args[1])
+    let file = string(args[2])
+    let project = string(args[3])
+    let with_cpp_source = string(args[4]) == "true"
+    // NOTE(review): the fallback expression below looks truncated in this copy (type arguments stripped) — confirm against the repository.
+    let cpp_dirs <- length(args) >= 6 ? parse_cpp_dirs(string(args[5])) : default>
+    print(run_find_symbol(query, kind, file, project, with_cpp_source, cpp_dirs))
+}
diff --git a/utils/mcp/subtools/lint_tool.das b/utils/mcp/subtools/lint_tool.das
new file mode 100644
index 0000000000..1e2bdfdd45
--- /dev/null
+++ b/utils/mcp/subtools/lint_tool.das
@@ -0,0 +1,158 @@
+options gen2
+options rtti
+options indenting = 4
+options no_unused_function_arguments = false
+options no_unused_block_arguments = false
+
+require ../tools/common.das public
+require daslib/lint
+require daslib/perf_lint
+require daslib/style_lint
+require daslib/ast_boost
+require strings
+
+//! Subprocess form of `lint`. Argv: <file> <project>.
+
+def has_expect_directive(file : string) : bool {
+    //! True when any line of the file, after stripping, starts with "expect ".
+    let text = fread(file)
+    if (empty(text)) {
+        return false
+    }
+    let lines <- text |> split("\n")
+    for (line in lines) {
+        let trimmed = line |> strip
+        if (trimmed |> starts_with("expect ")) {
+            return true
+        }
+    }
+    return false
+}
+
+def lint_single(file : string; project : string = "") : string {
+    //! Lints one file and returns a text report. Files carrying an `expect`
+    //! directive are reported as skipped. Issues from the paranoid, perf and
+    //! style collectors are merged into one list.
+    if (has_expect_directive(file)) {
+        return "SKIP {file} (intentional compile errors via `expect`)"
+    }
+    return compile_only(file, true, true, project, true) $(program; issues) {
+        var compile_warnings = ""
+        if (!empty(issues)) {
+            compile_warnings = "Compilation warnings:\n{issues}\n"
+        }
+        var all_issues : array
+        var count = paranoid_collect(program, all_issues)
+        var perf_issues : array
+        count += perf_lint_collect(program, perf_issues)
+        for (w in perf_issues) {
+            all_issues |> push(w)
+        }
+        var style_issues : array
+        // the `_comment_hygiene` program option defaults to false when absent
+        let hygiene = program._options |> find_arg("_comment_hygiene") ?as tBool ?? false
+        count += style_lint_collect(program, style_issues, false, hygiene)
+        for (w in style_issues) {
+            all_issues |> push(w)
+        }
+        if (count == 0) {
+            return "{compile_warnings}No lint issues."
+        }
+        return build_string() $(var w) {
+            if (!empty(compile_warnings)) {
+                w |> write(compile_warnings)
+            }
+            w |> write("{count} lint issue(s):\n")
+            write_deduped(w, all_issues)
+        }
+    }
+}
+
+// Per-file outcome used by the multi-file form of run_lint.
+struct LintFileResult {
+    file : string
+    count : int
+    errors : array
+    failed : bool
+}
+
+def lint_file(file : string; project : string = "") : LintFileResult {
+    //! Compiles ``file`` with lint-oriented policies and collects paranoid,
+    //! perf and style issues into a ``LintFileResult``; ``failed`` is set
+    //! when compilation does not succeed.
+    var result = LintFileResult(file = file)
+    var inscope access <- make_file_access(project)
+    using() $(var mg : ModuleGroup) {
+        using() $(var cop : CodeOfPolicies) {
+            cop.threadlock_context = true
+            cop.ignore_shared_modules = true
+            cop.export_all = true
+            cop.lint_check = true
+            cop.no_optimizations = true
+            cop.no_infer_time_folding = true
+            compile_file(file, access, unsafe(addr(mg)), cop) $(ok; program; issues) {
+                if (!ok) {
+                    result.failed = true
+                } else {
+                    result.count = paranoid_collect(program, result.errors)
+                    var perf_issues : array
+                    result.count += perf_lint_collect(program, perf_issues)
+                    for (w in perf_issues) {
+                        result.errors |> push(w)
+                    }
+                    var style_issues : array
+                    let hygiene = program._options |> find_arg("_comment_hygiene") ?as tBool ?? false
+                    result.count += style_lint_collect(program, style_issues, false, hygiene)
+                    for (w in style_issues) {
+                        result.errors |> push(w)
+                    }
+                }
+            }
+        }
+    }
+    return <- result
+}
+
+def run_lint(file : string; project : string = "") : string {
+    //! Expands ``file`` with ``parse_file_list``. A single match returns
+    //! ``lint_single``; several matches produce SKIP/FAIL/WARN/PASS lines and
+    //! a summary, flagged as an error when any file failed to compile.
+    var files : array
+    parse_file_list(file, files)
+    if (empty(files)) {
+        return make_tool_result("no files matched: {file}", true)
+    }
+    if (length(files) == 1) {
+        return lint_single(files[0], project)
+    }
+    var total_issues = 0
+    var total_errors = 0
+    var total_skipped = 0
+    let output = build_string() $(var w) {
+        for (f in files) {
+            if (has_expect_directive(f)) {
+                total_skipped++
+                w |> write("SKIP {f} (intentional compile errors via `expect`)\n")
+                continue
+            }
+            let result = lint_file(f, project)
+            if (result.failed) {
+                total_errors++
+                w |> write("FAIL {f}\n")
+            } elif (result.count > 0) {
+                total_issues += result.count
+                w |> write("WARN {f} ({result.count})\n")
+                write_deduped(w, result.errors)
+            } else {
+                w |> write("PASS {f}\n")
+            }
+        }
+        // the skipped count is only mentioned when at least one file was skipped
+        if (total_skipped > 0) {
+            w |> write("\n{length(files)} files, {total_issues} issue(s), {total_errors} error(s), {total_skipped} skipped\n")
+        } else {
+            w |> write("\n{length(files)} files, {total_issues} issue(s), {total_errors} error(s)\n")
+        }
+    }
+    return make_tool_result(output, total_errors > 0)
+}
+
+[export]
+def main {
+    //! Entry point: expects two user arguments (file, project).
+    let raw <- get_command_line_arguments()
+    let args <- subtool_user_args(raw)
+    if (length(args) < 2) {
+        print(make_tool_result("lint subtool: expected 2 args (file, project), got {length(args)}", true))
+        return
+    }
+    let file = string(args[0])
+    let project = string(args[1])
+    print(run_lint(file, project))
+}
diff --git a/utils/mcp/subtools/list_functions.das b/utils/mcp/subtools/list_functions.das
new file mode 100644
index 0000000000..90a55ea6a8
--- /dev/null
+++ b/utils/mcp/subtools/list_functions.das
@@ -0,0 +1,31 @@
+options gen2
+options rtti
+options indenting = 4
+options no_unused_function_arguments = false
+options
no_unused_block_arguments = false
+
+require ../tools/common.das public
+
+//! Subprocess form of `list_functions`. Argv: <file> <project>.
+
+def run_list_functions(file : string; project : string = "") : string {
+    //! Runs ``compile_and_simulate`` on ``file`` / ``project`` and, in its
+    //! program block, renders the function list with ``write_function_list``.
+    //! An empty listing is replaced by the "(no functions found)" placeholder.
+    return compile_and_simulate(file, project) $(program; issues) {
+        let listing = build_string() $(var out) {
+            out |> write_function_list(program)
+        }
+        if (empty(listing)) {
+            return "(no functions found)"
+        }
+        return listing
+    }
+}
+
+[export]
+def main {
+    //! Entry point: needs two user arguments (file, project); prints an error
+    //! envelope when fewer are supplied.
+    let argv <- get_command_line_arguments()
+    let args <- subtool_user_args(argv)
+    if (length(args) < 2) {
+        print(make_tool_result("list_functions subtool: expected 2 args (file, project), got {length(args)}", true))
+        return
+    }
+    print(run_list_functions(string(args[0]), string(args[1])))
+}
diff --git a/utils/mcp/subtools/list_module_api.das b/utils/mcp/subtools/list_module_api.das
new file mode 100644
index 0000000000..54e2379206
--- /dev/null
+++ b/utils/mcp/subtools/list_module_api.das
@@ -0,0 +1,416 @@
+options gen2
+options rtti
+options indenting = 4
+options no_unused_function_arguments = false
+options no_unused_block_arguments = false
+
+require ../tools/common.das public
+
+//! Subprocess form of `list_module_api`. Argv:
. + +def is_operator_function(name : string) : bool { + let ch = first_character(name) + return !is_alpha(ch) && !is_number(ch) && ch != '_' +} + +def name_matches_filter(name : string; filter : string) : bool { + return empty(filter) || symbol_matches(name, filter) +} + +def want_section(section : string; name : string) : bool { + return empty(section) || section == name +} + +def write_compact_func_signature(var w : StringBuilderWriter; func) { + var has_args = false + for (i in range(length(func.arguments))) { + if (!arg_needs_documenting(func.arguments[i]._type)) { + continue + } + if (!has_args) { + write(w, "(") + has_args = true + } else { + write(w, "; ") + } + write(w, "{describe(func.arguments[i]._type)}") + } + if (has_args) { + write(w, ")") + } + if (func.result != null && !func.result.isVoid) { + write(w, " : {describe(func.result)}") + } +} + +def write_module_api(var writer : StringBuilderWriter; mod : Module?; filter : string = ""; section : string = ""; compact : bool = false) { + write(writer, "Module: {mod.name}\n") + var funcs : array + var generics : array + var operators : array + var structs : array + var handled : array + var enums : array + var globals : array + var func_annotations : array + var struct_annotations : array + var call_macros : array + var reader_macros : array + var variant_macros : array + var typeinfo_macros : array + var for_loop_macros : array + var type_macros : array + if (want_section(section, "functions") || want_section(section, "operators")) { + for_each_function(mod, "") $(func) { + if (func.flags.privateFunction || func.flags._lambda || func.flags.generated) { + return + } + let fname = string(func.name) + if (!name_matches_filter(fname, filter)) { + return + } + let entry = build_string() $(var w) { + write(w, " def {func.name}") + if (compact) { + write_compact_func_signature(w, func) + } else { + write_func_signature(w, func) + } + } + if (is_operator_function(fname) && want_section(section, "operators")) { + 
operators |> push(entry) + } elif (!is_operator_function(fname) && want_section(section, "functions")) { + funcs |> push(entry) + } + } + } + if (want_section(section, "generics") || want_section(section, "operators")) { + for_each_generic(mod) $(func) { + if (func.flags.privateFunction || func.flags.generated || string(func.name) |> starts_with("```")) { // nolint:PERF012 + return + } + let fname = string(func.name) + if (!name_matches_filter(fname, filter)) { + return + } + let entry = build_string() $(var w) { + write(w, " def {func.name}") + if (compact) { + write_compact_func_signature(w, func) + } else { + write_func_signature(w, func) + } + } + if (is_operator_function(fname) && want_section(section, "operators")) { + operators |> push(entry) + } elif (!is_operator_function(fname) && want_section(section, "generics")) { + generics |> push(entry) + } + } + } + if (want_section(section, "structs")) { + for_each_structure(mod) $(value) { + if (value.flags.isLambda || value.flags._generator || value.flags.generated || value.flags.privateStructure || + !name_matches_filter(string(value.name), filter)) { + return + } + let entry = build_string() $(var w) { + if (value.flags.isClass) { + write(w, " class {value.name}") + } else { + write(w, " struct {value.name}") + } + if (value.parent != null) { + write(w, " : {value.parent.name}") + } + if (!compact && int(value.at.line) > 0) { + write(w, " // line {int(value.at.line)}") + } + write(w, "\n") + if (!compact) { + for (fld in value.fields) { + write(w, " {fld.name} : {describe(fld._type)}\n") + } + } + } + structs |> push(entry) + } + } + if (want_section(section, "handled")) { + module_for_each_annotation(mod) $(value) { + if (!value.isBasicStructureAnnotation || !name_matches_filter(string(value.name), filter)) { + return + } + let entry = build_string() $(var w) { + if (compact) { + write(w, " handled {value.name}\n") + } else { + write_handled_type(w, value) + } + } + handled |> push(entry) + } + } + if 
(want_section(section, "enums")) { + for_each_enumeration(mod) $(value) { + if (value.isPrivate || !name_matches_filter(string(value.name), filter)) { + return + } + let entry = build_string() $(var w) { + write(w, " enum {value.name}") + if (!compact && int(value.at.line) > 0) { + write(w, " // line {int(value.at.line)}") + } + write(w, "\n") + if (!compact) { + for (en in value.list) { + write(w, " {en.name}\n") + } + } + } + enums |> push(entry) + } + } + if (want_section(section, "annotations")) { + module_for_each_annotation(mod) $(value) { + if (value.isBasicStructureAnnotation || value.isEnumerationAnnotation) { + return + } + let aname = string(value.name) + if (!name_matches_filter(aname, filter)) { + return + } + if (value.isFunctionAnnotation) { + func_annotations |> push(" [{aname}]\n") + } elif (!value.isTypeAnnotation) { + struct_annotations |> push(" [{aname}]\n") + } + } + } + if (want_section(section, "annotations")) { + for_each_call_macro(mod) $(value) { + let mname = string(value) + if (!name_matches_filter(mname, filter)) { + return + } + call_macros |> push(" {mname}\n") + } + } + if (want_section(section, "annotations")) { + for_each_reader_macro(mod) $(value) { + let mname = string(value) + if (!name_matches_filter(mname, filter)) { + return + } + reader_macros |> push(" {mname}\n") + } + } + if (want_section(section, "annotations")) { + for_each_variant_macro(mod) $(value) { + let mname = string(value.name) + if (!name_matches_filter(mname, filter)) { + return + } + variant_macros |> push(" {mname}\n") + } + } + if (want_section(section, "annotations")) { + for_each_typeinfo_macro(mod) $(value) { + let mname = string(value.name) + if (!name_matches_filter(mname, filter)) { + return + } + typeinfo_macros |> push(" {mname}\n") + } + } + if (want_section(section, "annotations")) { + for_each_for_loop_macro(mod) $(value) { + let mname = string(value.name) + if (!name_matches_filter(mname, filter)) { + return + } + for_loop_macros |> push(" 
{mname}\n") + } + } + if (want_section(section, "annotations")) { + for_each_typemacro(mod) $(value) { + let mname = string(value.name) + if (!name_matches_filter(mname, filter)) { + return + } + type_macros |> push(" {mname}\n") + } + } + if (want_section(section, "globals")) { + for_each_global(mod) $(value) { + if (value.flags.private_variable || !name_matches_filter(string(value.name), filter)) { + return + } + var line = " {value.name} : {describe(value._type)}" + if (int(value.at.line) > 0) { + line = "{line} // line {int(value.at.line)}" + } + globals |> push("{line}\n") + } + } + if (!empty(funcs)) { + write(writer, "Functions ({length(funcs)}):\n") + for (f in funcs) { + write(writer, "{f}\n") + } + } + if (!empty(generics)) { + write(writer, "Generics ({length(generics)}):\n") + for (g in generics) { + write(writer, "{g}\n") + } + } + if (!empty(operators)) { + write(writer, "Operators ({length(operators)}):\n") + for (o in operators) { + write(writer, "{o}\n") + } + } + if (!empty(structs)) { + write(writer, "Structs/Classes:\n") + for (s in structs) { + write(writer, s) + } + } + if (!empty(handled)) { + write(writer, "Handled types ({length(handled)}):\n") + for (h in handled) { + write(writer, h) + } + } + if (!empty(enums)) { + write(writer, "Enumerations:\n") + for (e in enums) { + write(writer, e) + } + } + if (!empty(globals)) { + write(writer, "Globals:\n") + for (g in globals) { + write(writer, g) + } + } + if (!empty(func_annotations)) { + write(writer, "Function annotations ({length(func_annotations)}):\n") + for (a in func_annotations) { + write(writer, a) + } + } + if (!empty(struct_annotations)) { + write(writer, "Structure annotations ({length(struct_annotations)}):\n") + for (a in struct_annotations) { + write(writer, a) + } + } + if (!empty(call_macros)) { + write(writer, "Call macros ({length(call_macros)}):\n") + for (m in call_macros) { + write(writer, m) + } + } + if (!empty(reader_macros)) { + write(writer, "Reader macros 
({length(reader_macros)}):\n") + for (m in reader_macros) { + write(writer, m) + } + } + if (!empty(variant_macros)) { + write(writer, "Variant macros ({length(variant_macros)}):\n") + for (m in variant_macros) { + write(writer, m) + } + } + if (!empty(typeinfo_macros)) { + write(writer, "Typeinfo macros ({length(typeinfo_macros)}):\n") + for (m in typeinfo_macros) { + write(writer, m) + } + } + if (!empty(for_loop_macros)) { + write(writer, "For-loop macros ({length(for_loop_macros)}):\n") + for (m in for_loop_macros) { + write(writer, m) + } + } + if (!empty(type_macros)) { + write(writer, "Type macros ({length(type_macros)}):\n") + for (m in type_macros) { + write(writer, m) + } + } +} + +def private try_list_module_api(name : string; preamble : string; filter : string; section : string; compact : bool; project : string) : TryResult { + let stub_path = make_temp_das_file() + let stub = "require {name}\n[export]\ndef main\n pass\n" + var write_ok = false + fopen(stub_path, "w") $(f) { + if (f != null) { + fwrite(f, stub) + write_ok = true + } + } + if (!write_ok) { + remove(stub_path) + return TryResult(text = make_tool_result("Cannot write temp file: {stub_path}", true), ok = false, retryable = false) + } + var ok = false + let result = compile_and_simulate(stub_path, project) $(program; issues) { + var target_mod : Module? + program_for_each_module(program) $(mod) { + let mname = string(mod.name) + if (mname == name || "{name}" |> ends_with("/{mname}")) { + target_mod = mod + } + } + if (target_mod == null) { + return "Module '{name}' not found after compilation" + } + let res = build_string() $(var writer) { + if (!empty(preamble)) { + write(writer, preamble) + } + write_module_api(writer, target_mod, filter, section, compact) + } + ok = true + return empty(res) ? 
"(empty module)" : res + } + remove(stub_path) + return TryResult(text = result, ok = ok, retryable = true) +} + +def run_list_module_api(module_name : string; filter : string = ""; section : string = ""; compact_str : string = ""; project : string = "") : string { + let compact = compact_str == "true" + let first = try_list_module_api(module_name, "", filter, section, compact, project) + if (first.ok || !first.retryable || is_qualified_module_name(module_name)) { + return first.text + } + let resolved = "daslib/{module_name}" + let preamble = "[resolved '{module_name}' as '{resolved}']\n" + let second = try_list_module_api(resolved, preamble, filter, section, compact, project) + if (second.ok) { + return second.text + } + return first.text +} + +[export] +def main { + let raw <- get_command_line_arguments() + let args <- subtool_user_args(raw) + if (length(args) < 5) { + print(make_tool_result("list_module_api subtool: expected 5 args (module_name, filter, section, compact, project), got {length(args)}", true)) + return + } + let module_name = string(args[0]) + let filter = string(args[1]) + let section = string(args[2]) + let compact_str = string(args[3]) + let project = string(args[4]) + print(run_list_module_api(module_name, filter, section, compact_str, project)) +} diff --git a/utils/mcp/subtools/list_types.das b/utils/mcp/subtools/list_types.das new file mode 100644 index 0000000000..bd9aea4613 --- /dev/null +++ b/utils/mcp/subtools/list_types.das @@ -0,0 +1,126 @@ +options gen2 +options rtti +options indenting = 4 +options no_unused_function_arguments = false +options no_unused_block_arguments = false + +require ../tools/common.das public + +//! Subprocess form of `list_types`. Argv: . 
+ +def write_type_list(var writer : StringBuilderWriter; program : smart_ptr) { + let thisMod = get_this_module(program) + var structs : array + var classes : array + var enums : array + var typedefs : array + for_each_structure(thisMod) $(value) { + if (value.flags.privateStructure || value.flags.isLambda || value.flags.generated || value.flags._generator) { + return + } + let entry = build_string() $(var w) { + if (value.flags.isClass) { + write(w, " class {value.name}") + if (value.parent != null) { + write(w, " : {value.parent.name}") + } + write(w, " // line {int(value.at.line)}\n") + for (fld in value.fields) { + if (fld.flags.classMethod || fld.flags.generated || !fld.flags.implemented || fld.flags.parentType || fld.name == "__rtti") { + continue + } + write(w, " {fld.name} : {describe(fld._type)}") + if (fld.init != null) { + write(w, " = {describe(fld.init)}") + } + write(w, "\n") + } + } else { + write(w, " struct {value.name}") + if (value.parent != null) { + write(w, " : {value.parent.name}") + } + write(w, " // line {int(value.at.line)}\n") + for (fld in value.fields) { + if (fld.flags.generated || !fld.flags.implemented || fld.flags.parentType || fld.name == "__rtti") { + continue + } + write(w, " {fld.name} : {describe(fld._type)}") + if (fld.init != null) { + write(w, " = {describe(fld.init)}") + } + write(w, "\n") + } + } + } + if (value.flags.isClass) { + classes |> push(entry) + } else { + structs |> push(entry) + } + } + for_each_enumeration(thisMod) $(value) { + if (value.isPrivate) { + return + } + let entry = build_string() $(var w) { + write(w, " enum {value.name} // line {int(value.at.line)}\n") + for (en in value.list) { + write(w, " {en.name} = {describe(en.value)}\n") + } + } + enums |> push(entry) + } + for_each_typedef(thisMod) $(name, value) { + if (value.flags.isPrivateAlias) { + return + } + typedefs |> push(" {name} = {describe(value)} // line {int(value.at.line)}\n") + } + if (!empty(structs)) { + write(writer, "Structs:\n") + for 
(s in structs) { + write(writer, s) + } + } + if (!empty(classes)) { + write(writer, "Classes:\n") + for (c in classes) { + write(writer, c) + } + } + if (!empty(enums)) { + write(writer, "Enumerations:\n") + for (e in enums) { + write(writer, e) + } + } + if (!empty(typedefs)) { + write(writer, "Type aliases:\n") + for (t in typedefs) { + write(writer, t) + } + } +} + +def run_list_types(file : string; project : string = "") : string { + return compile_and_simulate(file, project) $(program; issues) { + let res = build_string() $(var writer) { + write_type_list(writer, program) + } + return empty(res) ? "(no types found)" : res + } +} + +[export] +def main { + let raw <- get_command_line_arguments() + let args <- subtool_user_args(raw) + if (length(args) < 2) { + print(make_tool_result("list_types subtool: expected 2 args (file, project), got {length(args)}", true)) + return + } + let file = string(args[0]) + let project = string(args[1]) + print(run_list_types(file, project)) +} diff --git a/utils/mcp/test_tools.das b/utils/mcp/test_tools.das index e71bbeae98..8c85bed36b 100644 --- a/utils/mcp/test_tools.das +++ b/utils/mcp/test_tools.das @@ -1974,10 +1974,11 @@ def test_cpp_grep_usage_dedup(t : T?) { [test] def test_cpp_find_symbol(t : T?) { - t |> run("finds class in src/") <| @(t : T?) { + t |> run("finds class in src/ast") <| @(t : T?) { var text : string var is_error = false - parse_result(do_cpp_find_symbol("=Module", "class", "src", ""), text, is_error) + // narrowed from "src" (~144 files) to "src/ast" (~34 files); Module lives in src/ast + parse_result(do_cpp_find_symbol("=Module", "class", "src/ast", ""), text, is_error) t |> success(!is_error, "should not be error -- {cpp_diag(text)}") t |> success(find(text, "Module") >= 0, "should find Module -- {cpp_diag(text)}") } @@ -1991,21 +1992,23 @@ def test_cpp_find_symbol(t : T?) { t |> run("no-match returns clean empty result") <| @(t : T?) 
{ var text : string var is_error = false - parse_result(do_cpp_find_symbol("zzznotarealsymbolzzz_xyzzy", "", "src", ""), text, is_error) + // narrowed scope; absence-of-symbol is structural, not scope-dependent + parse_result(do_cpp_find_symbol("zzznotarealsymbolzzz_xyzzy", "", "src/ast", ""), text, is_error) t |> success(!is_error, "should not be error on no-match -- {cpp_diag(text)}") t |> success(find(text, "No matches") >= 0, "should report no matches -- {cpp_diag(text)}") } t |> run("kind filter rejects wrong kinds") <| @(t : T?) { var text : string var is_error = false - parse_result(do_cpp_find_symbol("=addFunction", "class", "src", ""), text, is_error) + // addFunction is a method (not a class) in src/ast/; narrowed scope still proves the kind filter + parse_result(do_cpp_find_symbol("=addFunction", "class", "src/ast", ""), text, is_error) t |> success(!is_error, "should not be error -- {cpp_diag(text)}") t |> success(find(text, "No matches") >= 0, "kind=class should reject methods named addFunction -- {cpp_diag(text)}") } t |> run("missing query returns error") <| @(t : T?) { var text : string var is_error = false - parse_result(do_cpp_find_symbol("", "", "src", ""), text, is_error) + parse_result(do_cpp_find_symbol("", "", "src/ast", ""), text, is_error) t |> success(is_error, "should be error") } } @@ -2177,7 +2180,10 @@ def test_find_symbol_with_cpp_source(t : T?) { t |> run("redirect on — emits '→ cpp:' line for builtins") <| @(t : T?) { var text : string var is_error = false - parse_result(do_find_symbol("=print", "function", "", "", true), text, is_error) + // cpp_dirs="src/builtin" narrows the C++ scan from full CPP_SEARCH_DIRS + // (~947 files) to ~33 files where `print` lives. Drops this test from + // ~19s to ~1s. + parse_result(do_find_symbol("=print", "function", "", "", true, "src/builtin"), text, is_error) t |> success(!is_error, "should not be error -- {cpp_diag(text)}") // Either we located print's cpp source OR we report (not located). 
// Both prove the redirect path fired. @@ -2198,7 +2204,8 @@ def test_goto_definition_with_cpp_source(t : T?) { t |> run("redirect on — appends C++: line for builtin") <| @(t : T?) { var text : string var is_error = false - parse_result(do_goto_definition(fixture_path("_fixture_goto.das"), "34", "5", "", "", true), text, is_error) + // cpp_dirs="src/builtin" narrows the redirect lookup; drops this from ~17s to ~1s. + parse_result(do_goto_definition(fixture_path("_fixture_goto.das"), "34", "5", "", "", true, "src/builtin"), text, is_error) t |> success(!is_error, "should not be error -- {cpp_diag(text)}") t |> success(find(text, "Kind: builtin") >= 0, "should still resolve to a builtin -- {cpp_diag(text)}") t |> success(find(text, "C++:") >= 0, "C++ line should appear when with_cpp_source=true -- {cpp_diag(text)}") @@ -2226,38 +2233,19 @@ def test_cpp_search_config(t : T?) { [test] def test_cpp_index_signature(t : T?) { - t |> run("signature is stable across two consecutive calls (no edits)") <| @(t : T?) { - let sig1 = cpp_compute_index_signature() - let sig2 = cpp_compute_index_signature() - t |> success(!empty(sig1), "first signature should not be empty") - t |> success(sig1 == sig2, "signature should be stable across consecutive calls (no fs changes)") + t |> run("ensure_cpp_index produces a stable signature across consecutive calls (no edits)") <| @(t : T?) { + ensure_cpp_index() + let sig1 = cpp_index_signature + ensure_cpp_index() + let sig2 = cpp_index_signature + t |> success(!empty(sig1), "first signature should not be empty after ensure_cpp_index()") + t |> success(sig1 == sig2, "signature should be stable across consecutive ensure_cpp_index() calls (no fs changes)") } } -[test] -def test_cpp_filesystem_signature(t : T?) { - // Direct unit test for the no-git fallback's filesystem walker. - // (`cpp_compute_index_signature` only invokes it when git fails — hard to - // simulate in CI without breaking git, so we exercise it directly here.) 
- t |> run("filesystem signature is non-empty for default search dirs") <| @(t : T?) { - let fs_sig = cpp_compute_filesystem_signature(cpp_default_search_dirs()) - t |> success(!empty(fs_sig), "should return at least one path:mtime line for daslang's tree") - } - t |> run("filesystem signature is stable across consecutive calls (no edits)") <| @(t : T?) { - let dirs <- cpp_default_search_dirs() - let sig1 = cpp_compute_filesystem_signature(dirs) - let sig2 = cpp_compute_filesystem_signature(dirs) - t |> success(sig1 == sig2, "two back-to-back calls should produce identical signatures") - } - t |> run("filesystem signature only mentions indexed extensions") <| @(t : T?) { - let fs_sig = cpp_compute_filesystem_signature(cpp_default_search_dirs()) - // Any *.das, *.cmake, *.txt, *.md path slipping into the signature is - // a bug — `is_indexed_path` should have filtered them out. - t |> success(find(fs_sig, ".das:") < 0, "should not include .das files") - t |> success(find(fs_sig, ".cmake:") < 0, "should not include .cmake files") - t |> success(find(fs_sig, "CMakeLists") < 0, "should not include CMakeLists.txt") - } -} +// Direct unit tests for the filesystem-fallback walker live with the shared +// module they exercise: utils/common/tests/test_git_signature.das. Behavioral +// integration is covered by test_cpp_index_signature above. [test] def test_cpp_collect_git_excludes(t : T?) { @@ -2310,3 +2298,76 @@ def test_cpp_using_alias_visible(t : T?) { t |> success(find(text, "module_pull_t") >= 0, "should locate the using-alias -- {cpp_diag(text)}") } } + +// ── run_mcp_subtool wiring tests ────────────────────────────────────── +// +// The 270 tool-level tests above already cover the wiring end-to-end via +// `do_(...)`. These four pin down the four behaviors of the helper +// itself: argv passthrough, missing-subtool, timeout, and non-zero exit. 
+// Fixtures live alongside the real subtools at utils/mcp/subtools/_test_*.das +// so they share the same require-chain shape (no behavioral surprises). + +[test] +def test_run_mcp_subtool_passthrough(t : T?) { + t |> run("argv round-trips through `--` separator") <| @(t : T?) { + let raw = run_mcp_subtool("_test_passthrough", ["alpha", "beta", "gamma"]) + var text : string + var is_error = false + parse_result(raw, text, is_error) + t |> success(!is_error, "should not be error -- text={text}") + t |> success(find(text, "PASSTHROUGH: alpha beta gamma") >= 0, + "args should round-trip verbatim -- text={text}") + } + t |> run("empty args list still works") <| @(t : T?) { + let raw = run_mcp_subtool("_test_passthrough", []) + var text : string + var is_error = false + parse_result(raw, text, is_error) + t |> success(!is_error, "should not be error -- text={text}") + t |> success(find(text, "PASSTHROUGH:") >= 0, + "marker present even with no args -- text={text}") + } +} + +[test] +def test_run_mcp_subtool_missing(t : T?) { + t |> run("nonexistent subtool surfaces as error") <| @(t : T?) { + let raw = run_mcp_subtool("_does_not_exist_xyzzy", []) + var text : string + var is_error = false + parse_result(raw, text, is_error) + t |> success(is_error, "should be error -- text={text}") + t |> success(find(text, "_does_not_exist_xyzzy") >= 0, + "error should name the missing subtool -- text={text}") + } +} + +[test] +def test_run_mcp_subtool_timeout(t : T?) { + t |> run("subtool exceeding timeout surfaces 'timed out'") <| @(t : T?) { + // Sleep 5s, allow 0.5s — guaranteed timeout. Wall clock cost ~0.5s + // (process is killed at the deadline, not after the full sleep). 
+ let raw = run_mcp_subtool("_test_sleep", ["5000"], 0.5) + var text : string + var is_error = false + parse_result(raw, text, is_error) + t |> success(is_error, "should be error -- text={text}") + t |> success(find(text, "timed out") >= 0, + "error should mention 'timed out' -- text={text}") + } +} + +[test] +def test_run_mcp_subtool_nonzero_exit(t : T?) { + t |> run("panicking subtool surfaces 'failed (exit ...)'") <| @(t : T?) { + let raw = run_mcp_subtool("_test_panic", []) + var text : string + var is_error = false + parse_result(raw, text, is_error) + t |> success(is_error, "should be error -- text={text}") + t |> success(find(text, "failed (exit") >= 0, + "error should report non-zero exit -- text={text}") + t |> success(find(text, "STDOUT_BEFORE_PANIC") >= 0, + "captured stdout before the panic should be in the error payload -- text={text}") + } +} diff --git a/utils/mcp/tools/aot.das b/utils/mcp/tools/aot.das index bb314a659f..e21d7f8eac 100644 --- a/utils/mcp/tools/aot.das +++ b/utils/mcp/tools/aot.das @@ -3,158 +3,10 @@ options no_unused_function_arguments = false options no_unused_block_arguments = false require common public -require daslib/aot_cpp -require daslib/ast_boost -require strings -struct AotFuncInfo { - name : string // fn.name (daslang name) - aot_name : string // mangled C++ name with hash - signature : string // human-readable signature -} - -def collect_aot_functions(program : smart_ptr) : array { - var result : array - let this_mod = program.getThisModule - this_mod |> for_each_module_function($(fn) { - if (fn.flags.noAot || fn.moreFlags.isTemplate || !fn.flags.used) { - return - } - if (fn.flags.builtIn || fn.flags.generated) { - return - } - var sig = build_string() $(var w) { - write(w, string(fn.name)) - write(w, "(") - var first = true - for (i in range(length(fn.arguments))) { - if (fn.arguments[i]._type.baseType == Type.fakeContext || fn.arguments[i]._type.baseType == Type.fakeLineInfo) { - continue - } - if (!first) { - write(w, 
"; ") - } - first = false - write(w, "{string(fn.arguments[i].name)} : {describe(fn.arguments[i]._type)}") - } - write(w, ")") - if (fn.result != null && !fn.result.isVoid) { - write(w, " : {describe(fn.result)}") - } - } - result |> emplace(AotFuncInfo( - name = string(fn.name), - aot_name = aotFuncName(fn), - signature = sig - )) - }) - return <- result -} - -def match_function_name(fn_name, query : string) : bool { - if (fn_name == query) { - return true - } - // method match: query "foo" matches "ClassName`foo" - let bt = find(fn_name, "`") - if (bt >= 0 && slice(fn_name, bt + 1) == query) { - return true - } - return false -} - -def match_function_generic(fn_name, query : string; program : smart_ptr) : bool { - var found = false - let this_mod = program.getThisModule - this_mod |> for_each_module_function($(fn) { - if (found) { - return - } - if (string(fn.name) != fn_name) { - return - } - if (fn.fromGeneric != null && match_function_name(string(fn.fromGeneric.name), query)) { - found = true - } - }) - return found -} +//! Thin popen wrapper. Real logic lives in subtools/aot.das so macro +//! state from compile_file doesn't leak across MCP calls. 
def do_aot(file, func_name : string; project : string = "") : string { - if (empty(file)) { - return make_tool_result("missing 'file' argument", true) - } - - // full file AOT when no function specified - if (empty(func_name)) { - return compile_and_simulate_ctx(file, project) $(program : smart_ptr; var ctx : smart_ptr; issues : string) { - var warnings = "" - if (!empty(issues)) { - warnings = "Warnings:\n{issues}\n" - } - var result : string - using() $(var cop : CodeOfPolicies) { - cop.aot = false - cop.aot_module = true - result = "{warnings}{run_aot(program.get_ptr(), ctx.get_ptr(), cop)}" - } - return result - } - } - - // single function AOT - return compile_and_simulate_ctx(file, project) $(program : smart_ptr; var ctx : smart_ptr; issues : string) { - let all_funcs <- collect_aot_functions(program) - - // try exact match on fn.name first, then on aot_name - var matches : array - for (i in range(length(all_funcs))) { - if (all_funcs[i].name == func_name) { - matches |> push(i) - } - } - // exact match on aot_name (mangled C++ name) - if (empty(matches)) { - for (i in range(length(all_funcs))) { - if (all_funcs[i].aot_name == func_name) { - matches |> push(i) - } - } - } - // fuzzy: method match and generic origin - if (empty(matches)) { - for (i in range(length(all_funcs))) { - if (match_function_name(all_funcs[i].name, func_name) || match_function_generic(all_funcs[i].name, func_name, program)) { - matches |> push(i) - } - } - } - - if (empty(matches)) { - var sigs : array - for (fi in all_funcs) { - sigs |> push(fi.signature) - } - return "No function matching '{func_name}' found.\nAvailable functions:\n" + join(sigs, "\n") - } - if (length(matches) == 1) { - let matched_name = all_funcs[matches[0]].aot_name - var result : string - using() $(var cop : CodeOfPolicies) { - cop.aot = false - cop.aot_module = true - result = run_aot_function(program.get_ptr(), ctx.get_ptr(), cop, matched_name) - } - if (empty(result)) { - return "Function '{matched_name}' 
produced no AOT output (marked noAot or no_aot)." - } - return result - } - // multiple matches — show signatures with aot_name for disambiguation - var lines : array - for (idx in matches) { - lines |> push("{all_funcs[idx].signature} [{all_funcs[idx].aot_name}]") - } - return "Multiple functions match '{func_name}':\n" + join(lines, "\n") + "\nUse the bracketed name for exact selection." - } + return run_mcp_subtool("aot", [file, func_name, project]) } diff --git a/utils/mcp/tools/common.das b/utils/mcp/tools/common.das index ee6d010c92..0f708f53c5 100644 --- a/utils/mcp/tools/common.das +++ b/utils/mcp/tools/common.das @@ -291,24 +291,71 @@ def get_daslang_exe() : string { return get_das_exe() } -// Run an external command, capture stdout+stderr, return exit_code. -// `args[0]` is the executable; remaining elements are positional arguments. -// timeout_sec > 0 kills the process tree after that many seconds (returns popen_timed_out). -// timeout_sec <= 0 means no timeout. +// ── MCP subtool runner ────────────────────────────────────────────── // -// Argv-based: `popen_argv` (Windows: CreateProcess; Unix: fork+execvp) bypasses -// the shell entirely. No cmd.exe quote-stripping, no /bin/sh `$()`/backtick -// expansion — every argv element reaches the child verbatim. Callers don't -// (and shouldn't) quote arguments themselves. -def run_and_capture(args : array; var output : string&; timeout_sec : float = 0.0) : int { - var captured : string - let exit_code = unsafe(popen_argv(args, timeout_sec, $(f) { - if (f != null) { - captured := unsafe(fread_to_eof(f)) +// Why subprocess: in-process compile_file / RTTI walks leak C++-side +// [function_macro] and [call_macro] registrations into the long-lived MCP +// daslang process. A second call against an edited .das file sees the OLD +// macros. Spawning a fresh daslang.exe per call resets that state entirely. +// +// Subtool contract: +// - Lives at utils/mcp/subtools/.das. 
+// - [export] def main reads positional argv via get_command_line_arguments(). +// - Prints the FINAL tool-result envelope (the full make_tool_result(...) JSON) +// to stdout. The wrapper passes it through verbatim — no extra wrapping. +// - Exits 0 on success AND on tool-level failure (e.g. compile errors are a +// successful tool run that reports isError=true). Exit non-0 ONLY on +// catastrophic failure (cannot read args, internal panic). +// Inside an MCP subtool's `[export] def main`, locate the caller-supplied +// args inside daslang's full argv. Conventional invocation: +// daslang.exe utils/mcp/subtools/.das -- ... +// Returns the args after `--`. Falls back to args[2..] (skip exe + script +// path) if no `--` is present, so subtools can also be invoked manually +// for debugging without the separator. +def subtool_user_args(args : array) : array { + var out : array + var i = 0 + while (i < length(args)) { + if (args[i] == "--") { + i++ + while (i < length(args)) { + out |> push(args[i]) + i++ + } + return <- out } - })) - output := captured - return exit_code + i++ + } + out |> reserve(max(0, length(args) - 2)) + for (j in range(2, length(args))) { + out |> push(args[j]) + } + return <- out +} + +def run_mcp_subtool(subtool_name : string; args : array; timeout_sec : float = 120.0) : string { + let exe = get_daslang_exe() + if (empty(exe)) { + return make_tool_result("Cannot determine daslang executable path", true) + } + let subtool_path = path_join(get_das_root(), "utils/mcp/subtools/{subtool_name}.das") + // `--` is critical: without it, daslang treats positional argv as extra + // .das files to load AND auto-runs each. With `--`, daslang stops parsing + // its own options and just exposes the rest via get_command_line_arguments(). 
+ var argv <- [exe, subtool_path, "--"] + argv |> reserve(length(argv) + length(args)) + for (a in args) { + argv |> push(a) + } + var output : string + let exit_code = run_and_capture(argv, output, timeout_sec) + if (exit_code == popen_timed_out) { + return make_tool_result("MCP subtool '{subtool_name}' timed out after {timeout_sec}s:\n{output}", true) + } + if (exit_code != 0) { + return make_tool_result("MCP subtool '{subtool_name}' failed (exit {exit_code}):\n{output}", true) + } + return output } // ── Temp file utilities ────────────────────────────────────────────── diff --git a/utils/mcp/tools/compile_check.das b/utils/mcp/tools/compile_check.das index bc7c3df26d..948c67b65d 100644 --- a/utils/mcp/tools/compile_check.das +++ b/utils/mcp/tools/compile_check.das @@ -1,96 +1,15 @@ options gen2 -options rtti +options indenting = 4 options no_unused_function_arguments = false options no_unused_block_arguments = false require common public -require daslib/json_boost -struct CompileResult { - file : string - success : bool - errors : string - warnings : string -} - -def compile_check_single(file : string; project : string = "") : string { - return compile_and_simulate(file, project) $(program; issues) { - var warnings = "" - if (!empty(issues)) { - warnings = "Warnings:\n{issues}\n" - } - var funcs = build_string() $(var writer) { - write_function_list(writer, program) - } - return "{warnings}Compilation OK.\n{funcs}" - } -} - -def private compile_check_single_json(file : string; project : string = "") : CompileResult { - var result = CompileResult(file = file) - var inscope access <- make_file_access(project) - using() $(var mg : ModuleGroup) { - using() $(var cop : CodeOfPolicies) { - cop.threadlock_context = true - cop.ignore_shared_modules = true - compile_file(file, access, unsafe(addr(mg)), cop) $(ok; program; issues) { - if (!ok) { - result.errors = string(issues) - } else { - simulate(program) $(sok; ctx; serrors) { - if (!sok) { - result.errors = 
string(serrors) - } else { - result.success = true - if (!empty(issues)) { - result.warnings = string(issues) - } - } - } - } - } - } - } - return result -} +//! `compile_check` MCP wrapper. Forwards to subtools/compile_check.das so +//! every call gets a fresh daslang.exe — no stale [function_macro] / +//! [call_macro] state across calls. The subtool's stdout (already a +//! make_tool_result(...) JSON envelope) is returned verbatim. def do_compile_check(file : string; project : string = ""; json : bool = false) : string { - var files : array - parse_file_list(file, files) - if (empty(files)) { - if (json) { - return make_tool_result("[]", true) - } - return make_tool_result("no files matched: {file}", true) - } - if (!json) { - // Original text behavior - if (length(files) == 1) { - return compile_check_single(files[0], project) - } - var passed = 0 - var failed = 0 - var output = build_string() $(var w) { - for (f in files) { - let result = compile_check_single(f, project) - let is_error = find(result, "\"isError\":true") >= 0 - if (is_error) { - failed++ - write(w, "FAIL {f}\n") - } else { - passed++ - write(w, "PASS {f}\n") - } - } - write(w, "\n{passed + failed} files, {passed} passed, {failed} failed\n") - } - return make_tool_result(output, failed > 0) - } - // JSON mode - var results : array - results |> reserve(length(files)) - for (f in files) { - results |> emplace(compile_check_single_json(f, project)) - } - return make_tool_result(sprint_json(results, false)) + return run_mcp_subtool("compile_check", [file, project, json ? 
"true" : "false"]) } diff --git a/utils/mcp/tools/cpp_common.das b/utils/mcp/tools/cpp_common.das index 96c676b30d..c9c71c16b7 100644 --- a/utils/mcp/tools/cpp_common.das +++ b/utils/mcp/tools/cpp_common.das @@ -6,6 +6,7 @@ require common public require grep_usage public require outline public require ../cpp_search_config.das public +require ../../common/git_signature.das require daslib/json public require daslib/json_boost public require daslib/strings_boost public @@ -61,7 +62,7 @@ def cpp_rule_id_to_kind(rule_id : string) : string { } def is_cpp_ident_char(ch : int) : bool { - return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z') || (ch >= '0' && ch <= '9') || ch == '_' || ch == ':' || ch == '~' + return is_alpha(ch) || is_number(ch) || ch == '_' || ch == ':' || ch == '~' } def is_cpp_op_char(ch : int) : bool { @@ -127,7 +128,7 @@ def cpp_extract_name_pair(text : string; kind : string) : tuple if (kw_pos >= 0) { var rest = strip(slice(line, kw_pos + length(kw))) // skip optional API macros (any UPPER_SNAKE token before the name) - while (length(rest) > 0 && (first_character(rest) >= 'A' && first_character(rest) <= 'Z')) { + while (!empty(rest) && (first_character(rest) >= 'A' && first_character(rest) <= 'Z')) { // nolint:PERF014 let first_space = find(rest, " ") if (first_space < 0) { break @@ -137,7 +138,7 @@ def cpp_extract_name_pair(text : string; kind : string) : tuple var all_upper = true for (i in range(length(head))) { let c = character_at(head, i) // nolint:PERF002,PERF003 - if (!((c >= 'A' && c <= 'Z') || c == '_' || (c >= '0' && c <= '9'))) { + if (!((c >= 'A' && c <= 'Z') || c == '_' || is_number(c))) { // nolint:PERF014 all_upper = false break } @@ -175,7 +176,7 @@ def cpp_extract_name_pair(text : string; kind : string) : tuple } elif (starts_with(rest, "struct ")) { rest = strip(slice(rest, 7)) } - if (length(rest) > 0 && first_character(rest) != '{') { + if (!empty(rest) && first_character(rest) != '{') { var nend = length(rest) for (i in 
range(length(rest))) { let c = character_at(rest, i) // nolint:PERF002,PERF003 @@ -774,21 +775,6 @@ var cpp_index_entries : array // so users get a real reason instead of a silent "no matches". var cpp_index_error : string -// True if `path` (extracted from a `git status --porcelain` line) refers to a -// .cpp/.h/.hpp file inside one of CPP_SEARCH_DIRS. Anything else (.das edits, -// .cmake edits, tutorials/foo.cpp, etc.) is irrelevant to the index. -def private is_indexed_path(path : string) : bool { - if (!ends_with_any(path, CPP_SEARCH_INCLUDE_GLOBS)) { - return false - } - for (dir in CPP_SEARCH_DIRS) { - if (path == dir || starts_with(path, "{dir}/")) { - return true - } - } - return false -} - // `*.cpp` / `*.h` / `*.hpp` style patterns are simple suffixes (no embedded // glob meta-chars) — a substring compare is sufficient and avoids pulling // in a full glob engine just for extension checks. @@ -803,41 +789,11 @@ def private ends_with_any(path : string; suffix_globs : array) : bool { return false } -// Strip `git status --porcelain` 3-char status prefix. Handles the "rename" -// form `R old -> new` by returning the new name. -def private extract_status_path(line : string) : string { - if (length(line) <= 3) { - return "" - } - var path = slice(line, 3) - let arrow = find(path, " -> ") - if (arrow >= 0) { - path = slice(path, arrow + 4) - } - return path -} - -// stat() wrapper that returns 0 on missing/error so signature compute doesn't -// crash on a deleted file. -def private stat_mtime_or_zero(path : string) : int64 { - let st = stat(path) - return st.is_valid ? int64(st.mtime) : 0l -} - -// Filesystem-only staleness signature for the no-git fallback. Walks -// `search_dirs` recursively, hashes (repo-relative path, mtime) for every -// file matching `is_indexed_path`, and prunes any subtree that -// `cpp_collect_git_excludes` would skip (vendored repos, daspkg cache, etc.). -// -// This is the slow path — ~50–200ms for daslang's tree (~1500 files). 
Only -// invoked when `git rev-parse` / `git status` fails, so users with a normal -// git checkout pay nothing. -def cpp_compute_filesystem_signature(search_dirs : array) : string { - let das_root = get_das_root() +// `cpp_collect_git_excludes` returns ast-grep `--globs` patterns of the +// form `!**/<dir>/**`; strip the wrapping to get bare repo-relative +// prefixes suitable for prefix-match pruning. +def private collect_prune_prefixes(search_dirs : array) : array { let raw_excludes <- cpp_collect_git_excludes(search_dirs, CPP_SEARCH_INCLUDE_OVERRIDES) - // `cpp_collect_git_excludes` returns ast-grep `--globs` patterns of the - // form `!**/<dir>/**`; strip the wrapping to get bare repo-relative - // prefixes for prefix-match pruning. var pruned : array pruned |> reserve(length(raw_excludes)) for (p in raw_excludes) { @@ -845,76 +801,31 @@ def cpp_compute_filesystem_signature(search_dirs : array) : string { pruned |> push(slice(p, 4, length(p) - 3)) } } - return build_string() $(var w) { - for (sd in search_dirs) { - // `dir_rec` yields filenames RELATIVE to `sd`, not absolute. To - // match what `is_indexed_path` expects (full repo-relative path - // starting with one of CPP_SEARCH_DIRS) we prepend sd's repo-rel - // form, and to stat() we join filename onto sd. - let sd_rel = to_generic_path(make_relative_path(sd, das_root)) - dir_rec(sd) $(filename, is_dir) { - if (is_dir) { - return - } - let leaf = to_generic_path(filename) - let rel = empty(sd_rel) ? leaf : "{sd_rel}/{leaf}" - for (pr in pruned) { - if (starts_with(rel, "{pr}/")) { - return - } - } - if (!is_indexed_path(rel)) { - return - } - w |> write("{rel}:{stat_mtime_or_zero(path_join(sd, filename))}\n") - } - } - } + return <- pruned } -// Compute the staleness signature. Empty result on git failure → caller -// treats as "no-git" or as transient and retries. 
-// -// `git -C ` is used (not bare `git`) so the signature is computed -// against the daslang tree even when the MCP server was launched from a -// subdirectory or with `--dasroot` pointing elsewhere. Likewise, paths from -// `git status --porcelain` are repo-relative and stat()'d relative to -// `das_root`, not the process CWD. -def cpp_compute_index_signature() : string { - let das_root = get_das_root() - let cfg_path = path_join(das_root, CPP_SEARCH_CONFIG_PATH) - let cfg_sig = "cfg:{stat_mtime_or_zero(cfg_path)}" - var head_buf : string - if (run_and_capture(["git", "-C", das_root, "rev-parse", "HEAD"], head_buf, 5.0) != 0 - || empty(head_buf)) { - // No git checkout (or git unavailable). Walk the search dirs and fold - // a per-file mtime hash into the signature so source edits invalidate - // the cache even without git. ~50–200ms; only on this rare path. - let fs_sig = cpp_compute_filesystem_signature(cpp_default_search_dirs()) - return "no-git:{cfg_sig}:fs:{hash(fs_sig)}" - } - var status_buf : string - if (run_and_capture(["git", "-C", das_root, "status", "--porcelain", "--untracked-files=normal"], - status_buf, 5.0) != 0) { - let fs_sig = cpp_compute_filesystem_signature(cpp_default_search_dirs()) - return "no-git:{cfg_sig}:fs:{hash(fs_sig)}" - } - let lines <- split(status_buf, "\n") - let combined = build_string() $(var w) { - w |> write(head_buf) - for (line in lines) { - let path = extract_status_path(line) - if (!empty(path) && is_indexed_path(path)) { - w |> write("{line}:{stat_mtime_or_zero(path_join(das_root, path))}\n") - } +def private is_pruned(path : string; prune_dirs : array) : bool { + for (pr in prune_dirs) { + if (starts_with(path, "{pr}/")) { + return true } - w |> write("{cfg_sig}\n") } - return "{hash(combined)}" + return false } +// Compute the staleness signature for the cpp index. 
Wraps the shared +// git_signature.compute_signature with cpp's predicate (extension globs +// + vendored-tree pruning) and folds in the cpp_search_config.das mtime +// so config edits invalidate the cache too. def ensure_cpp_index() { - let sig = cpp_compute_index_signature() + let das_root = get_das_root() + let cfg_path = path_join(das_root, CPP_SEARCH_CONFIG_PATH) + let cfg_sig = "cfg:{stat_mtime_or_zero(cfg_path)}" + let prune_dirs <- collect_prune_prefixes(cpp_default_search_dirs()) + let core = compute_signature(das_root, + unsafe(@ capture(& prune_dirs) (p : string) => ends_with_any(p, CPP_SEARCH_INCLUDE_GLOBS) && !is_pruned(p, prune_dirs)), + cpp_default_search_dirs()) + let sig = empty(core) ? "" : "{cfg_sig}:{core}" if (!empty(sig) && sig == cpp_index_signature) { return // cache hit } @@ -978,6 +889,47 @@ def cpp_lookup_by_name(cppName : string; var match : CppMatch&) : bool { return false } +// Like cpp_lookup_by_name but scans `dirs` fresh (no global cache touched). +// Used by `with_cpp_source` callers that want a narrower scan than the +// default CPP_SEARCH_DIRS — typical for tests pointing at a specific subtree +// (e.g. "src/builtin") to avoid the multi-hundred-file index build cost. +// +// Single-symbol callers only — re-scans on every call. Multi-symbol callers +// (e.g. find_symbol) should call cpp_run_scan once and use +// cpp_lookup_by_name_in_entries below to avoid N rescans of the same dirs. +def cpp_lookup_by_name_scoped(cppName : string; dirs : array; var match : CppMatch&) : bool { + if (empty(cppName) || empty(dirs)) { + return false + } + var err : string + let entries <- cpp_run_scan(dirs, "", err) + if (!empty(err)) { + return false + } + return cpp_lookup_by_name_in_entries(cppName, entries, match) +} + +// Lookup by name against a pre-scanned entry list. Use when one tool call +// resolves N symbols against the same dirs — scan once, look up N times. 
+def cpp_lookup_by_name_in_entries(cppName : string; entries : array; var match : CppMatch&) : bool { + if (empty(cppName)) { + return false + } + let last_sep = rfind(cppName, "::") + let unq = last_sep >= 0 ? slice(cppName, last_sep + 2) : "" + for (i in range(length(entries))) { + let e & = unsafe(entries[i]) + if (empty(e.name)) { + continue + } + if (e.name == cppName || e.qualified == cppName || (!empty(unq) && e.name == unq)) { + match = entries[i] + return true + } + } + return false +} + // Return ALL CppMatch entries indexed under `name` (or its unqualified form). // Empty array if absent. Used by cpp_goto_definition. The returned array // holds copies (not references) so callers can sort freely. diff --git a/utils/mcp/tools/describe_type.das b/utils/mcp/tools/describe_type.das index 9c24a1fe2b..54ff1a65c5 100644 --- a/utils/mcp/tools/describe_type.das +++ b/utils/mcp/tools/describe_type.das @@ -4,250 +4,9 @@ options no_unused_block_arguments = false require common public -def private try_describe_type(name, module_name, preamble, project : string) : TryResult { - let stub_path = make_temp_das_file() - var stub = "options gen2\n" - if (!empty(module_name)) { - stub = "{stub}require {module_name}\n" - } - stub = "{stub}[export]\ndef main() \{\}\n" - var write_ok = false - fopen(stub_path, "w") $(f) { - if (f != null) { - fwrite(f, stub) - write_ok = true - } - } - if (!write_ok) { - remove(stub_path) - return TryResult(text = make_tool_result("Cannot write temp file: {stub_path}", true), ok = false, retryable = false) - } - var ok = false - let res = compile_and_simulate(stub_path, project) $(program; issues) { - ok = true - var result = "" - // search all modules for the type - program_for_each_module(program) $(mod) { - if (!empty(result)) { - return - } - // structs and classes - for_each_structure(mod) $(value) { - if (!empty(result) || value.name != name || - value.flags.isLambda || value.flags._generator || value.flags.generated) { - return - } - 
result = build_string() $(var w) { - write_struct_detail(w, value, mod) - } - } - // handled types - if (empty(result)) { - module_for_each_annotation(mod) $(value) { - if (!empty(result) || !value.isBasicStructureAnnotation || value.name != name) { - return - } - result = build_string() $(var w) { - write_handled_type_detail(w, value, mod) - } - } - } - // enumerations - if (empty(result)) { - for_each_enumeration(mod) $(value) { - if (!empty(result) || value.name != name) { - return - } - result = build_string() $(var w) { - write_enum_detail(w, value, mod) - } - } - } - // typedefs (bitfields, variants, tuples, aliases) - if (empty(result)) { - for_each_typedef(mod) $(tname, td) { - if (!empty(result) || tname != name) { - return - } - result = build_string() $(var w) { - write_typedef_detail(w, string(tname), td, mod) - } - } - } - } - if (empty(result)) { - return "{preamble}Type '{name}' not found" - } - return "{preamble}{result}" - } - remove(stub_path) - return TryResult(text = res, ok = ok, retryable = true) -} +//! Thin popen wrapper. Real logic lives in subtools/describe_type.das so +//! macro state from compile_file doesn't leak across MCP calls. def do_describe_type(name, module_name : string; project : string = "") : string { - if (empty(name)) { - return make_tool_result("missing 'name' argument", true) - } - let first = try_describe_type(name, module_name, "", project) - if (first.ok || !first.retryable || empty(module_name) || is_qualified_module_name(module_name)) { - return first.text - } - let resolved = "daslib/{module_name}" - let preamble = "[resolved '{module_name}' as '{resolved}']\n" - let second = try_describe_type(name, resolved, preamble, project) - if (second.ok) { - return second.text - } - return first.text -} - -def write_struct_detail(var w : StringBuilderWriter; value; mod : Module?) 
{ - if (value.flags.isClass) { - write(w, "class {value.name}") - } else { - write(w, "struct {value.name}") - } - if (value.parent != null) { - write(w, " : {value.parent.name}") - } - write(w, " // module {mod.name}") - if (int(value.at.line) > 0) { - write(w, ", line {int(value.at.line)}") - } - write(w, "\n") - // fields - if (!empty(value.fields)) { - write(w, "Fields:\n") - for (fld in value.fields) { - write(w, " {fld.name} : {describe(fld._type)}") - if (fld.init != null) { - write(w, " // has default") - } - write(w, "\n") - } - } - // methods - var methods : array - for_each_function(mod, "") $(func) { - if (!func.flags.isClassMethod || empty(func.arguments)) { - return - } - let self_type = func.arguments[0]._type - if (self_type == null || self_type.structType == null || - self_type.structType.name != value.name || - (func.flags.generated && func.fromGeneric == null)) { - return - } - let entry = build_string() $(var ew) { - write(ew, " def {func.name}") - write_func_signature(ew, func) - } - methods |> push(entry) - } - if (!empty(methods)) { - write(w, "Methods:\n") - for (m in methods) { - write(w, "{m}\n") - } - } -} - -def write_handled_type_detail(var w : StringBuilderWriter; value : Annotation const; mod : Module?) 
{ - write(w, "handled {value.name}") - var ann = unsafe(reinterpret value) - basic_struct_for_each_parent(*ann) $(parent) { - write(w, " : {parent.name}") - } - write(w, " // module {mod.name}") - if (!empty(value.cppName)) { - write(w, ", C++ {value.cppName}") - } - write(w, "\n") - var field_count = 0 - for_each_field(*ann) $(fname, cppName, xtype, offset) { - if (field_count == 0) { - write(w, "Fields:\n") - } - field_count++ - write(w, " {fname} : {describe(xtype)}") - if (!empty(cppName) && cppName != fname) { - write(w, " // C++ {cppName}") - } - write(w, "\n") - } - // methods on handled types — functions where first arg matches this type - var methods : array - for_each_function(mod, "") $(func) { - if (func.flags.privateFunction || func.flags._lambda || func.flags.generated || empty(func.arguments)) { - return - } - let first_type = func.arguments[0]._type - if (first_type == null) { - return - } - // check if first arg is this handled type (possibly as pointer) - if (first_type.baseType == Type.tHandle) { - if (first_type.annotation != null && first_type.annotation.name == value.name) { - let entry = build_string() $(var ew) { - write(ew, " def {func.name}") - write_func_signature(ew, func) - } - methods |> push(entry) - } - } - } - if (!empty(methods)) { - write(w, "Functions (first arg is {value.name}):\n") - for (m in methods) { - write(w, "{m}\n") - } - } -} - -def write_enum_detail(var w : StringBuilderWriter; value; mod : Module?) { - write(w, "enum {value.name} // module {mod.name}") - if (int(value.at.line) > 0) { - write(w, ", line {int(value.at.line)}") - } - write(w, "\n") - write(w, "Values:\n") - for (en in value.list) { - write(w, " {en.name}") - if (en.value != null) { - write(w, " = {describe(en.value)}") - } - write(w, "\n") - } -} - -def write_typedef_detail(var w : StringBuilderWriter; name : string; td; mod : Module?) 
{ - let typ = td - if (typ.baseType == Type.tBitfield) { - write(w, "bitfield {name} // module {mod.name}\n") - write(w, "Flags:\n") - for (an in typ.argNames) { - write(w, " {an}\n") - } - } elif (typ.baseType == Type.tVariant) { - write(w, "variant {name} // module {mod.name}\n") - write(w, "Options:\n") - for (an, at in typ.argNames, typ.argTypes) { - write(w, " {an} : {describe(at)}\n") - } - } elif (typ.baseType == Type.tTuple) { - write(w, "tuple {name} // module {mod.name}\n") - write(w, "Fields:\n") - for (at, idx in typ.argTypes, count()) { - var fname = "_{idx}" - if (idx < length(typ.argNames)) { - let n = string(typ.argNames[idx]) - if (!empty(n)) { - fname = n - } - } - write(w, " {fname} : {describe(at)}\n") - } - } else { - write(w, "typedef {name} = {describe(td)} // module {mod.name}\n") - } + return run_mcp_subtool("describe_type", [name, module_name, project]) } diff --git a/utils/mcp/tools/find_symbol.das b/utils/mcp/tools/find_symbol.das index e7499765c9..32dab13022 100644 --- a/utils/mcp/tools/find_symbol.das +++ b/utils/mcp/tools/find_symbol.das @@ -3,320 +3,10 @@ options no_unused_function_arguments = false options no_unused_block_arguments = false require common public -require cpp_common public -require daslib/ast_boost -let MAX_FIND_RESULTS = 50 +//! Thin popen wrapper. Real logic lives in subtools/find_symbol.das so +//! macro state from compile_file doesn't leak across MCP calls. -def module_display_name(mname : string) : string { - return mname == "$" ? "builtin" : mname -} - -def write_location(var w : StringBuilderWriter; at) { - if (int(at.line) > 0) { - write(w, " // line {int(at.line)}") - } -} - -def write_cpp_name(var w : StringBuilderWriter; cppName : string) { - if (!empty(cppName)) { - write(w, " // C++ {cppName}") - } -} - -// When `with_cpp_source` is enabled and the entry has a non-empty cppName, look -// up the C++ source location via the lazily-built cpp index and append a -// " → cpp: path:line" line. 
On miss: " → cpp: (not located)". -def write_cpp_redirect(var w : StringBuilderWriter; cppName : string; with_cpp_source : bool) { - if (!with_cpp_source || empty(cppName)) { - return - } - var match : CppMatch - if (cpp_lookup_by_name(cppName, match)) { - write(w, "\n → cpp: {match.file}:{match.line}") - } else { - let why = cpp_index_status() - if (!empty(why)) { - write(w, "\n → cpp: {cppName} (index unavailable: {why})") - } else { - write(w, "\n → cpp: {cppName} (not located)") - } - } -} - -def get_func_cpp_name(func : Function?) : string { - if (func.flags.builtIn) { - let bfn = func as BuiltInFunction - if (bfn != null) { - return string(bfn.cppName) - } - } - return "" -} - -def struct_has_matching_field(value; query : string) : bool { - for (fld in value.fields) { - if (symbol_matches(string(fld.name), query)) { - return true - } - } - return false -} - -def handled_has_matching_field(value : Annotation const; query : string) : bool { - var found = false - var ann = unsafe(reinterpret value) - for_each_field(*ann) $(name, cppName, xtype, offset) { - if (symbol_matches(name, query)) { - found = true - } - } - return found -} - -def write_struct_fields_highlighted(var w : StringBuilderWriter; value; query : string; field_search : bool) { - for (fld in value.fields) { - if (field_search && symbol_matches(string(fld.name), query)) { - write(w, " > {fld.name} : {describe(fld._type)}\n") - } else { - write(w, " {fld.name} : {describe(fld._type)}\n") - } - } -} - -def write_handled_fields_highlighted(var w : StringBuilderWriter; value : Annotation const; query : string; field_search : bool) { - var ann = unsafe(reinterpret value) - for_each_field(*ann) $(name, cppName, xtype, offset) { - let prefix = field_search && symbol_matches(name, query) ? 
" > " : " " - if (!empty(cppName) && cppName != name) { - write(w, "{prefix}{name} : {describe(xtype)} // C++ {cppName}\n") - } else { - write(w, "{prefix}{name} : {describe(xtype)}\n") - } - } -} - -def search_module(var results : array; var total_found : int&; mname : string; mod : Module?; query : string; kind : string; with_cpp_source : bool) { - let dname = module_display_name(mname) - // functions - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "function")) { - for_each_function(mod, "") $(func) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - if (func.flags.privateFunction || func.flags._lambda || func.flags.generated) { - return - } - let fname = string(func.name) - if (!symbol_matches(fname, query)) { - return - } - let cppName = get_func_cpp_name(func) - let entry = build_string() $(var w) { - write(w, " [{dname}] def {fname}") - write_func_signature(w, func) - write_cpp_name(w, cppName) - write_cpp_redirect(w, cppName, with_cpp_source) - } - results |> push(entry) - total_found++ - } - } - // generics - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "generic")) { - for_each_generic(mod) $(func) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - if (func.flags.privateFunction || func.flags.generated) { - return - } - let fname = string(func.name) - if (fname |> starts_with("```") || !symbol_matches(fname, query)) { - return - } - let cppName = get_func_cpp_name(func) - let entry = build_string() $(var w) { - write(w, " [{dname}] def {fname}") - write_func_signature(w, func) - write_cpp_name(w, cppName) - write_cpp_redirect(w, cppName, with_cpp_source) - } - results |> push(entry) - total_found++ - } - } - // structs/classes (match by name or field name) - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "struct" || kind == "field")) { - for_each_structure(mod) $(value) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - if (value.flags.isLambda || value.flags._generator || 
value.flags.generated || value.flags.privateStructure) { - return - } - let name_match = symbol_matches(string(value.name), query) - let field_match = struct_has_matching_field(value, query) - if (kind == "field" && !field_match) { - return - } - if (kind != "field" && !name_match && !field_match) { - return - } - let entry = build_string() $(var w) { - if (value.flags.isClass) { - write(w, " [{dname}] class {value.name}") - } else { - write(w, " [{dname}] struct {value.name}") - } - write_location(w, value.at) - write(w, "\n") - write_struct_fields_highlighted(w, value, query, field_match && !name_match) - } - results |> push(entry) - total_found++ - } - } - // handled types via annotations (match by name or field name) - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "handled" || kind == "struct" || kind == "field")) { - module_for_each_annotation(mod) $(value) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - let is_basic = value.isBasicStructureAnnotation - let name_match = symbol_matches(string(value.name), query) - let field_match = is_basic && handled_has_matching_field(value, query) - if (kind == "field" && !field_match) { - return - } - if (kind != "field" && !name_match && !field_match) { - return - } - let cppName = string(value.cppName) - let entry = build_string() $(var w) { - write(w, " [{dname}] handled {value.name}") - write_cpp_name(w, cppName) - write_cpp_redirect(w, cppName, with_cpp_source) - write(w, "\n") - if (is_basic) { - write_handled_fields_highlighted(w, value, query, field_match && !name_match) - } - } - results |> push(entry) - total_found++ - } - } - // enums - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "enum")) { - for_each_enumeration(mod) $(value) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - if (value.isPrivate) { - return - } - if (!symbol_matches(string(value.name), query)) { - return - } - let entry = build_string() $(var w) { - write(w, " [{dname}] enum {value.name}") - 
write_location(w, value.at) - write(w, "\n") - for (en in value.list) { - write(w, " {en.name}\n") - } - } - results |> push(entry) - total_found++ - } - } - // type aliases (typedef, bitfield, variant, tuple) - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "typedef" || kind == "alias")) { - for_each_typedef(mod) $(name : string#; var value : TypeDeclPtr) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - let td = value - if (td.flags.isPrivateAlias) { - return - } - let sname = string(name) - if (!symbol_matches(sname, query)) { - return - } - let entry = build_string() $(var w) { - write(w, " [{dname}] typedef {sname} = {describe(value)}") - write_location(w, td.at) - } - results |> push(entry) - total_found++ - } - } - // globals - if (total_found < MAX_FIND_RESULTS && (empty(kind) || kind == "global")) { - for_each_global(mod) $(value) { - if (total_found >= MAX_FIND_RESULTS) { - return - } - if (value.flags.private_variable) { - return - } - if (!symbol_matches(string(value.name), query)) { - return - } - let entry = build_string() $(var w) { - write(w, " [{dname}] {value.name} : {describe(value._type)}") - write_location(w, value.at) - } - results |> push(entry) - total_found++ - } - } -} - -def do_find_symbol(query : string; kind : string = ""; file : string = ""; project : string = ""; with_cpp_source : bool = false) : string { - if (empty(query)) { - return make_tool_result("'query' must not be empty", true) - } - if (!empty(file)) { - // compile the file and search all its modules (including builtin $) - return compile_and_simulate(file, project) $(program; issues) { - var results : array - var total_found = 0 - program_for_each_module(program) $(mod) { - let mname = string(mod.name) - if (mname == "__main__" || empty(mname)) { - return - } - search_module(results, total_found, mname, mod, query, kind, with_cpp_source) - } - return format_find_results(results, total_found, query, kind) - } - } else { - // search all registered C++ 
modules (including builtin $) - var results : array - var total_found = 0 - program_for_each_registered_module() $(mod) { - let mname = string(mod.name) - if (mname == "__main__" || empty(mname)) { - return - } - search_module(results, total_found, mname, mod, query, kind, with_cpp_source) - } - return make_tool_result(format_find_results(results, total_found, query, kind)) - } -} - -def format_find_results(results : array; total_found : int; query : string; kind : string) : string { - if (empty(results)) { - return "No symbols found matching '{query}'" + (empty(kind) ? "" : " (kind={kind})") - } - let truncated = total_found >= MAX_FIND_RESULTS ? "\n(truncated at {MAX_FIND_RESULTS} results)" : "" - return build_string() $(var writer) { - write(writer, "Found {length(results)} symbol(s) matching '{query}':\n") - for (r in results) { - write(writer, "{r}\n") - } - write(writer, truncated) - } +def do_find_symbol(query : string; kind : string = ""; file : string = ""; project : string = ""; with_cpp_source : bool = false; cpp_dirs : string = "") : string { + return run_mcp_subtool("find_symbol", [query, kind, file, project, with_cpp_source ? 
"true" : "false", cpp_dirs]) } diff --git a/utils/mcp/tools/goto_definition.das b/utils/mcp/tools/goto_definition.das index 1189a68e93..e16cb557f2 100644 --- a/utils/mcp/tools/goto_definition.das +++ b/utils/mcp/tools/goto_definition.das @@ -38,7 +38,7 @@ def read_source_snippet(file_path : string; line : int; context_lines : int = 3) while (!feof(f)) { var ln = fgets(f) // strip trailing newline/cr - while (length(ln) > 0) { + while (!empty(ln)) { let last_ch = character_at(ln, length(ln) - 1) // nolint:PERF003 if (last_ch == '\n' || last_ch == '\r') { ln = slice(ln, 0, length(ln) - 1) @@ -49,7 +49,7 @@ def read_source_snippet(file_path : string; line : int; context_lines : int = 3) lines |> push(ln) } fclose(f) - if (length(lines) == 0) { + if (empty(lines)) { return "" } let first = max(0, line - 1 - context_lines) @@ -248,7 +248,7 @@ def resolve_definition(hit : CursorHit) : DefinitionResult { return DefinitionResult() } -def format_result(defn : DefinitionResult; hit : CursorHit; with_cpp_source : bool) : string { +def format_result(defn : DefinitionResult; hit : CursorHit; with_cpp_source : bool; cpp_dirs : array) : string { return build_string() $(var w) { write(w, "Symbol: {defn.name}\n") write(w, "Kind: {defn.kind}\n") @@ -261,15 +261,19 @@ def format_result(defn : DefinitionResult; hit : CursorHit; with_cpp_source : bo write(w, "C++: {defn.cppName}") if (with_cpp_source) { var match : CppMatch - if (cpp_lookup_by_name(defn.cppName, match)) { + let found = empty(cpp_dirs) ? 
cpp_lookup_by_name(defn.cppName, match) : cpp_lookup_by_name_scoped(defn.cppName, cpp_dirs, match) + if (found) { write(w, " ({match.file}:{match.line})") - } else { + } elif (empty(cpp_dirs)) { let why = cpp_index_status() if (!empty(why)) { write(w, " (index unavailable: {why})") } else { write(w, " (not located)") } + } else { + let dirs_csv = join(cpp_dirs, ",") + write(w, " (not located in {dirs_csv})") } } write(w, "\n") @@ -283,13 +287,22 @@ def format_result(defn : DefinitionResult; hit : CursorHit; with_cpp_source : bo } } -def public do_goto_definition(file : string; line_str, col_str : string; no_opt_str : string = ""; project : string = ""; with_cpp_source : bool = false) : string { +def public do_goto_definition(file : string; line_str, col_str : string; no_opt_str : string = ""; project : string = ""; with_cpp_source : bool = false; cpp_dirs : string = "") : string { let line = to_int(line_str) let col = to_int(col_str) if (line <= 0 || col <= 0) { return make_tool_result("Invalid line or column (must be >= 1)", true) } let no_opt = no_opt_str == "true" + var cpp_dirs_arr : array + if (!empty(cpp_dirs)) { + for (part in split(cpp_dirs, ",")) { + let trimmed = strip(part) + if (!empty(trimmed)) { + cpp_dirs_arr |> push(trimmed) + } + } + } return compile_program(file, true, no_opt, project) $(program; issues) { var hits <- find_at_cursor(program, file, line, col) if (empty(hits)) { @@ -299,7 +312,7 @@ def public do_goto_definition(file : string; line_str, col_str : string; no_opt_ for (hit in hits) { let defn = resolve_definition(hit) if (defn.found) { - return format_result(defn, hit, with_cpp_source) + return format_result(defn, hit, with_cpp_source, cpp_dirs_arr) } } // nothing resolved — show what we found at cursor diff --git a/utils/mcp/tools/lint_tool.das b/utils/mcp/tools/lint_tool.das index 56e025d7cb..3c8e484b6c 100644 --- a/utils/mcp/tools/lint_tool.das +++ b/utils/mcp/tools/lint_tool.das @@ -3,143 +3,10 @@ options 
no_unused_function_arguments = false options no_unused_block_arguments = false require common public -require daslib/lint -require daslib/perf_lint -require daslib/style_lint -require daslib/ast_boost -require strings -def has_expect_directive(file : string) : bool { - // Files with `expect :` declare intentional compile errors - // for dastest. Lint cannot evaluate them — the program never reaches - // the AST stage paranoid/perf/style passes need. Skip them. - let text = fread(file) - if (empty(text)) { - return false - } - let lines <- text |> split("\n") - for (line in lines) { - let trimmed = line |> strip - if (trimmed |> starts_with("expect ")) { - return true - } - } - return false -} - -def do_lint_single(file : string; project : string = "") : string { - if (has_expect_directive(file)) { - return "SKIP {file} (intentional compile errors via `expect`)" - } - return compile_only(file, true, true, project, true) $(program; issues) { - var compile_warnings = "" - if (!empty(issues)) { - compile_warnings = "Compilation warnings:\n{issues}\n" - } - var all_issues : array - var count = paranoid_collect(program, all_issues) - var perf_issues : array - count += perf_lint_collect(program, perf_issues) - for (w in perf_issues) { - all_issues |> push(w) - } - var style_issues : array - let hygiene = program._options |> find_arg("_comment_hygiene") ?as tBool ?? false - count += style_lint_collect(program, style_issues, false, hygiene) - for (w in style_issues) { - all_issues |> push(w) - } - if (count == 0) { - return "{compile_warnings}No lint issues." 
- } - return build_string() $(var w) { - if (!empty(compile_warnings)) { - w |> write(compile_warnings) - } - w |> write("{count} lint issue(s):\n") - write_deduped(w, all_issues) - } - } -} - -struct LintFileResult { - file : string - count : int - errors : array - failed : bool -} - -def do_lint_file(file : string; project : string = "") : LintFileResult { - var result = LintFileResult(file = file) - var inscope access <- make_file_access(project) - using() $(var mg : ModuleGroup) { - using() $(var cop : CodeOfPolicies) { - cop.threadlock_context = true - cop.ignore_shared_modules = true - cop.export_all = true - cop.lint_check = true - cop.no_optimizations = true - cop.no_infer_time_folding = true - compile_file(file, access, unsafe(addr(mg)), cop) $(ok; program; issues) { - if (!ok) { - result.failed = true - } else { - result.count = paranoid_collect(program, result.errors) - var perf_issues : array - result.count += perf_lint_collect(program, perf_issues) - for (w in perf_issues) { - result.errors |> push(w) - } - var style_issues : array - let hygiene = program._options |> find_arg("_comment_hygiene") ?as tBool ?? false - result.count += style_lint_collect(program, style_issues, false, hygiene) - for (w in style_issues) { - result.errors |> push(w) - } - } - } - } - } - return <- result -} +//! Thin popen wrapper. Real logic lives in subtools/lint_tool.das so +//! macro state from compile_file doesn't leak across MCP calls. 
def do_lint(file : string; project : string = "") : string { - var files : array - parse_file_list(file, files) - if (empty(files)) { - return make_tool_result("no files matched: {file}", true) - } - if (length(files) == 1) { - return do_lint_single(files[0], project) - } - // batch mode - var total_issues = 0 - var total_errors = 0 - var total_skipped = 0 - let output = build_string() $(var w) { - for (f in files) { - if (has_expect_directive(f)) { - total_skipped++ - w |> write("SKIP {f} (intentional compile errors via `expect`)\n") - continue - } - let result = do_lint_file(f, project) - if (result.failed) { - total_errors++ - w |> write("FAIL {f}\n") - } elif (result.count > 0) { - total_issues += result.count - w |> write("WARN {f} ({result.count})\n") - write_deduped(w, result.errors) - } else { - w |> write("PASS {f}\n") - } - } - if (total_skipped > 0) { - w |> write("\n{length(files)} files, {total_issues} issue(s), {total_errors} error(s), {total_skipped} skipped\n") - } else { - w |> write("\n{length(files)} files, {total_issues} issue(s), {total_errors} error(s)\n") - } - } - return make_tool_result(output, total_errors > 0) + return run_mcp_subtool("lint_tool", [file, project]) } diff --git a/utils/mcp/tools/list_functions.das b/utils/mcp/tools/list_functions.das index 08b01ac6f6..a36e93c515 100644 --- a/utils/mcp/tools/list_functions.das +++ b/utils/mcp/tools/list_functions.das @@ -4,11 +4,9 @@ options no_unused_block_arguments = false require common public +//! Thin popen wrapper. Real logic lives in subtools/list_functions.das so +//! macro state from compile_file doesn't leak across MCP calls. + def do_list_functions(file : string; project : string = "") : string { - return compile_and_simulate(file, project) $(program; issues) { - var res = build_string() $(var writer) { - write_function_list(writer, program) - } - return empty(res) ? 
"(no functions found)" : res - } + return run_mcp_subtool("list_functions", [file, project]) } diff --git a/utils/mcp/tools/list_module_api.das b/utils/mcp/tools/list_module_api.das index cb51a96598..ad7d5c0cc1 100644 --- a/utils/mcp/tools/list_module_api.das +++ b/utils/mcp/tools/list_module_api.das @@ -4,406 +4,9 @@ options no_unused_block_arguments = false require common public -def private try_list_module_api(name : string; preamble : string; filter : string; section : string; compact : bool; project : string) : TryResult { - let stub_path = make_temp_das_file() - let stub = "require {name}\n[export]\ndef main\n pass\n" - var write_ok = false - fopen(stub_path, "w") $(f) { - if (f != null) { - fwrite(f, stub) - write_ok = true - } - } - if (!write_ok) { - remove(stub_path) - return TryResult(text = make_tool_result("Cannot write temp file: {stub_path}", true), ok = false, retryable = false) - } - var ok = false - let result = compile_and_simulate(stub_path, project) $(program; issues) { - var target_mod : Module? - program_for_each_module(program) $(mod) { - let mname = string(mod.name) - if (mname == name || "{name}" |> ends_with("/{mname}")) { - target_mod = mod - } - } - if (target_mod == null) { - return "Module '{name}' not found after compilation" - } - let res = build_string() $(var writer) { - if (!empty(preamble)) { - write(writer, preamble) - } - write_module_api(writer, target_mod, filter, section, compact) - } - ok = true - return empty(res) ? "(empty module)" : res - } - remove(stub_path) - return TryResult(text = result, ok = ok, retryable = true) -} +//! Thin popen wrapper. Real logic lives in subtools/list_module_api.das so +//! macro state from compile_file doesn't leak across MCP calls. 
def do_list_module_api(module_name : string; filter : string = ""; section : string = ""; compact_str : string = ""; project : string = "") : string { - let compact = compact_str == "true" - let first = try_list_module_api(module_name, "", filter, section, compact, project) - if (first.ok || !first.retryable || is_qualified_module_name(module_name)) { - return first.text - } - let resolved = "daslib/{module_name}" - let preamble = "[resolved '{module_name}' as '{resolved}']\n" - let second = try_list_module_api(resolved, preamble, filter, section, compact, project) - if (second.ok) { - return second.text - } - return first.text -} - -def is_operator_function(name : string) : bool { - let ch = first_character(name) - return !is_alpha(ch) && !is_number(ch) && ch != '_' -} - -def name_matches_filter(name : string; filter : string) : bool { - return empty(filter) || symbol_matches(name, filter) -} - -def want_section(section : string; name : string) : bool { - return empty(section) || section == name -} - -def write_compact_func_signature(var w : StringBuilderWriter; func) { - var has_args = false - for (i in range(length(func.arguments))) { - if (!arg_needs_documenting(func.arguments[i]._type)) { - continue - } - if (!has_args) { - write(w, "(") - has_args = true - } else { - write(w, "; ") - } - write(w, "{describe(func.arguments[i]._type)}") - } - if (has_args) { - write(w, ")") - } - if (func.result != null && !func.result.isVoid) { - write(w, " : {describe(func.result)}") - } -} - -def write_module_api(var writer : StringBuilderWriter; mod : Module?; filter : string = ""; section : string = ""; compact : bool = false) { - write(writer, "Module: {mod.name}\n") - var funcs : array - var generics : array - var operators : array - var structs : array - var handled : array - var enums : array - var globals : array - var func_annotations : array - var struct_annotations : array - var call_macros : array - var reader_macros : array - var variant_macros : array - var 
typeinfo_macros : array - var for_loop_macros : array - var type_macros : array - // functions via AST - if (want_section(section, "functions") || want_section(section, "operators")) { - for_each_function(mod, "") $(func) { - if (func.flags.privateFunction || func.flags._lambda || func.flags.generated) { - return - } - let fname = string(func.name) - if (!name_matches_filter(fname, filter)) { - return - } - let entry = build_string() $(var w) { - write(w, " def {func.name}") - if (compact) { - write_compact_func_signature(w, func) - } else { - write_func_signature(w, func) - } - } - if (is_operator_function(fname) && want_section(section, "operators")) { - operators |> push(entry) - } elif (!is_operator_function(fname) && want_section(section, "functions")) { - funcs |> push(entry) - } - } - } - // generics via AST - if (want_section(section, "generics") || want_section(section, "operators")) { - for_each_generic(mod) $(func) { - if (func.flags.privateFunction || func.flags.generated || string(func.name) |> starts_with("```")) { // nolint:PERF012 - return - } - let fname = string(func.name) - if (!name_matches_filter(fname, filter)) { - return - } - let entry = build_string() $(var w) { - write(w, " def {func.name}") - if (compact) { - write_compact_func_signature(w, func) - } else { - write_func_signature(w, func) - } - } - if (is_operator_function(fname) && want_section(section, "operators")) { - operators |> push(entry) - } elif (!is_operator_function(fname) && want_section(section, "generics")) { - generics |> push(entry) - } - } - } - // structures via AST - if (want_section(section, "structs")) { - for_each_structure(mod) $(value) { - if (value.flags.isLambda || value.flags._generator || value.flags.generated || value.flags.privateStructure || - !name_matches_filter(string(value.name), filter)) { - return - } - let entry = build_string() $(var w) { - if (value.flags.isClass) { - write(w, " class {value.name}") - } else { - write(w, " struct {value.name}") - } 
- if (value.parent != null) { - write(w, " : {value.parent.name}") - } - if (!compact && int(value.at.line) > 0) { - write(w, " // line {int(value.at.line)}") - } - write(w, "\n") - if (!compact) { - for (fld in value.fields) { - write(w, " {fld.name} : {describe(fld._type)}\n") - } - } - } - structs |> push(entry) - } - } - // handled types via annotations - if (want_section(section, "handled")) { - module_for_each_annotation(mod) $(value) { - if (!value.isBasicStructureAnnotation || !name_matches_filter(string(value.name), filter)) { - return - } - let entry = build_string() $(var w) { - if (compact) { - write(w, " handled {value.name}\n") - } else { - write_handled_type(w, value) - } - } - handled |> push(entry) - } - } - // enumerations via AST - if (want_section(section, "enums")) { - for_each_enumeration(mod) $(value) { - if (value.isPrivate || !name_matches_filter(string(value.name), filter)) { - return - } - let entry = build_string() $(var w) { - write(w, " enum {value.name}") - if (!compact && int(value.at.line) > 0) { - write(w, " // line {int(value.at.line)}") - } - write(w, "\n") - if (!compact) { - for (en in value.list) { - write(w, " {en.name}\n") - } - } - } - enums |> push(entry) - } - } - // annotations (function annotations, structure macros) - if (want_section(section, "annotations")) { - module_for_each_annotation(mod) $(value) { - if (value.isBasicStructureAnnotation || value.isEnumerationAnnotation) { - return // already in handled/enums sections - } - let aname = string(value.name) - if (!name_matches_filter(aname, filter)) { - return - } - if (value.isFunctionAnnotation) { - func_annotations |> push(" [{aname}]\n") - } elif (!value.isTypeAnnotation) { - struct_annotations |> push(" [{aname}]\n") - } - } - } - // call macros - if (want_section(section, "annotations")) { - for_each_call_macro(mod) $(value) { - let mname = string(value) - if (!name_matches_filter(mname, filter)) { - return - } - call_macros |> push(" {mname}\n") - } - } - // 
reader macros - if (want_section(section, "annotations")) { - for_each_reader_macro(mod) $(value) { - let mname = string(value) - if (!name_matches_filter(mname, filter)) { - return - } - reader_macros |> push(" {mname}\n") - } - } - // variant macros - if (want_section(section, "annotations")) { - for_each_variant_macro(mod) $(value) { - let mname = string(value.name) - if (!name_matches_filter(mname, filter)) { - return - } - variant_macros |> push(" {mname}\n") - } - } - // typeinfo macros - if (want_section(section, "annotations")) { - for_each_typeinfo_macro(mod) $(value) { - let mname = string(value.name) - if (!name_matches_filter(mname, filter)) { - return - } - typeinfo_macros |> push(" {mname}\n") - } - } - // for loop macros - if (want_section(section, "annotations")) { - for_each_for_loop_macro(mod) $(value) { - let mname = string(value.name) - if (!name_matches_filter(mname, filter)) { - return - } - for_loop_macros |> push(" {mname}\n") - } - } - // type macros - if (want_section(section, "annotations")) { - for_each_typemacro(mod) $(value) { - let mname = string(value.name) - if (!name_matches_filter(mname, filter)) { - return - } - type_macros |> push(" {mname}\n") - } - } - // globals via AST - if (want_section(section, "globals")) { - for_each_global(mod) $(value) { - if (value.flags.private_variable || !name_matches_filter(string(value.name), filter)) { - return - } - var line = " {value.name} : {describe(value._type)}" - if (int(value.at.line) > 0) { - line = "{line} // line {int(value.at.line)}" - } - globals |> push("{line}\n") - } - } - if (!empty(funcs)) { - write(writer, "Functions ({length(funcs)}):\n") - for (f in funcs) { - write(writer, "{f}\n") - } - } - if (!empty(generics)) { - write(writer, "Generics ({length(generics)}):\n") - for (g in generics) { - write(writer, "{g}\n") - } - } - if (!empty(operators)) { - write(writer, "Operators ({length(operators)}):\n") - for (o in operators) { - write(writer, "{o}\n") - } - } - if 
(!empty(structs)) { - write(writer, "Structs/Classes:\n") - for (s in structs) { - write(writer, s) - } - } - if (!empty(handled)) { - write(writer, "Handled types ({length(handled)}):\n") - for (h in handled) { - write(writer, h) - } - } - if (!empty(enums)) { - write(writer, "Enumerations:\n") - for (e in enums) { - write(writer, e) - } - } - if (!empty(globals)) { - write(writer, "Globals:\n") - for (g in globals) { - write(writer, g) - } - } - if (!empty(func_annotations)) { - write(writer, "Function annotations ({length(func_annotations)}):\n") - for (a in func_annotations) { - write(writer, a) - } - } - if (!empty(struct_annotations)) { - write(writer, "Structure annotations ({length(struct_annotations)}):\n") - for (a in struct_annotations) { - write(writer, a) - } - } - if (!empty(call_macros)) { - write(writer, "Call macros ({length(call_macros)}):\n") - for (m in call_macros) { - write(writer, m) - } - } - if (!empty(reader_macros)) { - write(writer, "Reader macros ({length(reader_macros)}):\n") - for (m in reader_macros) { - write(writer, m) - } - } - if (!empty(variant_macros)) { - write(writer, "Variant macros ({length(variant_macros)}):\n") - for (m in variant_macros) { - write(writer, m) - } - } - if (!empty(typeinfo_macros)) { - write(writer, "Typeinfo macros ({length(typeinfo_macros)}):\n") - for (m in typeinfo_macros) { - write(writer, m) - } - } - if (!empty(for_loop_macros)) { - write(writer, "For-loop macros ({length(for_loop_macros)}):\n") - for (m in for_loop_macros) { - write(writer, m) - } - } - if (!empty(type_macros)) { - write(writer, "Type macros ({length(type_macros)}):\n") - for (m in type_macros) { - write(writer, m) - } - } + return run_mcp_subtool("list_module_api", [module_name, filter, section, compact_str, project]) } diff --git a/utils/mcp/tools/list_types.das b/utils/mcp/tools/list_types.das index 4799281a3f..114c46fde6 100644 --- a/utils/mcp/tools/list_types.das +++ b/utils/mcp/tools/list_types.das @@ -4,106 +4,9 @@ options 
no_unused_block_arguments = false require common public -def do_list_types(file : string; project : string = "") : string { - return compile_and_simulate(file, project) $(program; issues) { - var res = build_string() $(var writer) { - write_type_list(writer, program) - } - return empty(res) ? "(no types found)" : res - } -} +//! Thin popen wrapper. Real logic lives in subtools/list_types.das so +//! macro state from compile_file doesn't leak across MCP calls. -def write_type_list(var writer : StringBuilderWriter; program : smart_ptr) { - let thisMod = get_this_module(program) - var structs : array - var classes : array - var enums : array - var typedefs : array - for_each_structure(thisMod) $(value) { - if (value.flags.privateStructure || value.flags.isLambda || value.flags.generated || value.flags._generator) { - return - } - var entry = build_string() $(var w) { - if (value.flags.isClass) { - write(w, " class {value.name}") - if (value.parent != null) { - write(w, " : {value.parent.name}") - } - write(w, " // line {int(value.at.line)}\n") - for (fld in value.fields) { - if (fld.flags.classMethod || fld.flags.generated || !fld.flags.implemented || fld.flags.parentType || string(fld.name) == "__rtti") { - continue - } - write(w, " {fld.name} : {describe(fld._type)}") - if (fld.init != null) { - write(w, " = {describe(fld.init)}") - } - write(w, "\n") - } - } else { - write(w, " struct {value.name}") - if (value.parent != null) { - write(w, " : {value.parent.name}") - } - write(w, " // line {int(value.at.line)}\n") - for (fld in value.fields) { - if (fld.flags.generated || !fld.flags.implemented || fld.flags.parentType || string(fld.name) == "__rtti") { - continue - } - write(w, " {fld.name} : {describe(fld._type)}") - if (fld.init != null) { - write(w, " = {describe(fld.init)}") - } - write(w, "\n") - } - } - } - if (value.flags.isClass) { - classes |> push(entry) - } else { - structs |> push(entry) - } - } - for_each_enumeration(thisMod) $(value) { - if 
(value.isPrivate) { - return - } - var entry = build_string() $(var w) { - write(w, " enum {value.name} // line {int(value.at.line)}\n") - for (en in value.list) { - write(w, " {en.name} = {describe(en.value)}\n") - } - } - enums |> push(entry) - } - for_each_typedef(thisMod) $(name, value) { - if (value.flags.isPrivateAlias) { - return - } - typedefs |> push(" {name} = {describe(value)} // line {int(value.at.line)}\n") - } - if (!empty(structs)) { - write(writer, "Structs:\n") - for (s in structs) { - write(writer, s) - } - } - if (!empty(classes)) { - write(writer, "Classes:\n") - for (c in classes) { - write(writer, c) - } - } - if (!empty(enums)) { - write(writer, "Enumerations:\n") - for (e in enums) { - write(writer, e) - } - } - if (!empty(typedefs)) { - write(writer, "Type aliases:\n") - for (t in typedefs) { - write(writer, t) - } - } +def do_list_types(file : string; project : string = "") : string { + return run_mcp_subtool("list_types", [file, project]) } diff --git a/utils/mouse/OVERVIEW.md b/utils/mouse/OVERVIEW.md index a7b7be64fc..7b1e5c87d0 100644 --- a/utils/mouse/OVERVIEW.md +++ b/utils/mouse/OVERVIEW.md @@ -55,27 +55,30 @@ Frontmatter fields: `slug` (stable ID, used for cross-refs), `title` (1-line des | Operation | CLI | MCP tool | Notes | |---|---|---|---| -| Retrieve | `mouse ask ""` | `mouse__ask` | Top-K BM25 ranked. Words OR-joined. | -| Add Q&A | `mouse add "" --body "..."` | `mouse__add` | Dupe-gated by default; pass `--force` / `force=true` to override. | +| Retrieve | `mouse ask ""` | `mouse__ask` | Top-K BM25 ranked, each annotated with a Jaccard title-similarity. Words OR-joined; `--raw-query` / `rawQuery=true` passes raw FTS5 syntax (phrases, NEAR, explicit AND/OR). | +| Add Q&A | `mouse add "" --body "..."` | `mouse__add` | Advisory similar list always; hard-blocks only on Jaccard ≥ 0.7. `--force` / `force=true` overrides the block. | | Get doc | `mouse get ` | `mouse__get` | Body + frontmatter + reverse-link footer. 
| -| Rebuild | `mouse rebuild` | `mouse__rebuild` | Rescans `/docs/`; idempotent. | +| Rebuild | `mouse rebuild` | `mouse__rebuild` | Force full rescan + signature reset. Normally not needed — every entry point auto-reindexes via the git-staleness check. | | Serve MCP | `mouse serve` | (this _is_ the server) | stdio JSON-RPC. | -`add`'s **dupe-on-add gate** is the corpus-hygiene mechanism. With `force=false` (default), `add` first runs retrieval on the new question and returns the similar docs without writing if any match. The agent decides: extend an existing doc (edit the `.md`) or create a new one (re-call with `force=true`). +**Dupe-on-add gate.** `add` always runs a Jaccard-scored similarity check against the corpus and surfaces the top matches (whether it created or not). With `force=false` (default), it hard-blocks only when the top match scores ≥ 0.7 — a near-paraphrase. Below that threshold, the add proceeds and the similar list is shown for awareness. The caller (LLM or human) is the actual decider; the threshold just stops obvious near-paraphrases from sneaking in. Below 0.5 nothing is surfaced unless content overlap is genuine. ## Storage model The `.md` files under `/docs/` are the **source of truth**. The SQLite index at `/index.db` is rebuildable — `mouse rebuild` repopulates it from disk. Implications: -- The corpus is `git`-friendly. Check it in if you want a shared corpus; `git pull` followed by `mouse rebuild` syncs. -- Hand-edits work. `Edit` an answer, run `mouse rebuild`, the index reflects the change. +- The corpus is `git`-friendly. Check it in if you want a shared corpus; `git pull` and the next `mouse__ask` (or any other entry point) auto-reindexes — no manual `mouse rebuild` needed. +- Hand-edits work. `Edit` an answer, run any mouse command, the index reflects the change. - The DB is disposable. Lose it, regenerate it. 
+**Auto-reindex.** Every entry point computes a cheap staleness signature, delegated to the shared `utils/common/git_signature` module: per-tree `git rev-parse HEAD:` (the docs subtree's hash at HEAD) + filtered `git status --porcelain` over `/docs/*.md` + per-changed-file mtimes, hashed. **Per-tree HEAD** matters: a `git pull` (or branch switch) that doesn't touch `/docs/` leaves the docs tree hash unchanged, so the index doesn't rebuild — a normal monorepo workflow no longer churns the cache. The signature is persisted in an `index_meta` table; on mismatch, the index rebuilds and the new signature replaces the old. If `` isn't inside a git checkout (or git is unavailable), the fallback is a recursive filesystem walk that collects `(path, mtime)` for every `.md`, **sorts by path**, then hashes — sort makes the signature deterministic across platforms (Windows `_findfirst` and Unix `readdir` don't guarantee stable order). The same module backs the daslang MCP server's cpp source-search staleness tracking. + The SQLite schema (managed via `[sql_migration]` from `sqlite/sqlite_migrate`): - `docs` — slug PK, path, title, created, last_verified, body_hash. - `links` — composite-PK pair `(from_slug, to_slug)` for cross-refs. - `search_idx` — FTS5 virtual table; per-doc concatenation of title + question aliases + body. BM25 ranks via the `@sql_fts_rank` column. +- `index_meta` — `(key, value)` k/v table. Currently stores the staleness signature; future-proof for other persistent metadata. Rebuild is whole-corpus delete+repopulate — simple, correct, fast for small corpora. Incremental update (re-index only changed `body_hash`) is a vNext optimization once the corpus is large enough that whole-rebuild matters. diff --git a/utils/mouse/README.md b/utils/mouse/README.md index 95381272bf..6f27b36a86 100644 --- a/utils/mouse/README.md +++ b/utils/mouse/README.md @@ -5,15 +5,19 @@ Personal Q&A cache MCP server. `.md` answers backed by SQLite/FTS5 retrieval. 
Lo ## Quick start ```bash -# Rebuild the index from /docs/ (defaults to ./mouse-data, override via --root or $MOUSE_ROOT) -daslang utils/mouse/main.das -- rebuild - -# Search +# Search (defaults to ./mouse-data, override via --root or $MOUSE_ROOT; +# every entry point auto-reindexes via git-staleness — no manual rebuild needed) daslang utils/mouse/main.das -- ask "how do I X" -# Add a Q&A (dupe-gated by default) +# Search with raw FTS5 syntax (phrases, NEAR, explicit AND/OR) +daslang utils/mouse/main.das -- ask '"foo bar" OR baz' --raw-query + +# Add a Q&A. Surfaces top similar docs; hard-blocks only on Jaccard ≥ 0.7. daslang utils/mouse/main.das -- add "how do I X" --body "answer body" +# Force a full rescan + signature reset (rarely needed) +daslang utils/mouse/main.das -- rebuild + # Run as MCP stdio server daslang utils/mouse/main.das -- serve ``` @@ -43,7 +47,7 @@ mouse-data/ index.db -- SQLite, rebuildable from docs/ ``` -`.md` files are checked-in-friendly. `git pull` + `mouse rebuild` re-syncs the index. +`.md` files are checked-in-friendly. `git pull` and the next mouse command auto-reindex via the git-staleness signature (HEAD + porcelain status over `/docs/*.md` + per-changed-file mtimes). No manual `rebuild` needed. ## Development diff --git a/utils/mouse/index.das b/utils/mouse/index.das index 7942b4777a..c57f1c0693 100644 --- a/utils/mouse/index.das +++ b/utils/mouse/index.das @@ -9,9 +9,11 @@ require sqlite/sqlite_boost public require sqlite/sqlite_linq public require sqlite/sqlite_migrate public require daslib/fio public +require daslib/strings_boost public require strings public require math require store public +require ../common/git_signature.das // ─── on-disk layout ────────────────────────────────────────────────── @@ -107,6 +109,20 @@ def migration_002(db : SqlRunner) { db |> create_table(type) } +// Persisted across cold opens: the staleness signature used by +// ensure_index_fresh. 
Without persistence every CLI invocation would see an +// empty in-memory signature and rebuild from scratch. +[sql_table(name = "index_meta")] +struct IndexMeta { + @sql_primary_key key : string + value : string +} + +[sql_migration(version = 3, description = "add index_meta for staleness signature")] +def migration_003(db : SqlRunner) { + db |> create_table(type) +} + def with_index_db(root : string; blk : block<(db : SqlRunner) : void>) { ensure_root(root) with_latest_sqlite(db_path(root)) $(db) { @@ -127,7 +143,8 @@ struct SearchHit { struct DupeMatch { slug : string title : string - rank : float + rank : float // FTS5 BM25; kept for power users / debugging + similarity : float // Jaccard, 0..1; primary ranking key for dedup decisions } struct AddOutcome { @@ -210,6 +227,60 @@ def rebuild(db : SqlRunner; root : string) : int { return n } +// ─── staleness signature & auto-reindex ────────────────────────────── + +// Persisted (via index_meta) so the cost only pays out on real changes. +let SIGNATURE_KEY = "signature" + +def get_index_signature(db : SqlRunner) : string { + let opt <- _sql(db |> select_from(type) + |> _where(_.key == SIGNATURE_KEY) + |> _first_opt()) + if (opt |> is_some) { + return (opt |> unwrap).value + } + return "" +} + +def set_index_signature(db : SqlRunner; sig : string) { + db |> _sql_upsert( + IndexMeta(key = SIGNATURE_KEY, value = sig), + _.key, + (value = _excluded.value)) +} + +// The canonical staleness signature for a docs root — `compute_signature` +// over `*.md` under `docs/`. Use this everywhere we stamp a fresh signature +// so the input shape stays in lockstep with `ensure_index_fresh`'s read. +def public compute_docs_signature(root : string) : string { + return compute_signature(root, @(p : string) => ends_with(p, ".md"), [docs_dir(root)]) +} + +// Reconcile the SQLite index against on-disk docs. If the staleness signature +// matches what's stored, this is a no-op. On mismatch, rebuilds and stores +// the new signature. 
Returns the number of docs (re)indexed (0 on cache hit). +// +// Signature compute is delegated to utils/common/git_signature: the predicate +// only narrows by file extension because the shared module pre-narrows status +// output and filesystem walks to paths under `search_dirs_abs = [docs_dir]`. +def ensure_index_fresh(db : SqlRunner; root : string) : int { + let docs_abs = docs_dir(root) + if (!is_directory_path(docs_abs)) { + return 0 + } + let sig = compute_docs_signature(root) + if (empty(sig)) { + return 0 + } + let stored = get_index_signature(db) + if (sig == stored) { + return 0 + } + let n = rebuild(db, root) + set_index_signature(db, sig) + return n +} + // ─── search ────────────────────────────────────────────────────────── // Strip non-alphanumeric chars (except whitespace and `*`) so a free-form @@ -237,39 +308,48 @@ def sanitize_fts5_query(q : string) : string { // scan the whole index. 50 is well above any sane top-k for this corpus. let MAX_SEARCH_K = 50 -def search(db : SqlRunner; query : string; k : int) : array { +def search(db : SqlRunner; query : string; k : int; raw_query : bool = false) : array { var out : array if (k <= 0) { return <- out } let kk = min(k, MAX_SEARCH_K) - // to_lower: FTS5 keywords (OR/AND/NOT/NEAR) are uppercase-only operators. - // A user query like "foo OR bar" would tokenize to [foo, OR, bar] and - // OR-join to `foo OR OR OR bar` — invalid FTS syntax → empty result. - // Downcasing every token makes user-typed keywords inert. - let cleaned = to_lower(strip(sanitize_fts5_query(query))) - if (empty(cleaned)) { - return <- out - } - // Free-form queries → OR-joined so BM25 ranks docs by how many of the - // user's words appear, rather than failing when one word is missing. - // FTS5 default is whitespace-AND, which is wrong for a "find related" - // search. Users who want strict AND/phrase have to wait for - // vNext --raw-query. 
- let words <- split(cleaned, " ") - var meaningful : array - meaningful |> reserve(length(words)) - for (w in words) { - if (length(w) >= 2) { - meaningful |> push(w) + var fts_query : string + if (raw_query) { + // Power-user path: pass the user's string straight to FTS5. Bad + // syntax surfaces via _try_sql + LOG_WARNING below. + fts_query = strip(query) + if (empty(fts_query)) { + return <- out } + } else { + // to_lower: FTS5 keywords (OR/AND/NOT/NEAR) are uppercase-only operators. + // A user query like "foo OR bar" would tokenize to [foo, OR, bar] and + // OR-join to `foo OR OR OR bar` — invalid FTS syntax → empty result. + // Downcasing every token makes user-typed keywords inert. + let cleaned = to_lower(strip(sanitize_fts5_query(query))) + if (empty(cleaned)) { + return <- out + } + // Free-form queries → OR-joined so BM25 ranks docs by how many of the + // user's words appear, rather than failing when one word is missing. + // FTS5 default is whitespace-AND, which is wrong for a "find related" + // search. Users who want strict AND/phrase pass raw_query=true. + let words <- split(cleaned, " ") + var meaningful : array + meaningful |> reserve(length(words)) + for (w in words) { + if (length(w) >= 2) { + meaningful |> push(w) + } + } + if (empty(meaningful)) { + return <- out + } + fts_query = meaningful |> join(" OR ") } - if (empty(meaningful)) { - return <- out - } - let or_query = meaningful |> join(" OR ") let raw_hits <- _try_sql(db |> select_from(type) - |> _where(_.Text |> text_match(or_query)) + |> _where(_.Text |> text_match(fts_query)) |> _order_by(_.Rank) |> take(kk)) if (raw_hits |> is_err) { @@ -294,13 +374,63 @@ def search(db : SqlRunner; query : string; k : int) : array { return <- out } +// ─── dedup scoring (Jaccard over titles, stop-words filtered) ──────── + +// add_doc treats a hit at or above this as "looks like a duplicate" and +// blocks the create unless force=true. 
Below this, hits are advisory only — +// shown to the caller, but the add proceeds. The caller (LLM or human) is +// the actual decider; the threshold just stops obvious near-paraphrases +// from sneaking in. +let JACCARD_HARD_BLOCK = 0.7f + +// Conservative English stopword set. Kept small on purpose: filter common +// glue words that flood OR-queries, but bias toward false negatives (let +// borderline words like `do` through) over false positives (filtering rare +// words used semantically). +let STOP_WORDS : table <- { + "a", "an", "the", "is", "are", "was", "were", "be", "been", "being", + "do", "does", "did", "doing", "can", "could", "should", "would", "will", + "may", "might", "of", "in", "on", "at", "to", "for", "with", "from", "by", + "as", "and", "or", "but", "not", "no", "if", "how", "what", "why", "when", + "where", "which", "that", "this", "these", "those", "i", "you", "my" +} + +def tokenize_for_jaccard(s : string) : table { + var out : table + let cleaned = to_lower(strip(sanitize_fts5_query(s))) + if (empty(cleaned)) { + return <- out + } + let parts <- split(cleaned, " ") + for (w in parts) { + if (length(w) >= 2 && !key_exists(STOP_WORDS, w)) { + out |> insert(w) + } + } + return <- out +} + +// Score every BM25 hit by Jaccard on tokenized titles, filter out the +// zero-similarity noise, and return the rest sorted by similarity desc. +// dupe_check no longer makes the block decision — that moved into add_doc +// (and into the response messaging in main.das). Callers see the ranking. 
def dupe_check(db : SqlRunner; question : string; k : int = 5) : array { let hits <- search(db, question, k) + let qtok = tokenize_for_jaccard(question) var out : array out |> reserve(length(hits)) for (h in hits) { - out |> push(DupeMatch(slug = h.slug, title = h.title, rank = h.rank)) + let ttok = tokenize_for_jaccard(h.title) + let s = jaccard(qtok, ttok) + if (s > 0.0f) { + out |> push(DupeMatch( + slug = h.slug, + title = h.title, + rank = h.rank, + similarity = s)) + } } + sort(out) <| $(a, b) => a.similarity > b.similarity return <- out } @@ -362,11 +492,14 @@ def add_doc(db : SqlRunner; root : string; } if (!force) { var similar <- dupe_check(db, clean_question, 5) - if (!empty(similar)) { + if (!empty(similar) && similar[0].similarity >= JACCARD_HARD_BLOCK) { outcome.similar <- similar outcome.created = false return <- outcome } + // Below threshold: stash the list so the success path can surface + // the closest matches as an awareness hint. + outcome.similar <- similar } var existing : table let all_slugs <- _sql(db |> select_from(type) |> _select(_.slug)) @@ -403,6 +536,7 @@ def add_doc(db : SqlRunner; root : string; return <- outcome } rebuild(db, root) + set_index_signature(db, compute_docs_signature(root)) outcome.slug = slug outcome.created = true outcome.written_path = path diff --git a/utils/mouse/main.das b/utils/mouse/main.das index c0ed64f6fa..27af8ddd7f 100644 --- a/utils/mouse/main.das +++ b/utils/mouse/main.das @@ -61,6 +61,9 @@ struct MouseArgs { @clarg_doc = "Show only queries with no result (log)" misses : bool + @clarg_doc = "Treat the question as raw FTS5 query syntax (no sanitization, no OR-join). See https://www.sqlite.org/fts5.html#full_text_query_syntax" + raw_query : bool + @clarg_short = "?" 
@clarg_name = "show-help" @clarg_doc = "Show this help and exit" @@ -91,16 +94,18 @@ def cmd_ask(args : MouseArgs) { return } with_index_db(root) $(db) { - let hits <- search(db, query, args.k) + ensure_index_fresh(db, root) + let hits <- search(db, query, args.k, args.raw_query) log_query(db, query, hits, "cli") if (empty(hits)) { print("(no results){HINT_NO_MATCH}\n") return } + let qtok = tokenize_for_jaccard(query) + print("Top {length(hits)} best match(es):\n") for (h in hits) { - print("[{h.rank}] {h.slug} — {h.title}\n") - print(" path: {h.path}\n") - print(" last_verified: {h.last_verified}\n") + let ttok = tokenize_for_jaccard(h.title) + print(fmt_search_hit(h, jaccard(qtok, ttok))) } print("{HINT_HIT}\n") } @@ -118,6 +123,7 @@ def cmd_get(args : MouseArgs) { return } with_index_db(root) $(db) { + ensure_index_fresh(db, root) let path = path_join(docs_dir(root), "{slug}.md") var doc : ParsedDoc var err : string @@ -166,21 +172,30 @@ def cmd_add(args : MouseArgs) { return } with_index_db(root) $(db) { + ensure_index_fresh(db, root) let outcome <- add_doc(db, root, question, args.body, args.slug, args.force) if (!empty(outcome.error)) { to_log(LOG_ERROR, outcome.error) return } if (outcome.created) { - print("created: {outcome.slug}\n") + print("Created: {outcome.slug}\n") print("path: {outcome.written_path}\n") - print("\nHint: if you discover this answer is wrong later, edit the .md at the path above and bump `last_verified`.\n") + if (!empty(outcome.similar)) { + print("\nTop {length(outcome.similar)} similar document(s) (none above the {JACCARD_HARD_BLOCK:.2f} block threshold; surfaced for awareness):\n") + for (s in outcome.similar) { + print(fmt_dupe_match(s)) + } + print("\nHint: similarity > 0.5 may overlap meaningfully — consider whether the new doc could fold into one of these instead. 
Edit `last_verified` on the merged doc and `mouse rebuild` to drop this one.\n") + } else { + print("\nHint: if you discover this answer is wrong later, edit the .md at the path above and bump `last_verified`.\n") + } } else { - print("not created (use --force to override). similar:\n") + print("Did NOT create — top {length(outcome.similar)} similar document(s) (similarity ≥ {JACCARD_HARD_BLOCK:.2f}, looks like a duplicate):\n") for (s in outcome.similar) { - print(" [{s.rank}] {s.slug} — {s.title}\n") + print(fmt_dupe_match(s)) } - print("\nHint: prefer extending an existing doc by editing its .md (use mouse get to find the path). Re-call with --force only when the new answer is genuinely a different topic.\n") + print("\nSuggestion: prefer extending an existing doc (run `mouse get ` to find the path).\nRe-call with --force if this really is a different topic — the caller's judgment overrides the threshold.\n") } } } @@ -190,6 +205,7 @@ def cmd_rebuild(args : MouseArgs) { var n = 0 with_index_db(root) $(db) { n = rebuild(db, root) + set_index_signature(db, compute_docs_signature(root)) } print("rebuilt: {n} doc(s) in {root}\n") } @@ -198,6 +214,7 @@ def cmd_log(args : MouseArgs) { let root = resolve_root(args.root) let n = args.limit > 0 ? args.limit : 20 with_index_db(root) $(db) { + ensure_index_fresh(db, root) let rows <- recent_queries(db, n, args.misses) if (empty(rows)) { print("(no queries logged yet)\n") @@ -302,8 +319,8 @@ def handle_initialize(id : string) : string { def tools_list_body() : string { return build_string() $(var w) { w |> write("\{\"tools\":[") - w |> write("\{\"name\":\"mouse__ask\",\"description\":\"Search the blind-mouse Q&A cache. 
Returns top-K matching .md docs ranked by FTS5 BM25.\",\"inputSchema\":\{\"type\":\"object\",\"properties\":\{\"question\":\{\"type\":\"string\"\},\"k\":\{\"type\":\"integer\",\"default\":5\},\"root\":\{\"type\":\"string\"\}\},\"required\":[\"question\"]\}\},") - w |> write("\{\"name\":\"mouse__add\",\"description\":\"Add a Q&A to the cache. With force=false (default), runs dupe-check first and returns similar docs without writing if any are found.\",\"inputSchema\":\{\"type\":\"object\",\"properties\":\{\"question\":\{\"type\":\"string\"\},\"body\":\{\"type\":\"string\"\},\"slug\":\{\"type\":\"string\"\},\"force\":\{\"type\":\"boolean\",\"default\":false\},\"root\":\{\"type\":\"string\"\}\},\"required\":[\"question\",\"body\"]\}\},") + w |> write("\{\"name\":\"mouse__ask\",\"description\":\"Search the blind-mouse Q&A cache. Returns top-K matches ranked by FTS5 BM25, each annotated with a Jaccard title-similarity (sim 0..1) so you can judge relevance at a glance. Set rawQuery=true to pass raw FTS5 syntax (https://www.sqlite.org/fts5.html#full_text_query_syntax) — quoted phrases, NEAR, explicit AND/OR.\",\"inputSchema\":\{\"type\":\"object\",\"properties\":\{\"question\":\{\"type\":\"string\"\},\"k\":\{\"type\":\"integer\",\"default\":5\},\"rawQuery\":\{\"type\":\"boolean\",\"default\":false\},\"root\":\{\"type\":\"string\"\}\},\"required\":[\"question\"]\}\},") + w |> write("\{\"name\":\"mouse__add\",\"description\":\"Add a Q&A to the cache. Runs a Jaccard similarity check on the question; with force=false (default), hard-blocks creation only when the top match scores >= 0.7 (a near-paraphrase) and returns the similar list without writing. Below the threshold, the doc is created and the similar list is returned for awareness. 
Set force=true to override the hard block.\",\"inputSchema\":\{\"type\":\"object\",\"properties\":\{\"question\":\{\"type\":\"string\"\},\"body\":\{\"type\":\"string\"\},\"slug\":\{\"type\":\"string\"\},\"force\":\{\"type\":\"boolean\",\"default\":false\},\"root\":\{\"type\":\"string\"\}\},\"required\":[\"question\",\"body\"]\}\},") w |> write("\{\"name\":\"mouse__get\",\"description\":\"Fetch a doc by slug. Returns body, frontmatter, and reverse-link footer (which docs link to this one).\",\"inputSchema\":\{\"type\":\"object\",\"properties\":\{\"slug\":\{\"type\":\"string\"\},\"root\":\{\"type\":\"string\"\}\},\"required\":[\"slug\"]\}\},") w |> write("\{\"name\":\"mouse__rebuild\",\"description\":\"Rescan /docs/ and rebuild the SQLite index from disk. .md files are the source of truth.\",\"inputSchema\":\{\"type\":\"object\",\"properties\":\{\"root\":\{\"type\":\"string\"\}\}\}\}") w |> write("]\}") @@ -314,8 +331,14 @@ def handle_tools_list(id : string) : string { return jsonrpc_response(id, tools_list_body()) } -def fmt_search_hit(h : SearchHit) : string { - return "[{h.rank}] {h.slug} — {h.title}\n path: {h.path}\n last_verified: {h.last_verified}\n" +// FTS5 BM25 rank (negative; smaller = better) shown alongside Jaccard +// title-similarity (0..1) so the caller can read both signals at a glance. +def fmt_search_hit(h : SearchHit; sim : float) : string { + return " [BM25 {h.rank:.3f} | sim {sim:.3f}] {h.slug} — {h.title}\n path: {h.path}\n last_verified: {h.last_verified}\n" +} + +def fmt_dupe_match(d : DupeMatch) : string { + return " [sim {d.similarity:.3f}] {d.slug} — {d.title}\n" } let HINT_NO_MATCH = "\nHint: nothing matched. Do the research yourself, then call mouse__add(question, body) so the next session doesn't redo this work." @@ -327,18 +350,23 @@ def tool_ask(args : JsonValue?) 
: string { return make_tool_result("missing 'question' argument", true) } let k = get_int_arg(args, "k", 5) + let raw_query = get_bool_arg(args, "rawQuery") let root = resolve_root(get_string_arg(args, "root")) var output : string with_index_db(root) $(db) { - let hits <- search(db, question, k) + ensure_index_fresh(db, root) + let hits <- search(db, question, k, raw_query) log_query(db, question, hits, "mcp") if (empty(hits)) { output = "(no results for: {question}){HINT_NO_MATCH}" return } + let qtok = tokenize_for_jaccard(question) output = build_string() $(var w) { + w |> write("Top {length(hits)} best match(es):\n") for (h in hits) { - w |> write(fmt_search_hit(h)) + let ttok = tokenize_for_jaccard(h.title) + w |> write(fmt_search_hit(h, jaccard(qtok, ttok))) } w |> write(HINT_HIT) } @@ -361,6 +389,7 @@ def tool_add(args : JsonValue?) : string { var output : string var is_error = false with_index_db(root) $(db) { + ensure_index_fresh(db, root) let outcome <- add_doc(db, root, question, body, slug_hint, force) if (!empty(outcome.error)) { output = outcome.error @@ -368,14 +397,26 @@ def tool_add(args : JsonValue?) : string { return } if (outcome.created) { - output = "created: {outcome.slug}\npath: {outcome.written_path}\n\nHint: if you discover this answer is wrong later, edit the .md at the path above and bump `last_verified`." + if (!empty(outcome.similar)) { + output = build_string() $(var w) { + w |> write("Created: {outcome.slug}\n") + w |> write("path: {outcome.written_path}\n\n") + w |> write("Top {length(outcome.similar)} similar document(s) (none above the {JACCARD_HARD_BLOCK:.2f} block threshold; surfaced for awareness):\n") + for (s in outcome.similar) { + w |> write(fmt_dupe_match(s)) + } + w |> write("\nHint: similarity > 0.5 may overlap meaningfully — consider whether the new doc could fold into one of these instead. 
Edit `last_verified` on the merged doc and call mouse__rebuild to drop this one.") + } + } else { + output = "Created: {outcome.slug}\npath: {outcome.written_path}\n\nHint: if you discover this answer is wrong later, edit the .md at the path above and bump `last_verified`." + } } else { output = build_string() $(var w) { - w |> write("not created (use force=true to override). similar:\n") + w |> write("Did NOT create — top {length(outcome.similar)} similar document(s) (similarity ≥ {JACCARD_HARD_BLOCK:.2f}, looks like a duplicate):\n") for (s in outcome.similar) { - w |> write(" [{s.rank}] {s.slug} — {s.title}\n") + w |> write(fmt_dupe_match(s)) } - w |> write("\nHint: prefer extending an existing doc by editing its .md (call mouse__get with one of the slugs above to find the path). Re-call with force=true only when the new answer is genuinely a different topic.") + w |> write("\nSuggestion: prefer extending an existing doc (call mouse__get with one of the slugs above to find the path).\nRe-call with force=true if this really is a different topic — the caller's judgment overrides the threshold.") } } } @@ -394,6 +435,7 @@ def tool_get(args : JsonValue?) : string { var output : string var is_error = false with_index_db(root) $(db) { + ensure_index_fresh(db, root) let path = path_join(docs_dir(root), "{slug}.md") var doc : ParsedDoc var err : string @@ -443,6 +485,7 @@ def tool_rebuild(args : JsonValue?) : string { var n = 0 with_index_db(root) $(db) { n = rebuild(db, root) + set_index_signature(db, compute_docs_signature(root)) } return make_tool_result("rebuilt: {n} doc(s) in {root}", false) } diff --git a/utils/mouse/tests/test_index.das b/utils/mouse/tests/test_index.das index 248daf0e2c..7fed166eb0 100644 --- a/utils/mouse/tests/test_index.das +++ b/utils/mouse/tests/test_index.das @@ -359,16 +359,20 @@ def test_dupe_check_finds_similar(t : T?) 
{ seed_doc(t, root, "doc-a", "Writing typefunction macros", "details", no_links, qs) var n_similar = -1 var top_slug : string + var top_sim = 0.0f with_index_db(root) $(db) { rebuild(db, root) let similar <- dupe_check(db, "typefunction", 5) n_similar = length(similar) if (n_similar > 0) { top_slug = similar[0].slug + top_sim = similar[0].similarity } } t |> success(n_similar > 0, "expected at least one match") t |> equal(top_slug, "doc-a") + // single rare keyword in a 3-word title => Jaccard = 1/3 ≈ 0.33 + t |> success(top_sim >= 0.3f, "similarity should be set: got {top_sim}") cleanup_root(root) } @@ -392,7 +396,40 @@ def test_dupe_check_no_match(t : T?) { } [test] -def test_add_dupe_gate_no_force(t : T?) { +def test_add_dupe_gate_blocks_high_similarity(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["how to write typefunction macros"] + // After stop-word filtering (`how`, `to`, `for` dropped) the title keeps 3 tokens and the + // question 4, sharing 3 ⇒ Jaccard = 3/4 = 0.75, above the 0.7 hard-block threshold. + seed_doc(t, root, "doc-a", "How to write typefunction macros", "body", no_links, qs) + var created = true + var n_similar = -1 + var top_sim = 0.0f + var n_files_after = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let outcome <- add_doc(db, root, "how to write typefunction macros for daslang", "answer body", "", false) + created = outcome.created + n_similar = length(outcome.similar) + if (n_similar > 0) { + top_sim = outcome.similar[0].similarity + } + let files <- list_doc_files(root) + n_files_after = length(files) + } + t |> success(!created, "high-similarity dupe should block create") + t |> success(n_similar > 0, "should return the similar doc") + t |> success(top_sim >= 0.7f, "top similarity above hard block: got {top_sim}") + t |> equal(n_files_after, 1) // only doc-a; no new file written + cleanup_root(root) +} + +[test] +def test_add_dupe_gate_allows_low_similarity(t : T?) 
{ let root = make_temp_root(t) if (empty(root)) { return @@ -400,20 +437,57 @@ def test_add_dupe_gate_no_force(t : T?) { let no_links : array let qs <- ["how to typefunction"] seed_doc(t, root, "doc-a", "Typefunction", "typefunction body", no_links, qs) - var created = true + var created = false var n_similar = -1 + var top_sim = -1.0f var n_files_after = -1 with_index_db(root) $(db) { rebuild(db, root) + // "typefunction question paraphrase" vs title "Typefunction" => + // Jaccard 1/3 ≈ 0.33, BELOW the 0.7 hard block. Add proceeds. let outcome <- add_doc(db, root, "typefunction question paraphrase", "answer body", "", false) created = outcome.created n_similar = length(outcome.similar) + if (n_similar > 0) { + top_sim = outcome.similar[0].similarity + } let files <- list_doc_files(root) n_files_after = length(files) } - t |> success(!created, "should not create when similar exists") - t |> success(n_similar > 0, "should return at least one similar") - t |> equal(n_files_after, 1) // only doc-a, no new file written + t |> success(created, "low-similarity match should NOT block; add must proceed") + t |> success(n_similar > 0, "advisory similar list still returned") + t |> success(top_sim < 0.7f, "top similarity below hard block: got {top_sim}") + t |> equal(n_files_after, 2) // doc-a + the new doc + cleanup_root(root) +} + +// Common glue words ("how", "do", "I") shouldn't drag rare-content queries +// across the threshold. After stop-word filtering, the rare-token overlap +// here is empty. +[test] +def test_dupe_check_jaccard_filters_common_words(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["how to write tests"] + seed_doc(t, root, "doc-a", "how do I write tests", "body", no_links, qs) + var n_similar = -1 + var top_sim = 0.0f + with_index_db(root) $(db) { + rebuild(db, root) + let similar <- dupe_check(db, "how do I add git tracking", 5) + n_similar = length(similar) + if (n_similar > 0) { + top_sim = similar[0].similarity + } + } + // Even if FTS5 returns the doc (matches "how"/"do"), the Jaccard score + // after stop-word filter is 0 — and dupe_check drops zero-similarity + // hits — so n_similar may be 0. Either way, we must not look like a + // duplicate. + t |> success(top_sim < 0.2f, "common-word overlap should not look like a duplicate: n_similar={n_similar}, top_sim={top_sim}") cleanup_root(root) } @@ -780,3 +854,119 @@ def test_add_dupe_gate_force(t : T?) { t |> success(found_via_search, "newly added doc retrievable via search") cleanup_root(root) } + +// ─── auto-reindex via signature ─────────────────────────────────────── + +[test] +def test_ensure_index_fresh_picks_up_new_md(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs1 <- ["alpha question"] + seed_doc(t, root, "alpha", "Alpha title", "alpha body", no_links, qs1) + var n_initial = -1 + var n_after = -1 + var found = false + with_index_db(root) $(db) { + ensure_index_fresh(db, root) + let rows1 <- _sql(db |> select_from(type)) + n_initial = length(rows1) + // Drop a new .md straight onto disk, simulating a `git pull`. 
+ let qs2 <- ["beta question"] + seed_doc(t, root, "beta", "Beta title", "beta body", no_links, qs2) + ensure_index_fresh(db, root) + let rows2 <- _sql(db |> select_from(type)) + n_after = length(rows2) + let hits <- search(db, "beta", 5) + for (h in hits) { + if (h.slug == "beta") { + found = true + } + } + } + t |> equal(n_initial, 1) + t |> equal(n_after, 2) + t |> success(found, "newly-dropped doc must be retrievable after ensure_index_fresh") + cleanup_root(root) +} + +[test] +def test_ensure_index_fresh_noop_when_unchanged(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["alpha q"] + seed_doc(t, root, "alpha", "Alpha", "body", no_links, qs) + var sig1 : string + var sig2 : string + var rebuild_count = -1 + with_index_db(root) $(db) { + ensure_index_fresh(db, root) + sig1 = get_index_signature(db) + // No filesystem changes — second call should hit the cache and + // return 0, AND the persisted signature should still equal the first. + rebuild_count = ensure_index_fresh(db, root) + sig2 = get_index_signature(db) + } + t |> success(!empty(sig1), "signature populated on first ensure") + t |> equal(sig1, sig2) + t |> equal(rebuild_count, 0) // 0 = cache hit, no rebuild + cleanup_root(root) +} + +// ─── --raw-query FTS5 passthrough ───────────────────────────────────── + +[test] +def test_search_raw_query(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs1 <- ["title alpha question"] + let qs2 <- ["title bravo question"] + seed_doc(t, root, "alpha", "alphakeyword content", "alpha body", no_links, qs1) + seed_doc(t, root, "bravo", "bravokeyword content", "bravo body", no_links, qs2) + var raw_n = 0 + var sanitized_n = 0 + with_index_db(root) $(db) { + rebuild(db, root) + // Raw-query path: explicit FTS5 OR — both docs match. 
+ let raw_hits <- search(db, "alphakeyword OR bravokeyword", 10, true) + raw_n = length(raw_hits) + // Sanitized path lowercases "OR" to "or" (a 2-char regular token); + // it gets joined with the keywords via OR, so still matches both. + // What we really want to assert is that the raw path works; the + // sanitized path's exact behavior is tested elsewhere. + let san_hits <- search(db, "alphakeyword OR bravokeyword", 10, false) + sanitized_n = length(san_hits) + } + t |> equal(raw_n, 2, "raw FTS5 OR returns both docs") + t |> success(sanitized_n >= 2, "sanitized path also returns both via OR-join: got {sanitized_n}") + cleanup_root(root) +} + +[test] +def test_search_raw_query_phrase(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["the alpha bravo charlie sequence"] + seed_doc(t, root, "alpha", "alpha bravo charlie", "the order matters: alpha bravo charlie", no_links, qs) + var quoted_hits = -1 + with_index_db(root) $(db) { + rebuild(db, root) + // Phrase queries are valuable raw-query users. Sanitizer would strip + // the quotes — raw_query=true preserves them. + let hits <- search(db, "\"alpha bravo\"", 10, true) + quoted_hits = length(hits) + } + t |> equal(quoted_hits, 1) + cleanup_root(root) +}