diff --git a/.gitignore b/.gitignore index 52e550081e..1035a1b4ab 100644 --- a/.gitignore +++ b/.gitignore @@ -29,6 +29,13 @@ _aot_generated/ opencoede.json +# blind-mouse — ignore the regenerable SQLite index, but check in the .md +# corpus under mouse-data/docs/ so curated answers ship in PRs. +mouse-data/index.db +mouse-data/index.db-journal +mouse-data/*.db-wal +mouse-data/*.db-shm + modules/dasSFML/libsfml/ site/ doc/sphinx-build/ diff --git a/CLAUDE.md b/CLAUDE.md index d5f00e39fa..6d50ee46de 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -35,6 +35,21 @@ Same applies to lint/format: `mcp__daslang__lint` / `format_file`, not shell `bi Fall back to `Bash`/`Grep`/`Read` only when the MCP tool reports an error or the question is genuinely outside MCP coverage (RST prose, CMake, Python tooling). +## Asking blind-mouse + +Before doing significant research on a "how do I X?" / "what's the pattern for Y?" / "why does Z behave this way?" question, ask `mouse__ask`. blind-mouse (`utils/mouse/`) is a personal Q&A cache backed by curated `.md` answers — full vision in `utils/mouse/OVERVIEW.md`. Same deferred-tool dance as the daslang MCP: `ToolSearch select:mcp__mouse__` → invoke. + +**During plan mode / planning phase, ask the mouse early and often.** Planning is exactly the phase where prior-session research has the highest leverage: each "what's the pattern for X" / "where do we usually put Y" / "why did we pick Z" answer that's already in the cache saves a research detour, and each new finding worth keeping is one `mouse__add` away from being free next time. Concrete planning-phase prompts: design questions ("what's the right pattern for adding a new `[sql_*]` annotation?"), prior-art questions ("have we hit this glob-vs-rfind path bug before?"), gotcha-recall ("what's the const-stripping reinterpret incantation?"), trade-off recall ("why did we pick (a) over (b) last time?"). 
If the cache has nothing useful, do the research yourself — then `mouse__add` the answer before moving on, even if rough. The cost of writing a brief `.md` is far smaller than re-researching the same thing. + +| Reach for the mouse when… | Don't, when… | +|---|---| +| Planning a non-trivial change — sweep `mouse__ask` across the open questions before diving in | symbol lookup — use the daslang MCP (`find_symbol`, `grep_usage`, `find_references`) | +| "how do I write a `[typefunction]` macro?" / "what's the right pattern for X?" / "why does Y behave this way?" | categorical conventions — those belong in `skills/*.md` / `CLAUDE.md` | +| Discovered facts that don't fit any `skills/*.md` slot | project state, branch status, who's doing what — use git/issues/memory | +| Recurring questions you remember answering before but forget the answer | | + +If `mouse__ask` returns nothing relevant and you do the research yourself, finish with `mouse__add` so the next session doesn't redo the work. If a returned answer is stale or wrong, edit the `.md` directly under `mouse-data/docs/` (it's a regular file, `Edit` works) and bump `last_verified`. + ## Skill Files (REQUIRED) Task-specific instructions are split into skill files under `skills/`. You MUST read the relevant skill file(s) before performing the corresponding task. diff --git a/CMakeLists.txt b/CMakeLists.txt index 76106a5a4e..873a09a406 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1559,6 +1559,20 @@ install(DIRECTORY ${PROJECT_SOURCE_DIR}/utils/find-dupe/tests/ FILES_MATCHING PATTERN "*.das" ) +# Install blind-mouse (personal Q&A cache MCP server). 
+file(GLOB DAS_MOUSE_FILES ${PROJECT_SOURCE_DIR}/utils/mouse/*.das) +install(FILES ${DAS_MOUSE_FILES} DESTINATION utils/mouse) +install(FILES + ${PROJECT_SOURCE_DIR}/utils/mouse/README.md + ${PROJECT_SOURCE_DIR}/utils/mouse/OVERVIEW.md + ${PROJECT_SOURCE_DIR}/utils/mouse/.das_package + DESTINATION utils/mouse +) +install(DIRECTORY ${PROJECT_SOURCE_DIR}/utils/mouse/tests/ + DESTINATION utils/mouse/tests + FILES_MATCHING PATTERN "*.das" PATTERN "*.md" +) + # Install daspkg (package manager) file(GLOB DAS_DASPKG_FILES ${PROJECT_SOURCE_DIR}/utils/daspkg/*.das) install(FILES ${DAS_DASPKG_FILES} DESTINATION utils/daspkg) diff --git a/daslib/perf_lint.das b/daslib/perf_lint.das index a534b632af..5cc36aabd8 100644 --- a/daslib/perf_lint.das +++ b/daslib/perf_lint.das @@ -25,6 +25,11 @@ module perf_lint shared private //! PERF010 — unnecessary get_ptr() for null comparison //! PERF011 — unnecessary get_ptr() for field access //! PERF012 — string(das_string) passed to strings module function +//! PERF013 — a += 1 / a -= 1 — use a++ / a-- +//! PERF014 — closed-interval char-class range — see is_alpha / is_alnum / is_number etc. +//! PERF015 — ternary min/max — use min(a, b) / max(a, b) +//! PERF016 — ternary abs — use abs(x) +//! PERF017 — length(s) == 0 / != 0 — use empty(s) / !empty(s) require daslib/ast_boost require strings @@ -108,7 +113,10 @@ class PerfLintVisitor : AstVisitor { } def perf_warning(text : string; at : LineInfo) : void { - if (in_template) { + // Suppress in templates and inside macro-generated functions — + // the user didn't write that code. 
+ if (in_template + || (current_function != null && current_function.flags.generated)) { return } // Deduplicate warnings — same source location reported once, regardless of generic instantiations @@ -123,7 +131,7 @@ class PerfLintVisitor : AstVisitor { } warning_count++ var msg = text - if (current_function != null && current_function.fromGeneric != null && length(current_function.inferStack) > 0) { + if (current_function != null && current_function.fromGeneric != null && !empty(current_function.inferStack)) { msg = build_string() $(var w) { w |> write(text) w |> write("\n while compiling {current_function.name}") @@ -156,10 +164,7 @@ class PerfLintVisitor : AstVisitor { } def is_defined_outside_loop(v : Variable?) : bool { - if (v == null || loop_depth == 0) { - return false - } - if (self->is_loop_variable(v)) { + if (v == null || loop_depth == 0 || self->is_loop_variable(v)) { return false } for (entry in var_stack) { @@ -186,10 +191,8 @@ class PerfLintVisitor : AstVisitor { def is_array_func(expr : ExprCall?; fname : string) : bool { // Builtin generics (push, reserve, etc.) have fromGeneric set. // Verify first argument is an array type. - if (expr.func == null || expr.func.fromGeneric == null || expr.func.fromGeneric.name != fname) { - return false - } - if (length(expr.arguments) < 1) { + if (expr.func == null || expr.func.fromGeneric == null + || expr.func.fromGeneric.name != fname || empty(expr.arguments)) { return false } let arg = expr.arguments[0] @@ -280,12 +283,9 @@ class PerfLintVisitor : AstVisitor { } return false if (inner == null || !(inner is ExprCall)) var call = inner as ExprCall - if (length(call.arguments) < 1) { - return false - } // After inference, string() calls may have FakeContext/FakeLineInfo extra args. // Check the function name and first argument type. 
- if (call.name != "string") { + if (empty(call.arguments) || call.name != "string") { return false } var arg = call.arguments[0] @@ -307,10 +307,9 @@ class PerfLintVisitor : AstVisitor { def is_get_ptr_vs_null(maybe_get_ptr : Expression?; maybe_null : Expression?) : bool { //! Returns true if maybe_get_ptr is get_ptr() and maybe_null is null. - if (!self->is_get_ptr_of_smart_ptr(maybe_get_ptr)) { + if (!self->is_get_ptr_of_smart_ptr(maybe_get_ptr) || maybe_null == null) { return false } - return false if (maybe_null == null) if (maybe_null is ExprConstPtr) { var cptr = maybe_null as ExprConstPtr return cptr.value == null @@ -318,6 +317,115 @@ class PerfLintVisitor : AstVisitor { return false } + // --- generic constant / structural helpers (PERF013-017) --- + + def is_const_zero(expr : Expression? const) : bool { + if (expr == null) { + return false + } + if (expr is ExprConstInt) { + return (expr as ExprConstInt).value == 0 + } + if (expr is ExprConstUInt) { + return (expr as ExprConstUInt).value == 0u + } + if (expr is ExprConstInt64) { + return (expr as ExprConstInt64).value == 0l + } + if (expr is ExprConstUInt64) { + return (expr as ExprConstUInt64).value == 0ul + } + if (expr is ExprConstFloat) { + return (expr as ExprConstFloat).value == 0.0f + } + if (expr is ExprConstDouble) { + return (expr as ExprConstDouble).value == 0.0lf + } + return false + } + + def is_const_one(expr : Expression? 
const) : bool { + if (expr == null) { + return false + } + if (expr is ExprConstInt) { + return (expr as ExprConstInt).value == 1 + } + if (expr is ExprConstUInt) { + return (expr as ExprConstUInt).value == 1u + } + if (expr is ExprConstInt64) { + return (expr as ExprConstInt64).value == 1l + } + if (expr is ExprConstUInt64) { + return (expr as ExprConstUInt64).value == 1ul + } + if (expr is ExprConstFloat) { + return (expr as ExprConstFloat).value == 1.0f + } + if (expr is ExprConstDouble) { + return (expr as ExprConstDouble).value == 1.0lf + } + return false + } + + def is_const_neg_one(expr : Expression? const) : bool { + if (expr == null) { + return false + } + if (expr is ExprConstInt) { + return (expr as ExprConstInt).value == -1 + } + if (expr is ExprConstInt64) { + return (expr as ExprConstInt64).value == -1l + } + if (expr is ExprConstFloat) { + return (expr as ExprConstFloat).value == -1.0f + } + if (expr is ExprConstDouble) { + return (expr as ExprConstDouble).value == -1.0lf + } + return false + } + + def expr_equal_struct(a : Expression? const; b : Expression? const; require_pure : bool = true) : bool { + //! Structural equality via describe(). With `require_pure=true` (default), + //! returns false when either side has side effects — protects rules that + //! suggest collapsing duplicated subexpressions (PERF014/015/016) from + //! silently changing evaluation count. + if ((a == null || b == null) + || (require_pure && (!a.flags.noSideEffects || !b.flags.noSideEffects))) { + return false + } + return describe(a) == describe(b) + } + + def is_workhorse_numeric(t : TypeDecl?) : bool { + //! True if the type is one of the six numeric workhorse scalars (int, uint, + //! int64, uint64, float, double). Vectors/bitfields/enums do NOT qualify. 
+ if (t == null || !empty(t.dim)) { + return false + } + let bt = t.baseType + return (bt == Type.tInt || bt == Type.tUInt + || bt == Type.tInt64 || bt == Type.tUInt64 + || bt == Type.tFloat || bt == Type.tDouble) + } + + def is_collection_length_call(expr : Expression? const) : bool { + //! True for length(string) / length(das_string) / length(array) / length(table). + //! Excludes math::length(float2/3/4) which returns vector magnitude. + if (expr == null || !(expr is ExprCall)) { + return false + } + let call = expr as ExprCall + if (call.func == null || call.func.name != "length") { + return false + } + let modname = string(call.func._module.name) + return modname == "strings" || modname == "$" || modname == "builtin" + } + // --- function tracking --- def override preVisitFunction(var fn : FunctionPtr) : void { @@ -345,12 +453,12 @@ class PerfLintVisitor : AstVisitor { def override visitExprBlock(var blk : ExprBlock?) : ExpressionPtr { if (blk.blockFlags.isClosure) { in_closure-- - if (length(string_builder_save_stack) > 0) { + if (!empty(string_builder_save_stack)) { string_builder_target_var = string_builder_save_stack |> back() string_builder_save_stack |> pop() } } - if (length(scope_stack) > 0) { + if (!empty(scope_stack)) { var_stack |> resize(scope_stack |> back()) scope_stack |> pop() } @@ -419,17 +527,12 @@ class PerfLintVisitor : AstVisitor { return false } let bt = src._type.baseType - if (bt == Type.tArray || bt == Type.tString) { - return true - } - if (bt == Type.tRange || bt == Type.tRange64 || bt == Type.tURange || bt == Type.tURange64) { - return true - } - // fixed arrays have dim > 0 - if (src._type.dim |> length > 0) { - return true - } - return false + // fixed arrays have dim > 0; otherwise we accept any baseType whose + // length is determined at the start of the loop. 
+ return (bt == Type.tArray || bt == Type.tString + || bt == Type.tRange || bt == Type.tRange64 + || bt == Type.tURange || bt == Type.tURange64 + || !empty(src._type.dim)) } def override preVisitExprForSource(expr : ExprFor?; src : ExpressionPtr; last : bool) : void { @@ -456,14 +559,14 @@ class PerfLintVisitor : AstVisitor { if (current_for_known_length) { known_length_loop_depth-- } - if (length(scope_stack) > 0) { + if (!empty(scope_stack)) { var_stack |> resize(scope_stack |> back()) scope_stack |> pop() } - if (length(if_depth_stack) > 0) { + if (!empty(if_depth_stack)) { if_depth_stack |> pop() } - if (length(loop_has_early_exit) > 0) { + if (!empty(loop_has_early_exit)) { loop_has_early_exit |> pop() } } @@ -487,14 +590,14 @@ class PerfLintVisitor : AstVisitor { def override visitExprWhile(var expr : ExprWhile?) : ExpressionPtr { if (in_closure == 0) { loop_depth-- - if (length(scope_stack) > 0) { + if (!empty(scope_stack)) { var_stack |> resize(scope_stack |> back()) scope_stack |> pop() } - if (length(if_depth_stack) > 0) { + if (!empty(if_depth_stack)) { if_depth_stack |> pop() } - if (length(loop_has_early_exit) > 0) { + if (!empty(loop_has_early_exit)) { loop_has_early_exit |> pop() } } @@ -505,13 +608,13 @@ class PerfLintVisitor : AstVisitor { // --- break/continue tracking (for PERF006 early-exit suppression) --- def override preVisitExprBreak(expr : ExprBreak?) : void { - if (in_closure == 0 && length(loop_has_early_exit) > 0) { + if (in_closure == 0 && !empty(loop_has_early_exit)) { loop_has_early_exit[length(loop_has_early_exit) - 1] = true } } def override preVisitExprContinue(expr : ExprContinue?) 
: void { - if (in_closure == 0 && length(loop_has_early_exit) > 0) { + if (in_closure == 0 && !empty(loop_has_early_exit)) { loop_has_early_exit[length(loop_has_early_exit) - 1] = true } } @@ -519,13 +622,13 @@ class PerfLintVisitor : AstVisitor { // --- if-depth tracking (for PERF006 conditional suppression) --- def override preVisitExprIfThenElse(ifte : ExprIfThenElse?) : void { - if (length(if_depth_stack) > 0) { + if (!empty(if_depth_stack)) { if_depth_stack[length(if_depth_stack) - 1]++ } } def override visitExprIfThenElse(var ifte : ExprIfThenElse?) : ExpressionPtr { - if (length(if_depth_stack) > 0) { + if (!empty(if_depth_stack)) { if_depth_stack[length(if_depth_stack) - 1]-- } return <- ifte @@ -536,9 +639,10 @@ class PerfLintVisitor : AstVisitor { // --- PERF008: unnecessary get_ptr() for is/as --- def override preVisitExprOp2(expr : ExprOp2?) : void { - if (in_closure > 0) { - return - } + // No in_closure check needed — loop_depth doesn't increment inside + // closure bodies (see preVisitExprFor/While), so PERF001's loop check + // is already correctly scoped. The other rules below are syntactic + // patterns that misbehave identically inside or outside closures. if (loop_depth > 0 && expr.op == "+=") { let v = self->find_string_var_from_expr(expr.left) if (v != null && self->is_defined_outside_loop(v)) { @@ -571,6 +675,327 @@ class PerfLintVisitor : AstVisitor { if (self->is_get_ptr_vs_null(expr.left, expr.right) || self->is_get_ptr_vs_null(expr.right, expr.left)) { self->perf_warning("PERF010: get_ptr() is unnecessary for null comparison; smart_ptr supports == and != null directly", expr.at) } + // PERF013: a += ±1 / a -= ±1 → a++ / a-- + // Derive the actual delta from (op, RHS) so each branch can suggest the + // correct postfix operator. RHS is one of {+1, -1}; op flips the sign. 
+ if (expr.op == "+=" || expr.op == "-=") { + if (expr.left != null && self->is_workhorse_numeric(expr.left._type)) { + let rhs_one = self->is_const_one(expr.right) + let rhs_neg = self->is_const_neg_one(expr.right) + var delta = 0 + if (rhs_one) { + delta = expr.op == "+=" ? 1 : -1 + } elif (rhs_neg) { + delta = expr.op == "+=" ? -1 : 1 + } + if (delta == 1) { + self->perf_warning("PERF013: '{expr.op} {rhs_one ? 1 : -1}' on a numeric scalar; use postfix '++' for a faster, idiomatic increment", expr.at) + } elif (delta == -1) { + self->perf_warning("PERF013: '{expr.op} {rhs_one ? 1 : -1}' on a numeric scalar; use postfix '--' for a faster, idiomatic decrement", expr.at) + } + } + } + // PERF014: closed-interval char-class range check + if (expr.op == "&&") { + self->check_perf014_char_class(expr) + } + // PERF017: length(s) op 0 / 1 (op is a comparison operator) → empty(s) / !empty(s) + if (expr.op == "==" || expr.op == "!=" || expr.op == "<" || expr.op == "<=" || expr.op == ">" || expr.op == ">=") { + self->check_perf017_length_zero(expr) + } + } + + // --- PERF014: closed-interval char-class range checks --- + + def parse_range_leg(leg : Expression?; var v_out : Expression?&; var bound_out : int&; var is_hi_out : bool&) : bool { + //! Parse `var op const` or `const op var` where op is `<=` or `>=`. + //! Returns true and fills outputs with: the variable expression, + //! the int bound, and whether this leg expresses an upper bound on var. 
+ if (leg == null || !(leg is ExprOp2)) { + return false + } + var op2 = leg as ExprOp2 + let opname = string(op2.op) + if (opname != "<=" && opname != ">=") { + return false + } + if (op2.right is ExprConstInt && op2.left != null) { + v_out = op2.left + bound_out = (op2.right as ExprConstInt).value + is_hi_out = opname == "<=" + return true + } + if (op2.left is ExprConstInt && op2.right != null) { + v_out = op2.right + bound_out = (op2.left as ExprConstInt).value + is_hi_out = opname == ">=" + return true + } + return false + } + + def is_known_char_class_range(lo : int; hi : int) : bool { + //! Three closed ranges hint at the strings is_*() helpers. + //! Hex extras ('a'..'f', 'A'..'F') are excluded — is_hex is broader. + return ((lo == 48 && hi == 57) // '0'..'9' — is_number + || (lo == 97 && hi == 122) // 'a'..'z' — is_alpha (lower half) + || (lo == 65 && hi == 90)) // 'A'..'Z' — is_alpha (upper half) + } + + def check_perf014_char_class(expr : ExprOp2?) : void { + var va : Expression? = null + var vb : Expression? = null + var bound_a = 0 + var bound_b = 0 + var hi_a = false + var hi_b = false + // Both legs must parse, constrain the SAME expression, and split into + // one upper / one lower bound. + if (!self->parse_range_leg(expr.left, va, bound_a, hi_a) + || !self->parse_range_leg(expr.right, vb, bound_b, hi_b) + || !self->expr_equal_struct(va, vb) + || hi_a == hi_b) { + return + } + let lo = hi_a ? bound_b : bound_a + let hi = hi_a ? bound_a : bound_b + if (lo >= hi || !self->is_known_char_class_range(lo, hi)) { + return + } + // Pick the helper that matches the detected range. Note: is_alpha covers + // BOTH cases, so for a single-case range ('a'..'z' or 'A'..'Z') it's a + // broader replacement, not an exact one — call that out so the user + // doesn't replace lower-only with a both-cases check. 
+ if (lo == 48 && hi == 57) { + self->perf_warning("PERF014: char-class range check '0'..'9' — use 'is_number(c)' from strings module", expr.at) + } else { + // 'a'..'z' or 'A'..'Z' + let case_kind = (lo == 97) ? "lowercase" : "uppercase" + self->perf_warning("PERF014: char-class range check ({case_kind} only) — consider 'is_alpha(c)' from strings module (note: is_alpha matches both cases, broader than this range)", expr.at) + } + } + + // --- PERF017: length(collection) op 0/1 (op is a comparison operator) → empty / !empty --- + + def peel_ref2value(e : Expression? const) : Expression? const { + if (e != null && e is ExprRef2Value) { + return (e as ExprRef2Value.subexpr) + } + return e + } + + def const_int_value(e : Expression? const; var v : int&) : bool { + let inner = self->peel_ref2value(e) + if (inner == null) { + return false + } + if (inner is ExprConstInt) { + v = (inner as ExprConstInt).value + return true + } + if (inner is ExprConstUInt) { + v = int((inner as ExprConstUInt).value) + return true + } + return false + } + + def check_perf017_length_zero(expr : ExprOp2?) : void { + // Skip the canonical implementation of empty() in daslib/builtin.das — + // empty(x) is literally `return length(x) == 0`, and the rule's whole job + // is to suggest `empty(x)`. Self-flagging the suggestion's own body is + // noise. Generic instantiations of empty hit this path with + // current_function.fromGeneric pointing at the source `empty`. Both arms + // also gate on _module.name == "builtin" so unrelated user methods named + // empty() (e.g. delegate.empty(), AOT cache empty()) keep getting linted. 
+ if (current_function != null) { + let direct_match = (current_function.name == "empty" + && current_function._module != null + && current_function._module.name == "builtin") + let generic_match = (current_function.fromGeneric != null + && current_function.fromGeneric.name == "empty" + && current_function.fromGeneric._module != null + && current_function.fromGeneric._module.name == "builtin") + if (direct_match || generic_match) { + return + } + } + let lhs = self->peel_ref2value(expr.left) + let rhs = self->peel_ref2value(expr.right) + var len_on_left = true + if (self->is_collection_length_call(lhs)) { + len_on_left = true + } elif (self->is_collection_length_call(rhs)) { + len_on_left = false + } else { + return + } + let const_side = len_on_left ? rhs : lhs + var k = 0 + if (!self->const_int_value(const_side, k) || (k != 0 && k != 1)) { + return + } + let opname = string(expr.op) + // Canonicalize so that length is on the left; flip op if needed. + var canon_op = opname + if (!len_on_left) { + if (opname == "<") { + canon_op = ">" + } elif (opname == "<=") { + canon_op = ">=" + } elif (opname == ">") { + canon_op = "<" + } elif (opname == ">=") { + canon_op = "<=" + } + } + // Map (canon_op, k) → empty / !empty + // k=0: ==, <= → empty + // !=, > → !empty + // k=1: < → empty (length < 1 means length == 0) + // >= → !empty (length >= 1 means length > 0) + var is_empty_pattern = false + var is_nonempty_pattern = false + if (k == 0) { + if (canon_op == "==" || canon_op == "<=") { + is_empty_pattern = true + } elif (canon_op == "!=" || canon_op == ">") { + is_nonempty_pattern = true + } + } else { + if (canon_op == "<") { + is_empty_pattern = true + } elif (canon_op == ">=") { + is_nonempty_pattern = true + } + } + // Diagnostic uses canon_op so Yoda forms (e.g. `1 <= length(x)`) print + // the canonical equivalent (`length(x) >= 1`), matching what we matched. 
+ if (is_empty_pattern) { + self->perf_warning("PERF017: 'length(x) {canon_op} {k}' — use 'empty(x)' (avoids unnecessary strlen on strings)", expr.at) + } elif (is_nonempty_pattern) { + self->perf_warning("PERF017: 'length(x) {canon_op} {k}' — use '!empty(x)' (avoids unnecessary strlen on strings)", expr.at) + } + } + + // --- PERF015 / PERF016: ternary min/max/abs --- + + def override preVisitExprOp3(expr : ExprOp3?) : void { + if (in_closure > 0 || expr.op != "?" + || expr.subexpr == null || !(expr.subexpr is ExprOp2)) { + return + } + var cmp = expr.subexpr as ExprOp2 + let cop = string(cmp.op) + if ((cop != "<" && cop != "<=" && cop != ">" && cop != ">=") + || expr.left == null || expr.right == null) { + return + } + let t_branch = expr.left + let f_branch = expr.right + // PERF016 first — a ternary that matches abs may also superficially match + // min/max if x and -x ever describe-equal, but they can't. So order is fine. + if (self->try_perf016_abs(expr, cmp, t_branch, f_branch, cop)) { + return + } + self->try_perf015_minmax(expr, cmp, t_branch, f_branch, cop) + } + + def try_perf015_minmax(expr : ExprOp3?; cmp : ExprOp2?; t_branch, f_branch : Expression? const; cop : string) : bool { + let cl = cmp.left + let cr = cmp.right + if (cl == null || cr == null) { + return false + } + // ternary branches must be a permutation of (cl, cr) + let tcl = self->expr_equal_struct(t_branch, cl) + let tcr = self->expr_equal_struct(t_branch, cr) + let fcl = self->expr_equal_struct(f_branch, cl) + let fcr = self->expr_equal_struct(f_branch, cr) + let same_order = tcl && fcr // T==L, F==R + let swap_order = tcr && fcl // T==R, F==L + if (!same_order && !swap_order) { + return false + } + // Mapping + // < / <= : T==L,F==R → min; T==R,F==L → max + // > / >= : T==L,F==R → max; T==R,F==L → min + var is_min = false + if (cop == "<" || cop == "<=") { + is_min = same_order + } else { + is_min = swap_order + } + let which = is_min ? 
"min" : "max" + self->perf_warning("PERF015: ternary {which} — use '{which}(a, b)' from math module", expr.at) + return true + } + + def try_perf016_abs(expr : ExprOp3?; cmp : ExprOp2?; t_branch, f_branch : Expression? const; cop : string) : bool { + // One side of the comparison must be const-zero + var x_on_left = false + if (self->is_const_zero(cmp.right)) { + x_on_left = true + } elif (self->is_const_zero(cmp.left)) { + x_on_left = false + } else { + return false + } + let x_expr = x_on_left ? cmp.left : cmp.right + if (x_expr == null) { + return false + } + // Identify negated branch (-x) and plain branch (x) + var neg_then = false + var ok = false + if (self->is_neg_of(t_branch, x_expr) && self->expr_equal_struct(f_branch, x_expr)) { + neg_then = true + ok = true + } elif (self->expr_equal_struct(t_branch, x_expr) && self->is_neg_of(f_branch, x_expr)) { + neg_then = false + ok = true + } + if (!ok) { + return false + } + // Determine canonical sign of x in comparison: x < 0 or x > 0 etc. + // Reduce to: "is x's value-relation `negative` or `positive`" in the true branch + // Forms that produce abs: + // x < 0 → -x (T branch is -x, x is on left) — neg_then=true , x_on_left=true , cop in {<, <=} + // x > 0 → x (T branch is x, x is on left) — neg_then=false, x_on_left=true , cop in {>, >=} + // 0 > x → -x — neg_then=true , x_on_left=false, cop in {>, >=} + // 0 < x → x — neg_then=false, x_on_left=false, cop in {<, <=} + var is_abs = false + if (x_on_left) { + if ((cop == "<" || cop == "<=") && neg_then) { + is_abs = true + } elif ((cop == ">" || cop == ">=") && !neg_then) { + is_abs = true + } + } else { + if ((cop == ">" || cop == ">=") && neg_then) { + is_abs = true + } elif ((cop == "<" || cop == "<=") && !neg_then) { + is_abs = true + } + } + if (!is_abs) { + return false + } + self->perf_warning("PERF016: ternary abs — use 'abs(x)' from math module", expr.at) + return true + } + + def is_neg_of(maybe_neg : Expression? const; ref : Expression? 
const) : bool { + //! True when `maybe_neg` is `-ref` structurally. + if (maybe_neg == null || !(maybe_neg is ExprOp1)) { + return false + } + let op1 = maybe_neg as ExprOp1 + if (op1.op != "-") { + return false + } + return self->expr_equal_struct(op1.subexpr, ref) } // --- PERF011: unnecessary get_ptr() for field access --- @@ -644,8 +1069,8 @@ class PerfLintVisitor : AstVisitor { } } } - let in_conditional = length(if_depth_stack) > 0 && if_depth_stack[length(if_depth_stack) - 1] > 0 - let has_early_exit = length(loop_has_early_exit) > 0 && loop_has_early_exit[length(loop_has_early_exit) - 1] + let in_conditional = !empty(if_depth_stack) && if_depth_stack[length(if_depth_stack) - 1] > 0 + let has_early_exit = !empty(loop_has_early_exit) && loop_has_early_exit[length(loop_has_early_exit) - 1] if (in_closure == 0 && known_length_loop_depth > 0 && !in_conditional && !has_early_exit) { if (self->is_array_func(expr, "push") || self->is_array_func(expr, "push_clone") || self->is_array_func(expr, "emplace")) { var path = "" diff --git a/daslib/style_lint.das b/daslib/style_lint.das index 4b5ed096d0..5a5986409d 100644 --- a/daslib/style_lint.das +++ b/daslib/style_lint.das @@ -24,6 +24,10 @@ module style_lint shared private //! STYLE013 — struct var with default/empty init followed by a run of field assignments //! STYLE014 — comment block exceeds 3 lines at module/public scope (opt-in via ``options _comment_hygiene = true``, suppress with '//!@nolint' or '// nolint:STYLE014') //! STYLE015 — comment block exceeds 1 line inside a 'def private' (opt-in via ``options _comment_hygiene = true``, suppress with '// nolint:STYLE015') +//! STYLE016 — adjacent guards leading to identical early-exit can be combined with '||' +//! STYLE017 — 'if (cond) return true; else return false' — use 'return cond' (or 'return !cond') +//! STYLE018 — redundant boolean comparison ('b == true' / 'b != false') — use 'b' / '!b' +//! 
STYLE019 — nested min/max — use 'clamp(x, lo, hi)' from math module require daslib/ast_boost require strings @@ -76,6 +80,10 @@ class StyleLintVisitor : AstVisitor { } def style_warning(text : string; at : LineInfo) : void { + // Suppress warnings for macro-generated functions — the user didn't write that code. + if (current_function != null && current_function.flags.generated) { + return + } let key = self->location_key(at) if (reported_locations |> has_value(key)) { return @@ -86,7 +94,7 @@ class StyleLintVisitor : AstVisitor { } warning_count++ var msg = text - if (current_function != null && current_function.fromGeneric != null && length(current_function.inferStack) > 0) { + if (current_function != null && current_function.fromGeneric != null && !empty(current_function.inferStack)) { msg = build_string() $(var w) { w |> write(text) w |> write("\n while compiling {current_function.name}") @@ -233,6 +241,45 @@ class StyleLintVisitor : AstVisitor { def override preVisitExprCall(var expr : ExprCall?) : void { self->check_block_pipe(expr.at, expr.arguments) + self->check_style019_clamp(expr) + } + + // --- STYLE019: nested min/max → clamp --- + + def is_math_call(expr : Expression? const; fname : string) : bool { + //! True iff `expr` is `math::fname(_, _)` with exactly two arguments + //! (peeling ExprRef2Value). Conservatively skips any non-math overload. + let inner = self->peel_ref2value_const(expr) + if (inner == null || !(inner is ExprCall)) { + return false + } + let call = inner as ExprCall + if (call.func == null || call.func.name != fname + || call.func._module.name != "math") { + return false + } + return length(call.arguments) == 2 + } + + def check_style019_clamp(expr : ExprCall?) : void { + if (expr.func == null || expr.func._module.name != "math" + || length(expr.arguments) != 2) { + return + } + let outer_name = string(expr.func.name) + if (outer_name != "min" && outer_name != "max") { + return + } + let inner_name = outer_name == "min" ? 
"max" : "min" + // min(max(x, lo), hi) — inner is arg[0] + // max(min(x, hi), lo) — inner is arg[0] (mirror) + if (self->is_math_call(expr.arguments[0], inner_name)) { + self->style_warning("STYLE019: nested {outer_name}({inner_name}(...)) — use 'clamp(x, lo, hi)' from math module", expr.at) + return + } + if (self->is_math_call(expr.arguments[1], inner_name)) { + self->style_warning("STYLE019: nested {outer_name}({inner_name}(...)) — use 'clamp(x, lo, hi)' from math module", expr.at) + } } def override preVisitExprInvoke(var expr : ExprInvoke?) : void { @@ -290,16 +337,14 @@ class StyleLintVisitor : AstVisitor { return } var eblk = blk as ExprBlock - if (!eblk.blockFlags.isClosure || eblk.blockFlags.isLambdaBlock || length(eblk.arguments) != 0) { + if (!eblk.blockFlags.isClosure || eblk.blockFlags.isLambdaBlock || !empty(eblk.arguments)) { return } let src = self->source_line_between(expr.at, eblk.at) let dollar_pos = find(src, "$()") - if (dollar_pos < 0) { - return - } - // Skip if <| pipe is present — STYLE002 already covers that case - if (find(src, "<|") >= 0) { + // Skip when there is no $() at all, or when <| pipe is present + // (STYLE002 already covers the piped form). + if (dollar_pos < 0 || find(src, "<|") >= 0) { return } // Skip if explicit return type follows: $() : Type { ... } @@ -311,18 +356,137 @@ class StyleLintVisitor : AstVisitor { } // --- STYLE006: string(__rtti) == "..." should use `is` --- + // --- STYLE018: b == true / b == false / b != true / b != false --- - def override preVisitExprOp2(expr : ExprOp2?) : void { - if (expr.op != "==") { - return + def peel_ref2value_const(e : Expression? const) : Expression? const { + if (e != null && e is ExprRef2Value) { + return (e as ExprRef2Value.subexpr) + } + return e + } + + def expr_equal_struct(a : Expression? const; b : Expression? const) : bool { + //! Structural equality via describe(). No purity filter. 
+ if (a == null || b == null) { + return false } - if (self->is_string_rtti_comparison(expr.left) || self->is_string_rtti_comparison(expr.right)) { - self->style_warning("STYLE006: string(__rtti) comparison should use `is` operator; e.g. expr is ExprFoo", expr.at) + return describe(a) == describe(b) + } + + def override preVisitExprOp2(expr : ExprOp2?) : void { + if (expr.op == "==") { + if (self->is_string_rtti_comparison(expr.left) || self->is_string_rtti_comparison(expr.right)) { + self->style_warning("STYLE006: string(__rtti) comparison should use `is` operator; e.g. expr is ExprFoo", expr.at) + } + } + // STYLE018: b == true / b == false / b != true / b != false + if (expr.op == "==" || expr.op == "!=") { + let lhs = self->peel_ref2value_const(expr.left) + let rhs = self->peel_ref2value_const(expr.right) + let lbool = lhs != null && lhs is ExprConstBool + let rbool = rhs != null && rhs is ExprConstBool + // Avoid double-flagging `true == false` on both sides — flag only when + // exactly one side is a bool literal (the other side is the bool var/expr). + if (lbool != rbool) { + let const_side = lbool ? lhs : rhs + let cval = (const_side as ExprConstBool).value + let positive = (expr.op == "==" && cval) || (expr.op == "!=" && !cval) + let suggestion = positive ? "b" : "!b" + self->style_warning("STYLE018: redundant boolean comparison — use '{suggestion}' directly (drop the '{expr.op} {cval}')", expr.at) + } } } // --- STYLE005: postfix conditionals (configurable) --- // --- STYLE010: if (true) should be bare block --- + // --- STYLE016(b): if (a) { return X } else if (b) { return X } --- + // --- STYLE017(a): if (cond) return true else return false --- + + def single_terminator_body(body : Expression? const) : Expression? const { + //! If `body` is a bare ExprReturn/Break/Continue, or a 1-statement ExprBlock + //! wrapping one, return the inner terminator. Otherwise null. 
+ if (body == null) { + return null + } + if (body is ExprReturn || body is ExprBreak || body is ExprContinue) { + return body + } + if (body is ExprBlock) { + let blk = body as ExprBlock + if (length(blk.list) == 1 && empty(blk.finalList)) { + let stmt = blk.list[0] + if (stmt is ExprReturn || stmt is ExprBreak || stmt is ExprContinue) { + return stmt + } + } + } + return null + } + + def terminators_payload_equal(a : Expression? const; b : Expression? const) : bool { + //! Two terminators have the same payload iff they are the same kind and + //! their return-subexpr (if any) is structurally equal. Break==Break and + //! Continue==Continue have no payload. + if (a == null || b == null) { + return false + } + if (a is ExprBreak) { + return b is ExprBreak + } + if (a is ExprContinue) { + return b is ExprContinue + } + if (a is ExprReturn) { + if (!(b is ExprReturn)) { + return false + } + let ra = a as ExprReturn + let rb = b as ExprReturn + if (ra.subexpr == null && rb.subexpr == null) { + return true + } + if (ra.subexpr == null || rb.subexpr == null) { + return false + } + return self->expr_equal_struct(ra.subexpr, rb.subexpr) + } + return false + } + + def is_const_bool_return(term : Expression? const; var b_out : bool&) : bool { + //! True if `term` is `return ExprConstBool(b)`. Writes the bool to b_out. + if (term == null || !(term is ExprReturn)) { + return false + } + let ret = term as ExprReturn + let sub = self->peel_ref2value_const(ret.subexpr) + if (sub == null || !(sub is ExprConstBool)) { + return false + } + b_out = (sub as ExprConstBool).value + return true + } + + def unwrap_inner_ifte(if_false : Expression? const) : ExprIfThenElse? { + //! Strip a 1-stmt ExprBlock wrapping an ExprIfThenElse, returning the inner if. + //! Otherwise return the if_false directly cast to ExprIfThenElse?, or null. 
+ if (if_false == null) { + return null + } + if (if_false is ExprIfThenElse) { + return (if_false as ExprIfThenElse) + } + if (if_false is ExprBlock) { + let blk = if_false as ExprBlock + if (length(blk.list) == 1 && empty(blk.finalList)) { + let stmt = blk.list[0] + if (stmt is ExprIfThenElse) { + return (stmt as ExprIfThenElse) + } + } + } + return null + } def override preVisitExprIfThenElse(ifte : ExprIfThenElse?) : void { if (ifte.if_flags.isStatic) { @@ -336,18 +500,33 @@ class StyleLintVisitor : AstVisitor { self->style_warning("STYLE010: if (true) is always taken; use a bare block instead", ifte.at) } } - if (postfix_conditionals && ifte.if_false == null) { - var then_expr = ifte.if_true - if (then_expr is ExprBlock) { - var blk = then_expr as ExprBlock - if (length(blk.list) == 1 && length(blk.finalList) == 0) { - var stmt = blk.list[0] - if (stmt is ExprReturn || stmt is ExprBreak || stmt is ExprContinue) { - // Skip if already on one line (already postfix or one-liner) - if (ifte.at.line != stmt.at.line) { - self->style_warning("STYLE005: single-statement if can use postfix form; e.g. return val if (cond)", ifte.at) - } - } + // Detect terminator in the `then` branch — used by STYLE005, STYLE016(b), STYLE017(a) + let outer_term = self->single_terminator_body(ifte.if_true) + if (postfix_conditionals && ifte.if_false == null && outer_term != null) { + // Reproduce STYLE005's "skip if already on one line" check + if (ifte.at.line != outer_term.at.line) { + self->style_warning("STYLE005: single-statement if can use postfix form; e.g. 
return val if (cond)", ifte.at) + } + } + // STYLE016(b): if (a) { return X } else if (b) { return X } + if (outer_term != null && ifte.if_false != null) { + let inner = self->unwrap_inner_ifte(ifte.if_false) + if (inner != null && inner.if_false == null) { + let inner_term = self->single_terminator_body(inner.if_true) + if (inner_term != null && self->terminators_payload_equal(outer_term, inner_term)) { + self->style_warning("STYLE016: adjacent guards leading to identical early-exit can be combined with '||'", ifte.at) + } + } + } + // STYLE017(a): if (cond) return true else return false (or vice versa) + if (outer_term != null && ifte.if_false != null) { + let else_term = self->single_terminator_body(ifte.if_false) + if (else_term != null) { + var b1 = false + var b2 = false + if (self->is_const_bool_return(outer_term, b1) && self->is_const_bool_return(else_term, b2) && b1 != b2) { + let suggestion = b1 ? "return cond" : "return !cond" + self->style_warning("STYLE017: 'if (cond) return {b1}; else return {b2}' — use '{suggestion}' directly", ifte.at) } } } @@ -370,8 +549,48 @@ class StyleLintVisitor : AstVisitor { return (inner as ExprVar.variable) } + // --- STYLE016(a): adjacent guards `if (a) { return X }; if (b) { return X }` --- + // --- STYLE017(b): adjacent `if (cond) { return b1 }; return b2` (b1 != b2) --- + + def override preVisitExprBlock(blk : ExprBlock?) 
: void { + let n = length(blk.list) + for (i in range(n - 1)) { + let cur = blk.list[i] + let nxt = blk.list[i + 1] + if (!(cur is ExprIfThenElse)) { + continue + } + let outer_if = cur as ExprIfThenElse + if (outer_if.if_false != null) { + continue + } + let outer_term = self->single_terminator_body(outer_if.if_true) + if (outer_term == null) { + continue + } + // STYLE016(a): adjacent guard with same payload + if (nxt is ExprIfThenElse) { + let nxt_if = nxt as ExprIfThenElse + if (nxt_if.if_false == null) { + let nxt_term = self->single_terminator_body(nxt_if.if_true) + if (nxt_term != null && self->terminators_payload_equal(outer_term, nxt_term)) { + self->style_warning("STYLE016: adjacent guards leading to identical early-exit can be combined with '||'", outer_if.at) + continue + } + } + } + // STYLE017(b): adjacent return-true-then-return-false + var b1 = false + var b2 = false + if (self->is_const_bool_return(outer_term, b1) && self->is_const_bool_return(nxt, b2) && b1 != b2) { + let suggestion = b1 ? "return cond" : "return !cond" + self->style_warning("STYLE017: 'if (cond) return {b1}; return {b2}' — use '{suggestion}' directly", outer_if.at) + } + } + } + def override preVisitExprBlockExpression(blk : ExprBlock?; expr : ExpressionPtr) : void { - if (length(pending_uninit_vars) > 0) { + if (!empty(pending_uninit_vars)) { if (expr is ExprCopy) { let v = self->find_assign_var((expr as ExprCopy).left) if (v != null && pending_uninit_vars |> has_value(v)) { @@ -415,25 +634,18 @@ class StyleLintVisitor : AstVisitor { return } for (v in elet.variables) { - if (v.init != null) { - continue - } - if (v.flags.generated || v.flags.inScope) { - continue - } - if (v._type == null || v._type.baseType != Type.tArray) { - continue - } - if (length(v._type.dim) != 0) { + // Only flag uninitialized array locals from user code (skip generic + // host instantiations and compiler-synthesized vars). 
+ if (v.init != null + || v.flags.generated || v.flags.inScope + || v._type == null || v._type.baseType != Type.tArray + || !empty(v._type.dim)) { continue } var count = 0 for (j in range(idx + 1, n)) { let stmt = blk.list[j] - if (!(stmt is ExprCall)) { - break - } - if (!self->is_push_or_emplace_of(stmt as ExprCall, v)) { + if (!(stmt is ExprCall) || !self->is_push_or_emplace_of(stmt as ExprCall, v)) { break } count++ @@ -462,16 +674,10 @@ class StyleLintVisitor : AstVisitor { return } for (v in elet.variables) { - if (v.flags.generated || v.flags.inScope) { - continue - } - if (v._type == null || v._type.baseType != Type.tStructure) { - continue - } - if (length(v._type.dim) != 0) { - continue - } - if (!self->is_default_struct_init(v)) { + // Skip generic-host instantiations and any non-struct or non-default-init local + if (v.flags.generated || v.flags.inScope + || v._type == null || v._type.baseType != Type.tStructure + || !empty(v._type.dim) || !self->is_default_struct_init(v)) { continue } var count = 0 @@ -524,13 +730,9 @@ class StyleLintVisitor : AstVisitor { } def is_push_or_emplace_of(call : ExprCall?; target : Variable?) 
: bool { - if (call.func == null || call.func.fromGeneric == null) { - return false - } - if (call.func.fromGeneric.name != "push" && call.func.fromGeneric.name != "emplace") { - return false - } - if (length(call.arguments) < 2) { + if (call.func == null || call.func.fromGeneric == null + || (call.func.fromGeneric.name != "push" && call.func.fromGeneric.name != "emplace") + || length(call.arguments) < 2) { return false } var arg = call.arguments[0] @@ -557,13 +759,9 @@ class StyleLintVisitor : AstVisitor { var fn_ends : array var fn_private : array for_each_function(mod, "") $(var fn) { - if (fn.body == null || fn.at.fileInfo == null || fn.at.line == 0u) { - return - } - if (fn.flags.generated || fn.fromGeneric != null || fn._module != mod) { - return - } - if (fn.moreFlags.isTemplate) { + if (fn.body == null || fn.at.fileInfo == null || fn.at.line == 0u + || fn.flags.generated || fn.fromGeneric != null + || fn._module != mod || fn.moreFlags.isTemplate) { return } let fname = string(fn.at.fileInfo.name) @@ -586,10 +784,8 @@ class StyleLintVisitor : AstVisitor { fn_private |> push(fn.flags.privateFunction) } for_each_structure(mod) $(var st) { - if (st.at.fileInfo == null || st.at.line == 0u) { - return - } - if (st.flags.generated || st._module != mod || st.flags.isTemplate) { + if (st.at.fileInfo == null || st.at.line == 0u + || st.flags.generated || st._module != mod || st.flags.isTemplate) { return } var is_template_instance = false @@ -632,10 +828,7 @@ class StyleLintVisitor : AstVisitor { } } for_each_global(mod) $(var v) { - if (v.at.fileInfo == null || v.at.line == 0u) { - return - } - if (v.flags.generated) { + if (v.at.fileInfo == null || v.at.line == 0u || v.flags.generated) { return } let fname = string(v.at.fileInfo.name) @@ -672,17 +865,12 @@ class StyleLintVisitor : AstVisitor { } var seen : table for_each_function(mod, "") $(var fn) { - if (fn.at.fileInfo == null || fn._module != mod) { - return - } - if (fn.flags.generated || fn.fromGeneric != null 
|| fn.moreFlags.isTemplate) { + if (fn.at.fileInfo == null || fn._module != mod + || fn.flags.generated || fn.fromGeneric != null || fn.moreFlags.isTemplate) { return } let fname = string(fn.at.fileInfo.name) - if (seen |> key_exists(fname)) { - return - } - if (!file_first_decl |> key_exists(fname)) { + if ((seen |> key_exists(fname)) || !file_first_decl |> key_exists(fname)) { return } seen |> insert(fname, true) @@ -691,10 +879,8 @@ class StyleLintVisitor : AstVisitor { fn_file, fn_starts, fn_ends, fn_private) } for_each_structure(mod) $(var st) { - if (st.at.fileInfo == null || st._module != mod) { - return - } - if (st.flags.generated || st.flags.isTemplate) { + if (st.at.fileInfo == null || st._module != mod + || st.flags.generated || st.flags.isTemplate) { return } var is_template_instance = false @@ -705,10 +891,7 @@ class StyleLintVisitor : AstVisitor { return } let fname = string(st.at.fileInfo.name) - if (seen |> key_exists(fname)) { - return - } - if (!file_first_decl |> key_exists(fname)) { + if ((seen |> key_exists(fname)) || !file_first_decl |> key_exists(fname)) { return } seen |> insert(fname, true) @@ -721,10 +904,7 @@ class StyleLintVisitor : AstVisitor { return } let fname = string(en.at.fileInfo.name) - if (seen |> key_exists(fname)) { - return - } - if (!file_first_decl |> key_exists(fname)) { + if ((seen |> key_exists(fname)) || !file_first_decl |> key_exists(fname)) { return } seen |> insert(fname, true) @@ -737,10 +917,7 @@ class StyleLintVisitor : AstVisitor { return } let fname = string(v.at.fileInfo.name) - if (seen |> key_exists(fname)) { - return - } - if (!file_first_decl |> key_exists(fname)) { + if ((seen |> key_exists(fname)) || !file_first_decl |> key_exists(fname)) { return } seen |> insert(fname, true) @@ -753,10 +930,7 @@ class StyleLintVisitor : AstVisitor { return } let fname = string(value.at.fileInfo.name) - if (seen |> key_exists(fname)) { - return - } - if (!file_first_decl |> key_exists(fname)) { + if ((seen |> 
key_exists(fname)) || !file_first_decl |> key_exists(fname)) { return } seen |> insert(fname, true) @@ -831,13 +1005,11 @@ class StyleLintVisitor : AstVisitor { first_line_text : string; fn_file : array; fn_starts, fn_ends : array; fn_private : array) : void { - if (line_count <= 1) { - return - } - if (block_start < first_decl_line) { - return - } - if (self->has_nolint_directive(first_line_text)) { + // Skip when the block is too short, lives before the first AST decl + // (module-leading docstring), or carries an explicit @nolint marker. + if (line_count <= 1 + || block_start < first_decl_line + || self->has_nolint_directive(first_line_text)) { return } var enclosing_priv = false @@ -888,7 +1060,7 @@ class StyleLintVisitor : AstVisitor { } return false if (inner == null || !(inner is ExprCall)) var call = inner as ExprCall - if (call.name != "string" || length(call.arguments) < 1) { + if (call.name != "string" || empty(call.arguments)) { return false } var arg = call.arguments[0] diff --git a/doc/source/reference/language/lint.rst b/doc/source/reference/language/lint.rst index 19ad40e398..1009a367a1 100644 --- a/doc/source/reference/language/lint.rst +++ b/doc/source/reference/language/lint.rst @@ -501,6 +501,111 @@ string reference. pos = find(s, "foo") } +PERF013 — ``a += 1`` / ``a -= 1`` should be ``a++`` / ``a--`` +============================================================= + +``a += 1`` lowers to a 2-node read-modify-write in interpreted mode. The +postfix ``++`` / ``--`` collapses to a single ``SimNode_op1`` and reads as +the canonical inc/dec idiom. Applies to the six numeric workhorse scalars +(``int``, ``uint``, ``int64``, ``uint64``, ``float``, ``double``); vectors +(``int2``, ``float3``, …) do **not** support ``++``/``--`` so they are +skipped. ``+= -1`` is also flagged (same effect as ``-= 1``). + +.. 
code-block:: das + + // Bad + a += 1 // PERF013 + a -= 1 // PERF013 + a += -1 // PERF013 + + // Good + a ++ + a -- + +PERF014 — closed-interval char-class range check +================================================== + +Hand-rolled ranges like ``c >= 'a' && c <= 'z'`` reimplement +``strings::is_alpha``/``is_alnum``/``is_number``/``is_white_space``/etc. +The helper functions read clearer and centralise locale/codepoint +behaviour. Only three closed ranges are flagged: + +* ``'0'..'9'`` (48..57) — ``is_number`` +* ``'a'..'z'`` (97..122) — ``is_alpha`` lower half +* ``'A'..'Z'`` (65..90) — ``is_alpha`` upper half + +The hex extras ``'a'..'f'`` / ``'A'..'F'`` are deliberately **not** +flagged — ``is_hex`` is broader. Open intervals (``c > '0' && c < '9'``) +have different endpoints, so they are also skipped. + +.. code-block:: das + + // Bad + if (c >= 'a' && c <= 'z') { ... } // PERF014 + if (c >= 48 && c <= 57) { ... } // PERF014 (raw int form) + + // Good + if (is_alpha(c)) { ... } + if (is_number(c)) { ... } + +PERF015 — ternary min / max +============================ + +``a < b ? a : b`` reimplements ``min(a, b)``. The math builtins are +vec-friendly and the intent is clearer. All eight orientations of +``< / <= / > / >=`` × ``T==L,F==R`` / ``T==R,F==L`` are flagged. + +.. code-block:: das + + // Bad + let smaller = a < b ? a : b // PERF015 — min + let larger = a > b ? a : b // PERF015 — max + + // Good + let smaller = min(a, b) + let larger = max(a, b) + +PERF016 — ternary abs +====================== + +``x < 0 ? -x : x`` reimplements ``abs(x)``. ``abs`` exists for every +signed numeric type. Only the four orientations that match ``abs`` are +flagged; the negabs shape (``x < 0 ? x : -x``) is **not** — it is a +different function. + +.. code-block:: das + + // Bad + let positive = x < 0 ? -x : x // PERF016 + let positive_alt = x > 0 ? 
x : -x // PERF016 + + // Good + let positive = abs(x) + +PERF017 — ``length(s) == 0`` should be ``empty(s)`` +==================================================== + +For strings, ``length`` walks the whole string (``strlen``); ``empty`` +checks one byte. For arrays/tables both are O(1) but ``empty`` is the +idiomatic form. Six comparison ops are mapped to either ``empty(x)`` or +``!empty(x)``: + +* ``length(x) == 0``, ``length(x) <= 0``, ``length(x) < 1`` → ``empty(x)`` +* ``length(x) != 0``, ``length(x) > 0``, ``length(x) >= 1`` → ``!empty(x)`` + +Vector magnitude (``length(float3_var)`` from the math module) is **not** +flagged — different semantics, no ``empty`` for vectors. + +.. code-block:: das + + // Bad + if (length(s) == 0) { ... } // PERF017 + if (length(arr) > 0) { ... } // PERF017 + + // Good + if (empty(s)) { ... } + if (!empty(arr)) { ... } + .. _style_lint: ----------- @@ -756,6 +861,92 @@ block. ... } +STYLE016 — adjacent guards leading to identical early-exit +============================================================ + +Two adjacent ``if`` guards with the same exit (``return`` with the same +payload, or ``break``/``continue``) read as one decision. Combine them +with ``||``. Two AST shapes are detected: + +* two adjacent ``if (a) { return X }`` statements in the same block +* the ``if (a) { return X } else if (b) { return X }`` chain + +.. code-block:: das + + // Bad + if (name == "." || name == "..") { // STYLE016 + return + } + if (name |> starts_with("_")) { + return + } + + // Good + if (name == "." || name == ".." || name |> starts_with("_")) { + return + } + +STYLE017 — ``if (cond) return true; else return false`` should be ``return cond`` +================================================================================== + +Three lines (or two if-else branches) that just propagate the boolean +condition unchanged. Read better as a single ``return``. 
Detection covers +both forms: + +* ``if (cond) return b1 else return b2`` (b1 ≠ b2) +* ``if (cond) return b1`` immediately followed by ``return b2`` (b1 ≠ b2) + +.. code-block:: das + + // Bad + if (cond) { // STYLE017 + return true + } else { + return false + } + + // Good + return cond + + // Good (negated) + return !cond + +STYLE018 — redundant boolean comparison +======================================== + +Comparing a bool to a boolean literal is redundant — the bool already IS +the value. Drop the comparison. Both Yoda forms (``true == flag``) are +detected. + +.. code-block:: das + + // Bad + if (flag == true) { ... } // STYLE018 + if (flag != false) { ... } // STYLE018 + if (flag == false) { ... } // STYLE018 + if (true == flag) { ... } // STYLE018 (Yoda) + + // Good + if (flag) { ... } + if (!flag) { ... } + +STYLE019 — nested ``min(max(...))`` should be ``clamp(...)`` +============================================================== + +``min(max(x, lo), hi)`` reads as a clamp; the math builtin says so +directly. Both orientations (and the mirror form) are detected — the +inner call must resolve to the math module's ``min`` / ``max``, not a +user overload. + +.. code-block:: das + + // Bad + let bounded = min(max(x, lo), hi) // STYLE019 + let bounded_alt = max(min(x, hi), lo) // STYLE019 (mirror) + + // Good + let bounded = clamp(x, lo, hi) + ----- Tests ----- diff --git a/mouse-data/docs/when-to-consult-mouse.md b/mouse-data/docs/when-to-consult-mouse.md new file mode 100644 index 0000000000..3a9a353aef --- /dev/null +++ b/mouse-data/docs/when-to-consult-mouse.md @@ -0,0 +1,22 @@ +--- +slug: when-to-consult-mouse +title: When should I consult blind-mouse? +created: 2026-05-08 +last_verified: 2026-05-08 +links: [] +--- + +Reach for `mouse__ask` for **how-do-I** / **what's-the-pattern-for** / **why-does-X-behave-Y** shaped questions — the long-tail "discovered facts" that don't live in `skills/CLAUDE.md` and aren't direct symbol lookups. 
+ +Do NOT use the mouse for: +- Symbol definitions / call sites — use the daslang MCP (`find_symbol`, `grep_usage`, `find_references`). +- Categorical conventions (gen2 syntax, formatting, build flags) — these belong in `skills/*.md` or CLAUDE.md. +- Project state (in-progress work, branch status, who's doing what) — use git, the issue tracker, or session memory. + +If a question genuinely doesn't fit any of those slots, ask the mouse first. If the answer is missing, do the research, then `mouse__add` it back so the next session doesn't redo the work. + +## Questions +- When should I consult blind-mouse? +- mouse vs grep +- when to use mouse +- when to ask mouse vs MCP vs skills diff --git a/mouse-data/docs/why-does-expr-left-give-me-an-expression-const-i-can-t-reassign-or-pass-to-a-non-const-helper.md b/mouse-data/docs/why-does-expr-left-give-me-an-expression-const-i-can-t-reassign-or-pass-to-a-non-const-helper.md new file mode 100644 index 0000000000..ff93a2cb8a --- /dev/null +++ b/mouse-data/docs/why-does-expr-left-give-me-an-expression-const-i-can-t-reassign-or-pass-to-a-non-const-helper.md @@ -0,0 +1,47 @@ +--- +slug: why-does-expr-left-give-me-an-expression-const-i-can-t-reassign-or-pass-to-a-non-const-helper +title: Why does `expr.left` give me an `Expression? const` I can't reassign or pass to a non-const helper? +created: 2026-05-08 +last_verified: 2026-05-08 +links: [] +--- + +When a function parameter is **not** declared `var`, gen2 propagates const through every accessor. So in `def override preVisitExprOp2(expr : ExprOp2?)`, the type of `expr.left` is `Expression? const` — a constant pointer. Two failure modes: + +1. **Helper signature mismatch**: passing `expr.left` to `def helper(e : Expression?)` fails with `error[30187]: incompatible argument 2 ... ast_core::Expression const?& vs ast_core::Expression? const`. +2. 
**Local reassignment**: `var x = expr.left ; x = (x as ExprRef2Value.subexpr)` fails with `error[30915]: can only copy compatible type ; ... -const = ... const&`. + +Two fixes, picked by what the call site needs: + +``` +// (a) If the helper only READS the expr, declare its param as `Expression? const`: +def expr_equal_struct(a : Expression? const ; b : Expression? const) : bool { ... } + +// (b) If the *visitor* needs mutable access (writing to fields), take the +// override param as `var`. Then expr.left is non-const all the way down: +def override preVisitExprOp2(var expr : ExprOp2?) : void { ... } +``` + +For the local-reassignment case, prefer chaining helpers that take/return `Expression? const`: + +``` +def peel_ref2value(e : Expression? const) : Expression? const { + if (e != null && e is ExprRef2Value) { + return (e as ExprRef2Value.subexpr) + } + return e +} +let lhs = self->peel_ref2value(expr.left) // lhs is `Expression? const`, immutable +``` + +Avoid `unsafe(reinterpret(...))` to strip const — it works but it's the wrong tool. Reach for it only when you genuinely cannot make the parent `var`. + +Related: visitor override signatures (`preVisitExprOp2`, `preVisitExprIfThenElse`, etc.) are usually **non-`var`** by convention, even though daslib's own visitors sometimes use `var` selectively when they mutate. Match the existing pattern in the file you're editing. + +## Questions +- Why does `expr.left` give me an `Expression? const` I can't reassign or pass to a non-const helper? +- expr.left const propagation +- ast_core::Expression const?& vs Expression? 
const error 30187 +- why can't I assign expr.left to a local var +- visitor parameter const stripping AST +- error 30915 can only copy compatible type Expression diff --git a/mouse-data/docs/why-does-my-single-line-if-cond-return-x-get-a-parse-error.md b/mouse-data/docs/why-does-my-single-line-if-cond-return-x-get-a-parse-error.md new file mode 100644 index 0000000000..e64d49d804 --- /dev/null +++ b/mouse-data/docs/why-does-my-single-line-if-cond-return-x-get-a-parse-error.md @@ -0,0 +1,32 @@ +--- +slug: why-does-my-single-line-if-cond-return-x-get-a-parse-error +title: Why does my single-line `if (cond) { return X }` get a parse error? +created: 2026-05-08 +last_verified: 2026-05-08 +links: [] +--- + +gen2's grammar requires a statement separator (newline or `;`) before `}` in an `if`-body. A bare `if (cond) { return false }` on one line fails with `error[30151]: syntax error, unexpected '}', expecting if` — the `if` it's expecting is the postfix-conditional form (`return X if (cond)`), and `}` is neither that nor a newline. + +Two fixes: + +``` +// Multiline (canonical): +if (cond) { + return false +} + +// Same-line with explicit terminator: +if (cond) { return false; } +``` + +The same caveat applies to `if`-`elif` chains: `} elif (cond)` works on a single line because `}` is followed by another keyword, but `if (cond) { stmt }\nelif (cond) { ... }` (newline before `elif`) does not — gen2 wants `}` and `elif` adjacent. + +The `||` / `&&` line-continuation rule is related but separate: `return a\n || b` doesn't parse at statement level (CLAUDE.md says newlines are only free inside `(...)` / `[...]` / `{...}`). Wrap the RHS in parens to break across lines. + +## Questions +- Why does my single-line `if (cond) { return X }` get a parse error? 
+- gen2 single-line if return parse error +- error 30151 unexpected } expecting if +- why doesn't `if (x) { return false }` parse on one line +- gen2 if body needs newline before closing brace diff --git a/site/index.html b/site/index.html index d7e9751970..94ec852f01 100644 --- a/site/index.html +++ b/site/index.html @@ -97,6 +97,10 @@

News

    +
  • + May 8th, 2026: 0.6.2-RC2 is out! It's getting closer. + See the Change List for details, or grab the 0.6.2-RC2 pre-release.
  • May 1st, 2026: 0.6.2 is just about ready. See the Change List, or grab the diff --git a/skills/make_pr.md b/skills/make_pr.md index 3e5d2e3d80..22c00c8225 100644 --- a/skills/make_pr.md +++ b/skills/make_pr.md @@ -240,6 +240,27 @@ CI's `extended_checks` job runs `./bin/Release/daslang ./das-fmt/dasfmt.das -- - **Before pushing:** mentally format named-arg constructor / call sites with spaces around `=`. If CI `extended_checks` fails on a format diff after MCP said "already formatted", fix the spacing and re-push (or amend, on a squashed branch). +## 5.5. Review the blind-mouse query log + +If you use blind-mouse, take 60 seconds to close the loop before pushing — work is final, you know what was learned, diff is locked. Skip if you don't keep a personal Q&A cache. + +```bash +bin/Release/daslang.exe utils/mouse/main.das -- log --misses +``` + +For each recent miss: +- **Did this PR (or your session research) answer it?** If yes — `mouse__add` it now (or `mouse add` from CLI). Next session won't redo the work. +- **Did you _almost_ ask mouse this session but didn't?** Try asking now — misses-you-skipped don't show up in `--misses`. If the work you just did has the answer, add it. + +```bash +bin/Release/daslang.exe utils/mouse/main.das -- log +``` + +For recent hits: +- **Did this PR invalidate a cached answer?** If yes, edit `mouse-data/docs/<slug>.md` directly and bump `last_verified` (or delete if no longer relevant). + +This is curation, not verification — the PR doesn't depend on it. Goal: keep the personal cache aligned with what just shipped. + ## 6. Create the PR Stage, commit, push, and create the PR using GitHub MCP tools or `gh` CLI. Follow the commit message conventions from the repository (see recent `git log` for style). @@ -259,6 +280,7 @@ Stage, commit, push, and create the PR using GitHub MCP tools or `gh` CLI. 
Follo | AOT tests | `test_aot.exe -use-aot dastest/dastest.das -- --use-aot --test tests` | Same as regular tests | | Docs | `das2rst.das` + stubs + Sphinx | Only if daslib/C++ bindings/RST changed | | Format | MCP `format_file` with comma-separated list or glob of changed `.das` files (single call) | Only changed files | +| Mouse log | `mouse log --misses` / `mouse log` | Optional. Add answers for misses, edit cached answers this PR invalidated | | `.md` stop | `git diff --name-only origin/master..HEAD \| grep '\.md$'` | If any match: STOP, list changes, ask user to review BEFORE push | | PR | GitHub MCP `create_pull_request` or `gh pr create` | — | | Review iter | Follow `skills/pr_review_iteration.md` | One round per Copilot pass; convergence in 1-3 rounds is normal | diff --git a/skills/perf_lint.md b/skills/perf_lint.md index ab50ce8bd4..7cef99766d 100644 --- a/skills/perf_lint.md +++ b/skills/perf_lint.md @@ -118,3 +118,14 @@ After compilation, `Expression._type` is resolved. Check `expr._type.baseType == | PERF010 | `get_ptr(x) == null` | Low | unnecessary; smart_ptr supports == null directly | | PERF011 | `get_ptr(x).field` | Low | unnecessary; smart_ptr auto-dereferences for field access | | PERF012 | `find(string(das_string), ...)` | Medium | unnecessary allocation; use `peek(das_string)` instead | +| PERF013 | `a += 1` / `a -= 1` (six numeric scalars) | Low | use postfix `a++` / `a--` (single SimNode, idiomatic) | +| PERF014 | closed-interval char-class range (`'0'..'9'` / `'a'..'z'` / `'A'..'Z'`) | Info | use `strings::is_alpha` / `is_alnum` / `is_number` | +| PERF015 | ternary min/max (`a < b ? a : b`) | Low | use `math::min(a, b)` / `max(a, b)` | +| PERF016 | ternary abs (`x < 0 ? -x : x`) | Low | use `math::abs(x)` (negabs `x < 0 ? x : -x` not flagged) | +| PERF017 | `length(x) == 0` / `> 0` / `>= 1` etc. 
| Medium | use `empty(x)` / `!empty(x)`; avoids strlen on strings | + +## Visitor gotchas + +- **`in_closure > 0` is NOT a useful guard in `preVisitExprOp2`** — `loop_depth` already doesn't increment inside closure bodies (`preVisitExprFor` / `While` gate on `in_closure == 0`), so PERF001's `loop_depth > 0` correctly excludes closure-internal loops without a separate skip. An `in_closure` early-return at the top of `preVisitExprOp2` hides syntactic patterns (PERF007/008/010/013/014/017) inside the natural `build_string() $(var w) { ... }` idiom and is a bug, not a feature. +- **Macro-generated functions need `current_function.flags.generated`-suppression** — `[CommandLineArgs]`-style codegen synthesizes AST that the user never wrote. Both `perf_warning` and `style_warning` should early-return when `current_function.flags.generated` is true. Otherwise warnings surface at the source-struct's line with no clear way to fix them. +- **Self-implementation suppression** — when a rule's suggested replacement is itself implemented in terms of the pattern (e.g. `empty(arr)`'s body is literally `length(arr) == 0`), gate the rule with `current_function.name == "<name>" || (fromGeneric != null && fromGeneric.name == "<name>")`, where `<name>` is the suggested replacement function. The generic-instantiation arm catches the per-type instantiations of a generic such as `empty`. diff --git a/skills/style_lint.md b/skills/style_lint.md index 54c2a4b849..028882caa7 100644 --- a/skills/style_lint.md +++ b/skills/style_lint.md @@ -28,6 +28,10 @@ The `style_lint` module detects non-idiomatic patterns in daslang code at compil | STYLE013 | `var a = Foo(); a.x = 1; a.y = 2` (or `var a : Foo` for `[safe_when_uninitialized]` structs), ≥ 2 contiguous field assignments | Use a named-argument constructor: `var a = Foo(x = 1, y = 2)`. 
Skipped when init is non-empty (`Foo(x=1)` then `a.y = 2` stays silent), when assignments are not contiguous, or for `inscope`/generated/generic-instantiation vars | | STYLE014 | `//` or `//!` comment block of more than 3 contiguous lines at module/public scope | Trim to a 1-line WHY (move design notes to a `.md` doc, not source). Module-leading docstring (block before any AST decl in the file) is always allowed. Suppress per-block with `//!@nolint` on first line of a `//!` block (also stripped from generated RST), or `// nolint:STYLE014` on first line of a `//` block. **Opt-in via `options _comment_hygiene = true`** (disabled by default). | | STYLE015 | `//` or `//!` comment block of more than 1 contiguous line inside a `def private` | Private symbols don't surface in any doc generator, so multi-line prose there is dead weight. Trim to one line, or suppress on first line with `// nolint:STYLE015`. **Opt-in via `options _comment_hygiene = true`** (disabled by default). | +| STYLE016 | adjacent `if (a) { return X }` / `if (b) { return X }` (or else-chained form) with the same payload | Combine with `\|\|`. Detection covers both `(a) two adjacent ExprIfThenElse statements` and `(b) if/else if chain`. Bare `break`/`continue` payloads count as equal; `return` payloads are structurally compared. | +| STYLE017 | `if (cond) return true; else return false` (and the inverse) | Use `return cond` / `return !cond`. Two AST shapes: if-else with bool-literal returns, and `if (cond) { return b1 }` followed immediately by `return b2` (b1 != b2). | +| STYLE018 | `b == true` / `b == false` / `b != true` / `b != false` (and Yoda forms) | Use `b` / `!b` directly. Skipped when both sides are bool literals (e.g. `true == true`). | +| STYLE019 | `min(max(x, lo), hi)` (and the `max(min(x, hi), lo)` mirror) | Use `clamp(x, lo, hi)` from math module. Inner/outer must resolve to math::min/max specifically, not user overloads. 
| Note: `get_ptr()` related patterns (null comparison, field access) are in `perf_lint` as PERF010/PERF011 since they have performance implications. @@ -110,4 +114,4 @@ bin/Release/daslang.exe utils/lint/main.das -- file1.das [file2.das ...] [--quie ## Known Limitations - **STYLE001-003 source detection**: Uses `get_file_source_line()` which reads one line at a time. Multi-line call expressions are handled by scanning from the call's line to the block's line. -- **`[lint_macro]` errors vs `expect`**: Style warnings emitted via `[lint_macro]` don't work with `expect` directives. Tests use the standalone runner instead. +- **`[lint_macro]` errors vs `expect`**: works (both for `expect 31208:N` and `expect 31209:N`). The earlier limitation note here was stale; STYLE016/017/018/019 tests under `utils/lint/tests/` validate exact warning counts via dastest exactly the same way PERF tests do. diff --git a/utils/CMakeLists.txt b/utils/CMakeLists.txt index ddc776bd42..4757aab219 100644 --- a/utils/CMakeLists.txt +++ b/utils/CMakeLists.txt @@ -76,6 +76,7 @@ SET(DAS_UTILS_TO_TEST detect-dupe find-dupe/tests hygiene + mouse/tests ) FOREACH(_das ${DAS_UTILS_TO_TEST}) diff --git a/utils/lint/main.das b/utils/lint/main.das index d1aadf25b3..783df317ed 100644 --- a/utils/lint/main.das +++ b/utils/lint/main.das @@ -112,10 +112,7 @@ def scan_das_files(path : string; var files : array; var cache : table starts_with("_") || is_skip_dir(name)) { + if (name == "." || name == ".." 
|| name |> starts_with("_") || is_skip_dir(name)) { return } let full = "{path}/{name}" @@ -198,7 +195,7 @@ def main() : int { paths |> push(arg) } } - if (length(paths) == 0) { + if (empty(paths)) { print("Error: no files or directories specified\n\n") print_help(get_command_info(type), "utils/lint/main.das") return 1 @@ -213,7 +210,7 @@ def main() : int { for (p in paths) { scan_das_files(p, files, cache) } - if (length(files) == 0) { + if (empty(files)) { print("Error: no .das files found\n") return 1 } diff --git a/utils/lint/tests/perf013_inc_dec.das b/utils/lint/tests/perf013_inc_dec.das new file mode 100644 index 0000000000..9bb71fcb5d --- /dev/null +++ b/utils/lint/tests/perf013_inc_dec.das @@ -0,0 +1,124 @@ +options gen2 +// PERF013: a += 1 / a -= 1 should be a++ / a-- +// +// Problem: +// `a += 1` and `a -= 1` are 2-node operations in interpreted mode (read, +// add/sub, write). The postfix `++` / `--` collapses to a single SimNode. +// The +=/-= form also reads worse than the canonical inc/dec idiom. +// +// All four (op, ±RHS) combinations are flagged; the diagnostic suggests +// `++` for delta=+1 and `--` for delta=-1: +// +// a += 1 → ++ (delta +1) +// a -= 1 → -- (delta -1) +// a += -1 → -- (delta -1, same as a -= 1) +// a -= -1 → ++ (delta +1, same as a += 1) +// +// Bad pattern: +// a += 1 +// a -= 1 +// +// Good pattern: +// a++ +// a-- + +// 6 numeric scalar types × 2 ops × 2 RHS signs would explode; this file +// covers the int and float paths plus the four signed-only neg-one cases +// (uint/uint64 omit -1 since negative literals don't apply). 
+expect 31208:12 + +require daslib/perf_lint + +// --- Bad patterns: int (4 combos) --- + +def bad_int_plus_one() : int { + var a = 0 + a += 1 // PERF013 — suggests ++ + return a +} + +def bad_int_minus_one() : int { + var a = 0 + a -= 1 // PERF013 — suggests -- + return a +} + +def bad_int_plus_neg_one() : int { + var a = 0 + a += -1 // PERF013 — suggests -- (same as -= 1) + return a +} + +def bad_int_minus_neg_one() : int { + var a = 0 + a -= -1 // PERF013 — suggests ++ (same as += 1) + return a +} + +// --- Bad patterns: float (4 combos) --- + +def bad_float_plus_one() : float { + var x = 0.0 + x += 1.0 // PERF013 — suggests ++ + return x +} + +def bad_float_minus_one() : float { + var x = 0.0 + x -= 1.0 // PERF013 — suggests -- + return x +} + +def bad_float_plus_neg_one() : float { + var x = 0.0 + x += -1.0 // PERF013 — suggests -- + return x +} + +def bad_float_minus_neg_one() : float { + var x = 0.0 + x -= -1.0 // PERF013 — suggests ++ + return x +} + +// --- Bad patterns: other workhorse numeric types --- + +def bad_int64() : int64 { + var a = 0l + a += 1l // PERF013 — suggests ++ + return a +} + +def bad_uint() : uint { + var a = 0u + a += 1u // PERF013 — suggests ++ + return a +} + +def bad_uint64() : uint64 { + var a = 0ul + a += 1ul // PERF013 — suggests ++ + return a +} + +def bad_double() : double { + var x = 0.0lf + x += 1.0lf // PERF013 — suggests ++ + return x +} + +// --- Good patterns (no warnings) --- + +def good_postfix() : int { + var a = 0 + a ++ + a -- + return a +} + +def good_step_other() : int { + var a = 0 + a += 2 // step != 1 — no warning + a -= 5 + return a +} diff --git a/utils/lint/tests/perf014_char_class_range.das b/utils/lint/tests/perf014_char_class_range.das new file mode 100644 index 0000000000..0b0d6de112 --- /dev/null +++ b/utils/lint/tests/perf014_char_class_range.das @@ -0,0 +1,86 @@ +options gen2 +// PERF014: closed-interval char-class range checks +// +// Problem: +// Hand-written ranges like `c >= 'a' && c <= 'z'` 
reimplement the strings +// module's is_alpha/is_alnum/is_number/is_white_space/etc. helpers. Using +// the helper makes intent obvious and lets future codepoint widening +// propagate automatically. +// +// Bad pattern: +// if (c >= 'a' && c <= 'z') { ... } +// if (c >= 48 && c <= 57) { ... } // '0'..'9' as raw int +// +// Good pattern: +// if (is_alpha(c)) { ... } // covers a-z, A-Z (and your locale) +// if (is_number(c)) { ... } +// +// Hex extras ('a'..'f' / 'A'..'F') deliberately do NOT trigger — is_hex is +// broader and would change behavior for the digit half. + +expect 31208:5 + +require daslib/perf_lint +require strings + +// --- Bad patterns (PERF014) --- + +def bad_lower_alpha(c : int) : bool { + return c >= 'a' && c <= 'z' // PERF014 +} + +def bad_upper_alpha(c : int) : bool { + return c >= 'A' && c <= 'Z' // PERF014 +} + +def bad_digits_char(c : int) : bool { + return c >= '0' && c <= '9' // PERF014 +} + +def bad_digits_int(c : int) : bool { + return c >= 48 && c <= 57 // PERF014 (raw int form) +} + +def bad_yoda_form(c : int) : bool { + return 'a' <= c && c <= 'z' // PERF014 (mixed const-on-left) +} + +// --- Good patterns (no warnings) --- + +def good_is_alpha(c : int) : bool { + return is_alpha(c) +} + +def good_hex_lower(c : int) : bool { + return c >= 'a' && c <= 'f' // hex range — NOT flagged +} + +def good_hex_upper(c : int) : bool { + return c >= 'A' && c <= 'F' // hex range — NOT flagged +} + +def good_open_range(c : int) : bool { + return c > '0' && c < '9' // open interval — NOT flagged +} + +def good_partial_range(c : int) : bool { + return c >= 'a' && c <= 'b' // not a known class — NOT flagged +} + +def good_different_vars(a, b : int) : bool { + return a >= 'a' && b <= 'z' // two different vars — NOT flagged +} + +// Side-effectful subexpressions: replacing `f() >= 'a' && f() <= 'z'` with +// `is_alpha(f())` would call f() once instead of twice — silently changing +// semantics. The rule must NOT fire when operands aren't pure. 
+ +[unused_argument(c)] +def impure_helper(c : int) : int { + print("called\n") // observable side effect + return c +} + +def good_impure_call(c : int) : bool { + return impure_helper(c) >= 'a' && impure_helper(c) <= 'z' // NOT flagged (impure) +} diff --git a/utils/lint/tests/perf015_ternary_minmax.das b/utils/lint/tests/perf015_ternary_minmax.das new file mode 100644 index 0000000000..b09f423097 --- /dev/null +++ b/utils/lint/tests/perf015_ternary_minmax.das @@ -0,0 +1,87 @@ +options gen2 +// PERF015: ternary min/max → min(a, b) / max(a, b) +// +// Problem: +// `a < b ? a : b` reimplements `min(a, b)`. Same for the four orientations. +// The math::min/max builtins are vec-friendly and the intent is clearer. +// +// Bad pattern: +// let smaller = a < b ? a : b +// let larger = a > b ? a : b +// +// Good pattern: +// let smaller = min(a, b) +// let larger = max(a, b) + +expect 31208:8 + +require daslib/perf_lint +require math + +// --- Bad patterns (PERF015) --- +// < / <= : T==L,F==R → min ; T==R,F==L → max +// > / >= : T==L,F==R → max ; T==R,F==L → min + +def bad_lt_min(a, b : int) : int { + return a < b ? a : b // PERF015 — min +} + +def bad_lt_max(a, b : int) : int { + return a < b ? b : a // PERF015 — max +} + +def bad_le_min(a, b : int) : int { + return a <= b ? a : b // PERF015 — min +} + +def bad_le_max(a, b : int) : int { + return a <= b ? b : a // PERF015 — max +} + +def bad_gt_max(a, b : int) : int { + return a > b ? a : b // PERF015 — max +} + +def bad_gt_min(a, b : int) : int { + return a > b ? b : a // PERF015 — min +} + +def bad_ge_max(a, b : int) : int { + return a >= b ? a : b // PERF015 — max +} + +def bad_ge_min(a, b : int) : int { + return a >= b ? b : a // PERF015 — min +} + +// --- Good patterns (no warnings) --- + +def good_min(a, b : int) : int { + return min(a, b) +} + +def good_max(a, b : int) : int { + return max(a, b) +} + +def good_asymmetric_branches(a, b, c : int) : int { + return a < b ? 
a : c // T != R — NOT flagged +} + +def good_different_operands(a, b, c, d : int) : int { + return a < b ? c : d // condition operands ≠ branches +} + +// Side-effectful operands: `f() < g() ? f() : g()` calls f() and g() up to twice; +// `min(f(), g())` calls each exactly once. The rewrite would silently change the +// number of calls, so the rule must NOT fire on impure operands. + +[unused_argument(x)] +def impure_int(x : int) : int { + print("called\n") // observable side effect + return x +} + +def good_impure_call(a, b : int) : int { + return impure_int(a) < impure_int(b) ? impure_int(a) : impure_int(b) // NOT flagged (impure) +} diff --git a/utils/lint/tests/perf016_ternary_abs.das b/utils/lint/tests/perf016_ternary_abs.das new file mode 100644 index 0000000000..025d5a55d0 --- /dev/null +++ b/utils/lint/tests/perf016_ternary_abs.das @@ -0,0 +1,74 @@ +options gen2 +// PERF016: ternary abs → abs(x) +// +// Problem: +// `x < 0 ? -x : x` reimplements `abs(x)`. abs() exists for all signed +// numeric types and reads more clearly. +// +// Bad pattern: +// let positive = x < 0 ? -x : x +// let positive_alt = x > 0 ? x : -x +// +// Good pattern: +// let positive = abs(x) +// +// `negabs(x)` (`x < 0 ? x : -x`) is NOT flagged — it's a different function. + +expect 31208:6 + +require daslib/perf_lint +require math + +// --- Bad patterns (PERF016) --- + +def bad_lt_zero(x : int) : int { + return x < 0 ? -x : x // PERF016 +} + +def bad_gt_zero(x : int) : int { + return x > 0 ? x : -x // PERF016 +} + +def bad_le_zero(x : int) : int { + return x <= 0 ? -x : x // PERF016 +} + +def bad_ge_zero(x : int) : int { + return x >= 0 ? x : -x // PERF016 +} + +def bad_yoda_zero_gt_x(x : int) : int { + return 0 > x ? -x : x // PERF016 (zero on left) +} + +def bad_float(x : float) : float { + return x < 0.0 ? -x : x // PERF016 +} + +// --- Good patterns (no warnings) --- + +def good_abs(x : int) : int { + return abs(x) +} + +def good_negabs(x : int) : int { + return x < 0 ? 
x : -x // negabs — NOT flagged +} + +def good_unrelated(x, y : int) : int { + return x < 0 ? -y : y // branches reference y, not x +} + +// Side-effectful operand: `f() < 0 ? -f() : f()` calls f() up to twice and +// the two evaluations may even disagree on sign. `abs(f())` calls once. The +// rule must NOT fire when the operand isn't pure. + +[unused_argument(x)] +def impure_int(x : int) : int { + print("called\n") // observable side effect + return x +} + +def good_impure_call(x : int) : int { + return impure_int(x) < 0 ? -impure_int(x) : impure_int(x) // NOT flagged (impure) +} diff --git a/utils/lint/tests/perf017_length_zero.das b/utils/lint/tests/perf017_length_zero.das new file mode 100644 index 0000000000..77148c3129 --- /dev/null +++ b/utils/lint/tests/perf017_length_zero.das @@ -0,0 +1,121 @@ +options gen2 +// PERF017: length(s) == 0 / != 0 → empty(s) / !empty(s) +// +// Problem: +// `length(string)` walks the whole string (strlen). `empty(string)` checks +// one byte. For arrays/tables both are O(1) but `empty` is the idiomatic +// form. The rule covers all six comparison ops with literal 0 / 1 on +// either side (Yoda forms supported). +// +// The diagnostic prints the canonical equivalent: `1 <= length(x)` is +// equivalent to `length(x) >= 1`, so the message says +// `'length(x) >= 1' — use '!empty(x)'` not `'length(x) <= 1'`. +// +// Bad pattern: +// if (length(s) == 0) { ... } +// if (length(arr) > 0) { ... } +// +// Good pattern: +// if (empty(s)) { ... } +// if (!empty(arr)) { ... } +// +// Vector magnitude `length(float3_var)` (math module) is NOT flagged — +// different semantics and there's no `empty` for vectors. 
+ +expect 31208:15 + +require daslib/perf_lint +require strings + +// --- Bad patterns (length on left) --- + +def bad_string_eq(s : string) : bool { + return length(s) == 0 // PERF017 — empty +} + +def bad_string_ne(s : string) : bool { + return length(s) != 0 // PERF017 — !empty +} + +def bad_string_le(s : string) : bool { + return length(s) <= 0 // PERF017 — empty (length <= 0 ≡ length == 0) +} + +def bad_string_gt(s : string) : bool { + return length(s) > 0 // PERF017 — !empty +} + +def bad_array_eq(a : array) : bool { + return length(a) == 0 // PERF017 — empty +} + +def bad_array_ge_one(a : array) : bool { + return length(a) >= 1 // PERF017 — !empty (length >= 1 ≡ !empty) +} + +def bad_array_lt_one(a : array) : bool { + return length(a) < 1 // PERF017 — empty (length < 1 ≡ empty) +} + +// --- Bad patterns (Yoda — length on right; canonical-equivalent op) --- + +def bad_yoda_eq(s : string) : bool { + return 0 == length(s) // PERF017 — empty (canon: length == 0) +} + +def bad_yoda_ne(s : string) : bool { + return 0 != length(s) // PERF017 — !empty (canon: length != 0) +} + +def bad_yoda_ge_zero(s : string) : bool { + // 0 >= length(s) means length(s) <= 0, which is empty + return 0 >= length(s) // PERF017 — empty (canon: length <= 0) +} + +def bad_yoda_lt_zero(s : string) : bool { + // 0 < length(s) means length(s) > 0, which is !empty + return 0 < length(s) // PERF017 — !empty (canon: length > 0) +} + +def bad_yoda_le_one(a : array) : bool { + // 1 <= length(a) means length(a) >= 1, which is !empty + return 1 <= length(a) // PERF017 — !empty (canon: length >= 1) +} + +def bad_yoda_gt_one(a : array) : bool { + // 1 > length(a) means length(a) < 1, which is empty + return 1 > length(a) // PERF017 — empty (canon: length < 1) +} + +// --- Bad patterns: table --- + +def bad_table_eq(t : table) : bool { + return length(t) == 0 // PERF017 — empty +} + +// --- Good patterns (no warnings) --- + +def good_empty(s : string) : bool { + return empty(s) +} + +def 
good_string_len_other(s : string; n : int) : bool { + return length(s) == n // not 0/1 — NOT flagged +} + +def good_length_three(s : string) : bool { + return length(s) == 3 // not 0/1 — NOT flagged +} + +// --- Bad: user code has a method literally named `empty` --- +// The suppression for `current_function.name == "empty"` is gated on +// _module.name == "builtin", so a user-defined empty() outside builtin still +// gets linted. This is the regression guard for that gating. + +class UserContainer { + items : array + + def empty() : bool { + return length(items) == 0 // PERF017 — fires (NOT in builtin) + } +} diff --git a/utils/lint/tests/style016_adjacent_guards.das b/utils/lint/tests/style016_adjacent_guards.das new file mode 100644 index 0000000000..b4eed0e9b9 --- /dev/null +++ b/utils/lint/tests/style016_adjacent_guards.das @@ -0,0 +1,128 @@ +options gen2 +// STYLE016: adjacent guards leading to identical early-exit can be combined with '||' +// +// Problem: Two adjacent if-guards with the same exit (return/break/continue with the +// same payload) read as a single decision. Merging into one '||' gate is shorter and +// makes the relationship explicit. 
+// +// Bad pattern: +// if (a) { return -1 } +// if (b) { return -1 } +// +// Good pattern: +// if (a || b) { return -1 } +// +// Detection covers two AST shapes: +// (a) two adjacent ExprIfThenElse statements in the same block +// (b) `if (a) { return X } else if (b) { return X }` chain + +expect 31209:7 + +require daslib/style_lint + +// --- Bad patterns (warnings) --- + +def bad_adjacent_returns(a, b : int) : int { + if (a == 0) { // STYLE016 + return -1 + } + if (b == 0) { + return -1 + } + return 0 +} + +def bad_else_chain(a, b : int) : int { + if (a == 0) { // STYLE016 (chained else-if form) + return -1 + } elif (b == 0) { + return -1 + } + return 0 +} + +def bad_break_pair() : void { + for (i in range(10)) { + if (i == 3) { // STYLE016 + break + } + if (i == 5) { + break + } + } +} + +def bad_continue_pair() : void { + for (i in range(10)) { + if (i % 2 == 0) { // STYLE016 + continue + } + if (i % 3 == 0) { + continue + } + } +} + +def bad_three_deep(a, b, c : int) : int { + if (a == 0) { // STYLE016 (a-guard with b-guard) + return -1 + } + if (b == 0) { // STYLE016 (b-guard with c-guard) + return -1 + } + if (c == 0) { + return -1 + } + return 0 +} + +def bad_return_with_value(a, b : int) : int { + if (a == 0) { // STYLE016 (same int payload) + return 42 + } + if (b == 0) { + return 42 + } + return 0 +} + +// --- Good patterns (no warnings) --- + +def good_combined(a, b : int) : int { + if (a == 0 || b == 0) { + return -1 + } + return 0 +} + +def good_different_payload(a, b : int) : int { + if (a == 0) { + return -1 + } + if (b == 0) { + return -2 + } + return 0 +} + +def good_multi_stmt_body(a, b : int) : int { + if (a == 0) { + print("a is zero\n") + return -1 + } + if (b == 0) { + return -1 + } + return 0 +} + +def good_intervening(a, b : int) : int { + if (a == 0) { + return -1 + } + print("a is non-zero\n") + if (b == 0) { + return -1 + } + return 0 +} diff --git a/utils/lint/tests/style017_bool_return.das b/utils/lint/tests/style017_bool_return.das 
new file mode 100644 index 0000000000..fc3ae7fb00 --- /dev/null +++ b/utils/lint/tests/style017_bool_return.das @@ -0,0 +1,80 @@ +options gen2 +// STYLE017: 'if (cond) return true; else return false' — use 'return cond' +// +// Problem: Three lines (or two if-else branches) that just propagate the boolean +// condition unchanged. Read better as `return cond` / `return !cond`. +// +// Bad pattern: +// if (cond) { +// return true +// } else { +// return false +// } +// +// Good pattern: +// return cond +// +// Detection covers two AST shapes: +// (a) `if (cond) return b1 else return b2` (b1 != b2) +// (b) `if (cond) return b1` immediately followed by `return b2` (b1 != b2) + +expect 31209:4 + +require daslib/style_lint +require strings + +// --- Bad patterns --- + +def bad_if_else_true_false(c : bool) : bool { + if (c) { // STYLE017 → return c + return true + } else { + return false + } +} + +def bad_if_else_false_true(c : bool) : bool { + if (c) { // STYLE017 → return !c + return false + } else { + return true + } +} + +def bad_adjacent_true_false(c : bool) : bool { + if (c) { // STYLE017 → return c + return true + } + return false +} + +def bad_adjacent_false_true(c : bool) : bool { + if (c) { // STYLE017 → return !c + return false + } + return true +} + +// --- Good patterns (no warnings) --- + +def good_direct(c : bool) : bool { + return c +} + +def good_negated(c : bool) : bool { + return !c +} + +def good_same_value(c : bool) : bool { + if (c) { + return true // both true — different rule territory + } + return true +} + +def good_compute(c : bool; n : int) : bool { + if (c) { + return n > 0 + } + return false +} diff --git a/utils/lint/tests/style018_bool_compare.das b/utils/lint/tests/style018_bool_compare.das new file mode 100644 index 0000000000..36be0d4e37 --- /dev/null +++ b/utils/lint/tests/style018_bool_compare.das @@ -0,0 +1,62 @@ +options gen2 +// STYLE018: redundant boolean comparison ('b == true' / 'b != false') +// +// Problem: Comparing a bool to 
a boolean literal is redundant — the bool already IS +// the value. Drop the comparison. +// +// Bad pattern: +// if (flag == true) { ... } +// if (flag != false) { ... } +// +// Good pattern: +// if (flag) { ... } +// if (!flag) { ... } + +expect 31209:6 + +require daslib/style_lint +require strings + +// --- Bad patterns --- + +def bad_eq_true(flag : bool) : bool { + return flag == true // STYLE018 → flag +} + +def bad_eq_false(flag : bool) : bool { + return flag == false // STYLE018 → !flag +} + +def bad_ne_true(flag : bool) : bool { + return flag != true // STYLE018 → !flag +} + +def bad_ne_false(flag : bool) : bool { + return flag != false // STYLE018 → flag +} + +def bad_yoda_eq_true(flag : bool) : bool { + return true == flag // STYLE018 (Yoda) +} + +def bad_yoda_eq_false(flag : bool) : bool { + return false == flag // STYLE018 (Yoda) +} + +// --- Good patterns (no warnings) --- + +def good_direct(flag : bool) : bool { + return flag +} + +def good_negated(flag : bool) : bool { + return !flag +} + +def good_two_bools(a, b : bool) : bool { + return a == b // bool-bool compare — not flagged +} + +def good_two_literals() : bool { + return true == true // both literals — not flagged +} diff --git a/utils/lint/tests/style019_clamp.das b/utils/lint/tests/style019_clamp.das new file mode 100644 index 0000000000..bf78215dec --- /dev/null +++ b/utils/lint/tests/style019_clamp.das @@ -0,0 +1,53 @@ +options gen2 +// STYLE019: nested min/max → clamp(x, lo, hi) +// +// Problem: `min(max(x, lo), hi)` reads as a clamp; `clamp(x, lo, hi)` is a +// math builtin that says so directly. 
+// +// Bad pattern: +// let bounded = min(max(x, lo), hi) +// let bounded_alt = max(min(x, hi), lo) +// +// Good pattern: +// let bounded = clamp(x, lo, hi) + +expect 31209:4 + +require daslib/style_lint +require math + +// --- Bad patterns --- + +def bad_min_max(x, lo, hi : int) : int { + return min(max(x, lo), hi) // STYLE019 +} + +def bad_max_min(x, lo, hi : int) : int { + return max(min(x, hi), lo) // STYLE019 (mirror form) +} + +def bad_min_max_inner_second(x, lo, hi : int) : int { + return min(hi, max(x, lo)) // STYLE019 — inner is arg[1] +} + +def bad_min_max_float(x, lo, hi : float) : float { + return min(max(x, lo), hi) // STYLE019 +} + +// --- Good patterns (no warnings) --- + +def good_clamp(x, lo, hi : int) : int { + return clamp(x, lo, hi) +} + +def good_min_only(a, b : int) : int { + return min(a, b) +} + +def good_max_only(a, b : int) : int { + return max(a, b) +} + +def good_three_arg_min_no_nested(x, y, z : int) : int { + return min(min(x, y), z) // not min(max ...) — NOT flagged +} diff --git a/utils/mouse/.das_package b/utils/mouse/.das_package new file mode 100644 index 0000000000..c327acf116 --- /dev/null +++ b/utils/mouse/.das_package @@ -0,0 +1,18 @@ +options gen2 + +require daslib/daspkg + +[export] +def package() { + package_name("mouse") + package_description("blind-mouse — personal Q&A cache MCP server. Curated `.md` answers backed by SQLite/FTS5 retrieval; agents both consume and curate.") +} + +[export] +def dependencies(version : string) { +} + +[export] +def release() { + release_main("main.das") +} diff --git a/utils/mouse/OVERVIEW.md b/utils/mouse/OVERVIEW.md new file mode 100644 index 0000000000..a7b7be64fc --- /dev/null +++ b/utils/mouse/OVERVIEW.md @@ -0,0 +1,110 @@ +# blind-mouse — personal Q&A cache + +A separate MCP server (and CLI) that stores curated `.md` answers to "how do I X?" / "what's the pattern for Y?" / "why does Z behave this way?" questions and retrieves them via SQLite-FTS5 BM25 ranking. 
The agent both consumes and curates the corpus. + +## Problem + +Across sessions, agents repeatedly re-derive the same answers. Existing knowledge channels miss the long tail: + +- `CLAUDE.md` and `skills/*.md` cover **categorical** knowledge — rules, tables, conventions. Authored deliberately, not accumulated. +- The daslang MCP (`find_symbol`, `grep_usage`, `find_references`) covers **symbol lookups** in the current codebase. +- `git log` covers **what changed** and **why** in commit prose. +- Memory covers **persistent facts about the user / project** that don't fit the codebase. + +None of those covers "the agent figured this out once and lost it." That's blind-mouse's slot. + +## Non-goals + +- Not a docs replacement. RST tutorials, skill files, and CLAUDE.md remain the source of truth for categorical knowledge. +- Not project memory. Memory is for who/what/why/when about ongoing work; mouse is for technical-fact recall. +- Not a vector store. v0 ships BM25-only. Embeddings are a vNext layer once we know what the corpus actually looks like. +- Not a search engine. The corpus is small, hand-curated, and lives next to the project. + +## Mental model + +**Atomic docs with cross-refs.** A doc answers one specific question. Related docs link via stable slugs. Aggregating "everything about X" into one mega-doc kills retrieval precision. + +**Question aliases live in the doc.** When `add` records a Q&A, the original question becomes the first bullet in a `## Questions` section. Future paraphrased asks that match this section will retrieve the doc — the doc accumulates its own retrieval surface as it gets hit. + +**The agent both consumes and curates.** Retrieval is the easy part. The killer feature is the edit-when-wrong loop: agent reads an answer, finds it stale or incomplete, edits the `.md` directly, bumps `last_verified`. No separate maintenance tool — just `Edit`. 
+ +## Doc anatomy + +```markdown +--- +slug: typefunction-return-type +title: How do I declare a [typefunction] return type? +created: 2026-05-08 +last_verified: 2026-05-08 +links: [typefunction-overview, macro-types] +--- + +(answer body — code blocks, prose, examples) + +## Questions +- How do I declare a [typefunction] return type? +- typefunction return value +- what's the type for a typefunction macro +``` + +Frontmatter fields: `slug` (stable ID, used for cross-refs), `title` (1-line description), `created` / `last_verified` (ISO dates), `links` (inline list of slugs this doc references). + +`## Questions` section: bullet list of natural-language phrasings that should retrieve this doc. Auto-grows over time as different paraphrases hit the same answer (vNext: `confirm` tool to grow it on successful retrieval; v0: edit by hand). + +## Operations + +| Operation | CLI | MCP tool | Notes | +|---|---|---|---| +| Retrieve | `mouse ask "<question>"` | `mouse__ask` | Top-K BM25 ranked. Words OR-joined. | +| Add Q&A | `mouse add "<question>" --body "..."` | `mouse__add` | Dupe-gated by default; pass `--force` / `force=true` to override. | +| Get doc | `mouse get <slug>` | `mouse__get` | Body + frontmatter + reverse-link footer. | +| Rebuild | `mouse rebuild` | `mouse__rebuild` | Rescans `<root>/docs/`; idempotent. | +| Serve MCP | `mouse serve` | (this _is_ the server) | stdio JSON-RPC. | + +`add`'s **dupe-on-add gate** is the corpus-hygiene mechanism. With `force=false` (default), `add` first runs retrieval on the new question and returns the similar docs without writing if any match. The agent decides: extend an existing doc (edit the `.md`) or create a new one (re-call with `force=true`). + +## Storage model + +The `.md` files under `<root>/docs/` are the **source of truth**. The SQLite index at `<root>/index.db` is rebuildable — `mouse rebuild` repopulates it from disk. Implications: + +- The corpus is `git`-friendly. Check it in if you want a shared corpus; `git pull` followed by `mouse rebuild` syncs. 
+- Hand-edits work. `Edit` an answer, run `mouse rebuild`, the index reflects the change. +- The DB is disposable. Lose it, regenerate it. + +The SQLite schema (managed via `[sql_migration]` from `sqlite/sqlite_migrate`): + +- `docs` — slug PK, path, title, created, last_verified, body_hash. +- `links` — `(from_slug, to_slug)` pairs for cross-refs, kept distinct by a unique index. +- `search_idx` — FTS5 virtual table; per-doc concatenation of title + question aliases + body. BM25 ranks via the `@sql_fts_rank` column. + +Rebuild is whole-corpus delete+repopulate — simple, correct, fast for small corpora. Incremental update (re-index only changed `body_hash`) is a vNext optimization once the corpus is large enough that whole-rebuild matters. + +## Curation discipline + +**Extend an existing doc** when: +- The new question is a paraphrase of one already covered. +- The fix is "add one more bullet to the Questions section" or "clarify one paragraph in the body." + +**Create a new doc** when: +- The new question is a different facet that deserves separate retrieval (different slug, separable answer). +- Cross-link the related doc via `links:` if there's a relationship. + +**Bump `last_verified`** when you confirm an answer is still correct. The displayed date in `mouse get` lets future agents calibrate trust. + +**Before deleting or renaming a doc**, check `mouse get <slug>`'s "linked from:" footer. Any incoming links would dangle. + +## v0 → vNext backlog + +1. **Embeddings.** Replace BM25 with cosine over a small embedding model. Better synonym handling. +2. **LLM rerank.** Top-K BM25 → Haiku rerank → final K. Confidence threshold filters obvious misses. +3. **Two-tier corpus.** `~/.mouse/` (cross-project) + workspace overlay. +4. **Auto-staleness.** Periodic re-verification; flag answers whose external refs no longer resolve. +5. **`confirm(slug, question)`** tool — grow the question alias list automatically after a successful retrieval. +6. 
**External-ref validators** — when a doc cites a file path / PR / symbol, check it still exists. +7. **Schema validation** — when the corpus is shared via git, a CI step that runs `mouse rebuild` and checks for parse errors. + +## What this is testing + +The hypothesis: **the curate-on-use loop is more valuable than the retrieval algorithm.** If after a week the corpus is empty, the design failed for reasons no amount of embedding cleverness fixes — the agent never reached for the tool, or never recorded what it learned. If it fills up with garbage, the dupe-on-add gate wasn't strict enough. If it fills up cleanly, the next move is layering on retrieval improvements. + +v0 ships the smallest thing that exercises the whole loop end-to-end. Decisions get made from real corpus shape, not architecture diagrams. diff --git a/utils/mouse/README.md b/utils/mouse/README.md new file mode 100644 index 0000000000..95381272bf --- /dev/null +++ b/utils/mouse/README.md @@ -0,0 +1,58 @@ +# mouse + +Personal Q&A cache MCP server. `.md` answers backed by SQLite/FTS5 retrieval. Long vision: [OVERVIEW.md](OVERVIEW.md). + +## Quick start + +```bash +# Rebuild the index from <root>/docs/ (defaults to ./mouse-data, override via --root or $MOUSE_ROOT) +daslang utils/mouse/main.das -- rebuild + +# Search +daslang utils/mouse/main.das -- ask "how do I X" + +# Add a Q&A (dupe-gated by default) +daslang utils/mouse/main.das -- add "how do I X" --body "answer body" + +# Run as MCP stdio server +daslang utils/mouse/main.das -- serve +``` + +## MCP wiring + +Add to your `.mcp.json`: + +```json +{ + "mcpServers": { + "mouse": { + "command": "daslang.exe", + "args": ["utils/mouse/main.das", "--", "serve", "--root", "./mouse-data"] + } + } +} +``` + +Tools exposed: `mouse__ask`, `mouse__add`, `mouse__get`, `mouse__rebuild`. + +## Layout + +``` +mouse-data/ + docs/<slug>.md -- one doc per atomic Q&A (source of truth) + index.db -- SQLite, rebuildable from docs/ +``` + +`.md` files are checked-in-friendly. 
`git pull` + `mouse rebuild` re-syncs the index. + +## Development + +Run the test suite: + +```bash +daslang dastest/dastest.das -- --test utils/mouse/tests/ +``` + +Format `.das` files via the daslang MCP `format_file` tool (`mcp__daslang__format_file` when invoking from an agent) — there is no shell equivalent, so the snippet above is shell-runnable but the formatter is not. + +CLAUDE.md has a top-level "Asking blind-mouse" section describing when to reach for this tool from inside Claude Code. diff --git a/utils/mouse/index.das b/utils/mouse/index.das new file mode 100644 index 0000000000..7942b4777a --- /dev/null +++ b/utils/mouse/index.das @@ -0,0 +1,410 @@ +// blind-mouse — SQLite/FTS5 index over the .md doc corpus. +// .md files under /docs/ are the source of truth. The DB is a +// rebuildable retrieval index — `rebuild` repopulates it from disk. + +options gen2 + +require daslib/sql public +require sqlite/sqlite_boost public +require sqlite/sqlite_linq public +require sqlite/sqlite_migrate public +require daslib/fio public +require strings public +require math +require store public + +// ─── on-disk layout ────────────────────────────────────────────────── + +def docs_dir(root : string) : string { + return path_join(root, "docs") +} + +def db_path(root : string) : string { + return path_join(root, "index.db") +} + +def is_directory_path(path : string) : bool { + let s = stat(path) + return s.is_valid && s.is_dir +} + +def ensure_root(root : string) { + if (!is_directory_path(root)) { + mkdir_rec(root) + } + let dd = docs_dir(root) + if (!is_directory_path(dd)) { + mkdir_rec(dd) + } +} + +def list_doc_files(root : string) : array { + var out : array + let dd = docs_dir(root) + if (!is_directory_path(dd)) { + return <- out + } + dir(dd) $(filename) { + if (filename == "." 
// Run `blk` against the index database that lives under `root`.
// The corpus directories are created first (ensure_root), then the DB at
// db_path(root) is opened through with_latest_sqlite and handed to `blk`.
// NOTE(review): with_latest_sqlite presumably applies the [sql_migration]
// functions before invoking the block — confirm against sqlite_migrate.
def with_index_db(root : string; blk : block<(db : SqlRunner) : void>) {
    ensure_root(root)
    let index_file = db_path(root)
    with_latest_sqlite(index_file) $(conn) {
        blk |> invoke(conn)
    }
}
───────────────────────────────────────────────────────── + +def rebuild(db : SqlRunner; root : string) : int { + let files <- list_doc_files(root) + var n = 0 + db |> with_transaction() { + db |> exec("DELETE FROM docs") + db |> exec("DELETE FROM links") + db |> exec("DELETE FROM search_idx") + for (f in files) { + var doc : ParsedDoc + var error : string + if (!read_doc(f, doc, error)) { + to_log(LOG_WARNING, "skipping {f}: {error}") + continue + } + if (!empty(doc.parse_error)) { + to_log(LOG_WARNING, "skipping {f}: {doc.parse_error}") + continue + } + if (empty(doc.frontmatter.slug)) { + to_log(LOG_WARNING, "skipping {f}: missing slug") + continue + } + if (!is_valid_slug(doc.frontmatter.slug)) { + to_log(LOG_WARNING, "skipping {f}: invalid slug `{doc.frontmatter.slug}` (would be unretrievable via get)") + continue + } + // get rebuilds the path as /{slug}.md, so a hand-edited + // file whose basename diverges from its frontmatter slug would + // be searchable but unfetchable. + let expected_basename = "{doc.frontmatter.slug}.md" + if (base_name(f) != expected_basename) { + to_log(LOG_WARNING, "skipping {f}: basename does not match slug (expected `{expected_basename}`, would be unretrievable via get)") + continue + } + db |> insert(DocRow( + slug = doc.frontmatter.slug, + path = f, + title = doc.frontmatter.title, + created = doc.frontmatter.created, + last_verified = doc.frontmatter.last_verified, + body_hash = doc.body_hash)) + for (link in doc.frontmatter.links) { + // insert_or_ignore: a doc accidentally listing the same + // link twice in frontmatter would otherwise hit the + // UNIQUE(from_slug, to_slug) constraint and abort the + // whole rebuild transaction. + db |> insert_or_ignore(LinkRow( + from_slug = doc.frontmatter.slug, + to_slug = link)) + } + // doc.body already contains the `## Questions` section, so + // we don't append doc.questions separately — that would + // double-index aliases and skew BM25. 
// Turn a free-form question into text that FTS5's query parser accepts.
//
// Every byte is mapped one-to-one, so the result has the same length as `q`:
//   * ASCII alphanumerics and ' ' are kept as-is;
//   * '*' is kept only when it acts as a trailing prefix marker, i.e. the
//     previous byte is alphanumeric and the next byte is not (`foo*`);
//   * everything else becomes a single space.
//
// FTS5 only accepts '*' directly after a token. A stray one (`*args`,
// `foo**`, a lone `*`) is a syntax error that makes the whole MATCH fail,
// which `search` then reports as "no results" even though the other words
// would have matched. Dropping misplaced stars keeps such queries usable.
//
// v0 trade-off (unchanged): no quoted-phrase / OR / NEAR support for
// free-text queries.
def sanitize_fts5_query(q : string) : string {
    return build_string() $(var w) {
        let n = length(q)
        var prev_alnum = false
        for (i in range(n)) {
            let c = character_at(q, i) // nolint:PERF003
            let cur_alnum = is_alnum(c)
            var keep = cur_alnum || c == ' '
            if (c == '*') {
                // Only a star that closes a token is a valid prefix query.
                let next_alnum = i + 1 < n && is_alnum(character_at(q, i + 1)) // nolint:PERF003
                keep = prev_alnum && !next_alnum
            }
            if (keep) {
                w |> write(slice(q, i, i + 1))
            } else {
                w |> write(" ")
            }
            prev_alnum = cur_alnum
        }
    }
}
// Look for existing docs that resemble `question`.
// Runs the regular `search` with the same `k` (default 5) and narrows each
// hit down to the fields a dupe report needs: slug, title and rank.
// The order of the returned matches is the order `search` produced.
def dupe_check(db : SqlRunner; question : string; k : int = 5) : array<DupeMatch> {
    let found <- search(db, question, k)
    let total = length(found)
    var matches : array<DupeMatch>
    matches |> reserve(total)
    for (i in range(total)) {
        matches |> push(DupeMatch(
            slug = found[i].slug,
            title = found[i].title,
            rank = found[i].rank))
    }
    return <- matches
}
// Add one Q&A doc to the corpus under `root` and refresh the index.
//
//   question  - raw question text; passed through sanitize_question and used
//               as the doc title and as the single `## Questions` entry.
//   body      - answer text; a trailing newline is added when missing.
//   slug_hint - preferred slug; used only when non-empty and is_valid_slug,
//               otherwise the slug is derived from the sanitized question.
//   force     - when false, dupe_check runs first and any similar doc stops
//               the add (outcome.similar is filled, outcome.created = false).
//
// Returns an AddOutcome: `error` is set on failure; on success `created`,
// `slug` and `written_path` describe the new doc.
def add_doc(db : SqlRunner; root : string;
            question : string; body : string;
            slug_hint : string; force : bool) : AddOutcome {
    var outcome : AddOutcome
    let clean_question = sanitize_question(question)
    if (empty(clean_question)) {
        outcome.error = "question is empty after sanitization"
        return <- outcome
    }
    if (!force) {
        var similar <- dupe_check(db, clean_question, 5)
        if (!empty(similar)) {
            outcome.similar <- similar
            outcome.created = false
            return <- outcome
        }
    }
    // Slugs already taken according to the index...
    var existing : table<string>
    let all_slugs <- _sql(db |> select_from(type<DocRow>) |> _select(_.slug))
    for (s in all_slugs) {
        existing |> insert(s)
    }
    // ...plus every `.md` basename actually on disk. rebuild() leaves files
    // out of the index (parse errors, invalid or mismatched slugs) and the
    // index may simply be stale, so the DB alone cannot prove a filename is
    // free. write_doc's overwrite behaviour is not visible from here; a
    // clobber of an existing doc is treated as possible and avoided.
    let on_disk <- list_doc_files(root)
    for (f in on_disk) {
        let fname = base_name(f)
        // list_doc_files only returns names ending in ".md" (3 chars).
        existing |> insert(slice(fname, 0, length(fname) - 3))
    }
    var slug : string
    if (!empty(slug_hint) && is_valid_slug(slug_hint)) {
        slug = dedupe_slug(slug_hint, existing)
    } else {
        slug = slug_from_title_dedupe(clean_question, existing)
    }
    if (empty(slug)) {
        // Title produced no usable characters; fall back to a tick-based name.
        slug = "doc-{ref_time_ticks()}"
    }
    let now = today_iso(db)
    let fm = Frontmatter(
        slug = slug,
        title = clean_question,
        created = now,
        last_verified = now)
    let composed = build_string() $(var w) {
        w |> write(body)
        if (!empty(body) && character_at(body, length(body) - 1) != '\n') { // nolint:PERF003
            w |> write("\n")
        }
        w |> write("\n## Questions\n")
        w |> write("- {clean_question}\n")
    }
    let path = path_join(docs_dir(root), "{slug}.md")
    var werr : string
    if (!write_doc(path, fm, composed, werr)) {
        outcome.error = "could not write {path}: {werr}"
        return <- outcome
    }
    // The .md files are the source of truth, so re-index from disk.
    rebuild(db, root)
    outcome.slug = slug
    outcome.created = true
    outcome.written_path = path
    return <- outcome
}
// Pick the corpus root directory.
// Precedence: explicit `arg_root` (when non-empty), then the MOUSE_ROOT
// environment variable, then the default "./mouse-data".
// A MOUSE_ROOT that is set but empty is treated as unset; returning ""
// would otherwise make path_join build paths without the intended root.
def resolve_root(arg_root : string) : string {
    if (!empty(arg_root)) {
        return arg_root
    }
    if (has_env_variable("MOUSE_ROOT")) {
        let from_env = get_env_variable("MOUSE_ROOT")
        if (!empty(from_env)) {
            return from_env
        }
    }
    return "./mouse-data"
}
+ if (!empty(doc.parse_error)) { + to_log(LOG_WARNING, "{slug}: {doc.parse_error}") + } + print("# {doc.frontmatter.title}\n") + print("slug: {doc.frontmatter.slug}\n") + if (!empty(doc.frontmatter.created)) { + print("created: {doc.frontmatter.created}\n") + } + if (!empty(doc.frontmatter.last_verified)) { + print("last_verified: {doc.frontmatter.last_verified}\n") + } + if (!empty(doc.frontmatter.links)) { + let links_str = doc.frontmatter.links |> join(", ") + print("links: {links_str}\n") + } + let froms <- linked_from(db, slug) + if (!empty(froms)) { + let froms_str = froms |> join(", ") + print("linked from: {froms_str}\n") + } + print("\n---\n\n") + print(doc.body) + print("\n\nHint: if this answer is stale or wrong, edit this .md directly and bump `last_verified`. Delete it if the answer is no longer correct or the question no longer comes up.\n") + } +} + +def cmd_add(args : MouseArgs) { + let root = resolve_root(args.root) + let question = positional_value(args.arg) + if (empty(question)) { + to_log(LOG_ERROR, "add: missing question") + return + } + if (empty(args.body)) { + to_log(LOG_ERROR, "add: missing --body") + return + } + with_index_db(root) $(db) { + let outcome <- add_doc(db, root, question, args.body, args.slug, args.force) + if (!empty(outcome.error)) { + to_log(LOG_ERROR, outcome.error) + return + } + if (outcome.created) { + print("created: {outcome.slug}\n") + print("path: {outcome.written_path}\n") + print("\nHint: if you discover this answer is wrong later, edit the .md at the path above and bump `last_verified`.\n") + } else { + print("not created (use --force to override). similar:\n") + for (s in outcome.similar) { + print(" [{s.rank}] {s.slug} — {s.title}\n") + } + print("\nHint: prefer extending an existing doc by editing its .md (use mouse get to find the path). 
// Read an integer tool argument from a JSON object.
// Returns `dflt` when `args` is null, the key is absent, or the value is not
// a JSON number. Numbers outside the `int` range are clamped to
// INT_MIN / INT_MAX instead of being cast directly, since the value comes
// from an untrusted MCP client (e.g. `"k": 1e300`) and an out-of-range
// floating-point to int conversion has no dependable result.
// NOTE(review): this only looks at the `_number` alternative — confirm that
// integer literals in JSON arrive as `_number` in daslib/json.
def get_int_arg(args : JsonValue?; key : string; dflt : int) : int {
    if (args == null) {
        return dflt
    }
    let v = args?[key]
    if (v == null) {
        return dflt
    }
    if (!(v.value is _number)) {
        return dflt
    }
    let num = double(v.value as _number)
    if (num != num) {
        // NaN compares unequal to itself; nothing sensible to convert.
        return dflt
    }
    if (num >= double(INT_MAX)) {
        return INT_MAX
    }
    if (num <= double(INT_MIN)) {
        return INT_MIN
    }
    return int(num)
}
// MCP tool `mouse__ask`: search the corpus and format the hits as text.
// Arguments read from `args`: "question" (required), "k" (default 5) and
// "root" (resolved through resolve_root). A missing question yields an error
// tool result. Every search is recorded via log_query with source "mcp",
// whether or not it matched anything.
def tool_ask(args : JsonValue?) : string {
    let question = get_string_arg(args, "question")
    if (empty(question)) {
        return make_tool_result("missing 'question' argument", true)
    }
    let top_k = get_int_arg(args, "k", 5)
    let corpus_root = resolve_root(get_string_arg(args, "root"))
    var text : string
    with_index_db(corpus_root) $(db) {
        let found <- search(db, question, top_k)
        log_query(db, question, found, "mcp")
        if (!empty(found)) {
            text = build_string() $(var w) {
                for (hit in found) {
                    w |> write(fmt_search_hit(hit))
                }
                w |> write(HINT_HIT)
            }
        } else {
            text = "(no results for: {question}){HINT_NO_MATCH}"
        }
    }
    return make_tool_result(text, false)
}
: string { + let question = get_string_arg(args, "question") + if (empty(question)) { + return make_tool_result("missing 'question' argument", true) + } + let body = get_string_arg(args, "body") + if (empty(body)) { + return make_tool_result("missing 'body' argument", true) + } + let slug_hint = get_string_arg(args, "slug") + let force = get_bool_arg(args, "force") + let root = resolve_root(get_string_arg(args, "root")) + var output : string + var is_error = false + with_index_db(root) $(db) { + let outcome <- add_doc(db, root, question, body, slug_hint, force) + if (!empty(outcome.error)) { + output = outcome.error + is_error = true + return + } + if (outcome.created) { + output = "created: {outcome.slug}\npath: {outcome.written_path}\n\nHint: if you discover this answer is wrong later, edit the .md at the path above and bump `last_verified`." + } else { + output = build_string() $(var w) { + w |> write("not created (use force=true to override). similar:\n") + for (s in outcome.similar) { + w |> write(" [{s.rank}] {s.slug} — {s.title}\n") + } + w |> write("\nHint: prefer extending an existing doc by editing its .md (call mouse__get with one of the slugs above to find the path). Re-call with force=true only when the new answer is genuinely a different topic.") + } + } + } + return make_tool_result(output, is_error) +} + +def tool_get(args : JsonValue?) : string { + let slug = get_string_arg(args, "slug") + if (empty(slug)) { + return make_tool_result("missing 'slug' argument", true) + } + if (!is_valid_slug(slug)) { + return make_tool_result("invalid slug '{slug}'", true) + } + let root = resolve_root(get_string_arg(args, "root")) + var output : string + var is_error = false + with_index_db(root) $(db) { + let path = path_join(docs_dir(root), "{slug}.md") + var doc : ParsedDoc + var err : string + if (!read_doc(path, doc, err)) { + output = "could not read {slug}: {err}" + is_error = true + return + } + // rebuild already skips parse_error docs from the index. 
// MCP tool `mouse__rebuild`: re-index the corpus from its .md files.
// The optional "root" argument goes through resolve_root; the reply reports
// the count returned by rebuild() together with the root that was used.
def tool_rebuild(args : JsonValue?) : string {
    let requested_root = get_string_arg(args, "root")
    let root = resolve_root(requested_root)
    var indexed = 0
    with_index_db(root) $(db) {
        indexed = rebuild(db, root)
    }
    let summary = "rebuilt: {indexed} doc(s) in {root}"
    return make_tool_result(summary, false)
}
// Handle one JSON-RPC 2.0 message and return the serialized reply, or ""
// when nothing must be written back.
//
//   unparseable body              -> error -32700 with id null
//   missing / non-string method   -> error -32600
//   message without an `id`       -> notification: never answered
//   initialize, tools/list,
//   tools/call, ping              -> dispatched to their handlers
//   notifications/initialized,
//   initialized                   -> accepted silently
//   any other method              -> error -32601
//
// JSON-RPC 2.0 forbids replying to notifications, even with an error.
// Without the `id` check an unknown notification (for example a client's
// cancellation notice) would be answered with a `"id":null` error object.
// NOTE(review): this relies on `parsed?.id` being null when the member is
// absent, which is what jsonvalue_id_to_string already checks for.
//
// The parsed tree is deleted on every path after a successful parse.
def dispatch_jsonrpc(body : string) : string {
    var err : string
    var parsed = read_json(body, err)
    if (parsed == null) {
        return jsonrpc_error("null", -32700, "parse error: {err}")
    }
    let id_v = parsed?.id
    let id = jsonvalue_id_to_string(id_v)
    let method_v = parsed?.method
    if (method_v == null || !(method_v.value is _string)) {
        unsafe {
            delete parsed
        }
        return jsonrpc_error(id, -32600, "missing method")
    }
    if (id_v == null) {
        // Notification: the sender does not expect (and must not get) a reply.
        unsafe {
            delete parsed
        }
        return ""
    }
    let method = method_v.value as _string
    var result : string
    if (method == "initialize") {
        result = handle_initialize(id)
    } elif (method == "notifications/initialized" || method == "initialized") {
        unsafe {
            delete parsed
        }
        return ""
    } elif (method == "tools/list") {
        result = handle_tools_list(id)
    } elif (method == "tools/call") {
        result = handle_tools_call(id, parsed?.params)
    } elif (method == "ping") {
        result = jsonrpc_response(id, "\{\}")
    } else {
        result = jsonrpc_error(id, -32601, "method not found: {method}")
    }
    unsafe {
        delete parsed
    }
    return result
}
// Entry point: parse the command line and run the requested subcommand.
// Help is printed when argument parsing fails, when the help flag is set,
// when no subcommand was given, and after an unknown subcommand is logged.
[export]
def main() {
    var parsed <- parse_args(type<MouseArgs>)
    if (parsed |> is_err) {
        to_log(LOG_ERROR, "error: {parsed |> unwrap_err}")
        print_help(get_command_info(type<MouseArgs>), "mouse")
        return
    }
    let args <- parsed |> move_unwrap
    let cmd = positional_value(args.command)
    if (args.help || empty(cmd)) {
        print_help(get_command_info(type<MouseArgs>), "mouse")
        return
    }
    if (cmd == "serve") {
        cmd_serve(args)
    } elif (cmd == "log") {
        cmd_log(args)
    } elif (cmd == "rebuild") {
        cmd_rebuild(args)
    } elif (cmd == "get") {
        cmd_get(args)
    } elif (cmd == "add") {
        cmd_add(args)
    } elif (cmd == "ask") {
        cmd_ask(args)
    } else {
        to_log(LOG_ERROR, "unknown command: {cmd}")
        print_help(get_command_info(type<MouseArgs>), "mouse")
    }
}
// Parse an inline list value such as `[a, b, c]` into its items.
// The value is trimmed first; anything not wrapped in `[` ... `]` gives an
// empty array. Items are split on ',', trimmed, and empty items are dropped.
def private parse_inline_list(value : string) : array<string> {
    var items : array<string>
    let text = strip(value)
    // Needs at least "[]": a lone "[" or "]" is not a list.
    let bracketed = length(text) >= 2 && starts_with(text, "[") && ends_with(text, "]")
    if (bracketed) {
        let inside = slice(text, 1, length(text) - 1)
        if (!empty(strip(inside))) {
            for (piece in split(inside, ",")) {
                let entry = strip(piece)
                if (!empty(entry)) {
                    items |> push(entry)
                }
            }
        }
    }
    return <- items
}
// Collect the bullet items listed under the `## Questions` heading.
// The heading is matched case-insensitively on the trimmed line. Collection
// stops at the next line that starts with '#'. Only lines of the form
// `- text` contribute, and the text is trimmed; other lines are ignored.
def parse_questions(body : string) : array<string> {
    var found : array<string>
    let lines <- split(body, "\n")
    var collecting = false
    for (raw in lines) {
        let line = strip(raw)
        if (collecting) {
            if (starts_with(line, "#")) {
                // Next heading ends the section.
                break
            }
            if (starts_with(line, "- ")) {
                let item = strip(slice(line, 2))
                if (!empty(item)) {
                    found |> push(item)
                }
            }
        } elif (to_lower(line) == "## questions") {
            collecting = true
        }
    }
    return <- found
}
range(n)) { + let c = character_at(lower, i) // nolint:PERF003 + // `lower` is already lowercased, so is_alnum's uppercase half can never match — equivalent to the explicit ranges. + if (is_alnum(c)) { + w |> write(slice(lower, i, i + 1)) + prev_dash = false + } elif (!prev_dash) { + w |> write("-") + prev_dash = true + } + } + } + var s = 0 + var e = length(raw) + while (s < e && character_at(raw, s) == '-') { // nolint:PERF003 + s ++ + } + while (e > s && character_at(raw, e - 1) == '-') { // nolint:PERF003 + e -- + } + // Cap to MAX_SLUG_LEN so a long title can't produce a slug that + // `is_valid_slug` later rejects (making the doc unfetchable). Re-trim + // trailing dashes since the cut may land on a `-`. + if (e - s > MAX_SLUG_LEN) { + e = s + MAX_SLUG_LEN + while (e > s && character_at(raw, e - 1) == '-') { // nolint:PERF003 + e -- + } + } + return s < e ? slice(raw, s, e) : "" +} + +// A slug is the on-disk filename and SQL primary key. Untrusted input +// (MCP JSON-RPC, CLI args) flows here, so reject anything that could +// escape /docs/ or break path joins. Shape: lowercase alnum, dash, +// underscore; must start with alnum; capped at MAX_SLUG_LEN chars. +def is_valid_slug(s : string) : bool { + let n = length(s) + if (n == 0 || n > MAX_SLUG_LEN) { + return false + } + var ok = true + peek_data(s) $(bytes) { + // Slugs must be lowercase-only — using is_alnum here would silently allow + // uppercase letters and break the on-disk filename round-trip on + // case-sensitive filesystems. The explicit ranges intentionally reject + // uppercase, so the broader is_alnum() helper is wrong here. 
+ let first = int(bytes[0]) + let first_ok = (first >= '0' && first <= '9') || (first >= 'a' && first <= 'z') // nolint:PERF014 + if (!first_ok) { + ok = false + return + } + for (b in bytes) { + let c = int(b) + let valid = (c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || c == '-' || c == '_' // nolint:PERF014 + if (!valid) { + ok = false + return + } + } + } + return ok +} + +// Append `-2`, `-3`, ... to `base` until a free slot is found. Preserves +// `base` verbatim when there's no collision — does NOT re-slugify, so a +// valid hint with `_` keeps its `_`. On collision, truncates `base` only +// as much as needed to fit the suffix within MAX_SLUG_LEN. +def dedupe_slug(base : string; existing : table) : string { + if (empty(base)) { + return "" + } + if (!key_exists(existing, base)) { + return base + } + // Suffix is at most `-99999` (6 chars). Cap base to leave room. + let max_base = MAX_SLUG_LEN - 6 + var capped = length(base) > max_base ? slice(base, 0, max_base) : base + while (!empty(capped) && character_at(capped, length(capped) - 1) == '-') { // nolint:PERF003 + capped = slice(capped, 0, length(capped) - 1) + } + if (empty(capped)) { + return "" + } + if (capped != base && !key_exists(existing, capped)) { + return capped + } + var n = 2 + while (n < 100000) { + let candidate = "{capped}-{n}" + if (!key_exists(existing, candidate)) { + return candidate + } + n++ + } + panic("slug collision overflow for {capped}") + return "" +} + +def slug_from_title_dedupe(title : string; existing : table) : string { + return dedupe_slug(slug_from_title(title), existing) +} + +// Strip CR/LF (and surrounding whitespace) from a question. Without this, +// MCP-supplied newlines in `question` would split the YAML `title:` line +// across multiple lines and could inject sibling keys into frontmatter. +// Callers must treat empty-after-sanitize as a rejection. 
+def sanitize_question(q : string) : string {
+    // Each CR / LF becomes one space; every other character is copied as is.
+    let cleaned = build_string() $(var w) {
+        let n = length(q)
+        for (i in range(n)) {
+            let c = character_at(q, i) // nolint:PERF003
+            if (c == '\n' || c == '\r') {
+                w |> write(" ")
+            } else {
+                w |> write(slice(q, i, i + 1))
+            }
+        }
+    }
+    return strip(cleaned)
+}
+
+// ─── body hash (FNV-1a 64-bit, decimal) ──────────────────────────────
+
+// Hashes the bytes of `body` with FNV-1a (xor the byte in, then multiply by
+// the prime) and returns the 64-bit result formatted as a decimal string.
+def hash_body(body : string) : string {
+    var h = 0xcbf29ce484222325ul
+    let prime = 0x100000001b3ul
+    peek_data(body) $(bytes) {
+        for (b in bytes) {
+            h ^= uint64(b)
+            h *= prime
+        }
+    }
+    return "{h}"
+}
+
+// ─── parse + read + write ────────────────────────────────────────────
+
+// Parses a whole document: frontmatter, body, `## Questions` bullets and
+// body hash. A frontmatter failure is reported through `parse_error` rather
+// than by aborting.
+def parse_doc_text(text : string) : ParsedDoc {
+    var doc : ParsedDoc
+    let r <- parse_frontmatter_text(text, doc.frontmatter)
+    doc.body = r.body
+    doc.parse_error = r.parse_error
+    // On a frontmatter error the body is the whole raw file, so a
+    // `## Questions` section found in it cannot be trusted. Leave `questions`
+    // empty in that case (this is what the malformed-frontmatter fixture
+    // expects).
+    if (empty(doc.parse_error)) {
+        doc.questions <- parse_questions(doc.body)
+    }
+    doc.body_hash = hash_body(doc.body)
+    return <- doc
+}
+
+// Reads the file at `path` and returns its content. `error` is reset to ""
+// on entry and set to a message when the file cannot be opened, in which
+// case the returned string is empty.
+def read_doc_text(path : string; var error : string&) : string {
+    error = ""
+    var content : string
+    var ok = false
+    fopen(path, "rb") $(f) {
+        if (f == null) {
+            return
+        }
+        content := unsafe(fread_to_eof(f))
+        ok = true
+    }
+    if (!ok) {
+        error = "could not open {path}"
+    }
+    return content
+}
+
+// Reads and parses the document at `path` into `doc`. Returns false, with
+// `error` set and `doc` untouched, when the file cannot be read. A
+// frontmatter problem still returns true and is reported via
+// `doc.parse_error`.
+def read_doc(path : string; var doc : ParsedDoc; var error : string&) : bool {
+    error = ""
+    let text = read_doc_text(path, error)
+    if (!empty(error)) {
+        return false
+    }
+    doc <- parse_doc_text(text)
+    return true
+}
+
+// Renders frontmatter + body back to document text. Empty scalar fields are
+// omitted; `links:` is always written (as `[]` when there are none). A
+// non-empty body is guaranteed to end with a newline.
+def serialize_doc(fm : Frontmatter; body : string) : string {
+    return build_string() $(var w) {
+        w |> write("---\n")
+        if (!empty(fm.slug)) {
+            w |> write("slug: {fm.slug}\n")
+        }
+        if (!empty(fm.title)) {
+            w |> write("title: {fm.title}\n")
+        }
+        if (!empty(fm.created)) {
+            w |> write("created: {fm.created}\n")
+        }
+        if (!empty(fm.last_verified)) {
+            w |> write("last_verified: {fm.last_verified}\n")
+        }
+        w |> write("links: [")
+        for (i in range(length(fm.links))) {
+            if (i > 0) {
+                w |> write(", ")
+            }
+            w |> write(fm.links[i])
+        }
+        w |> write("]\n")
+        w |> write("---\n\n")
+        w |> write(body)
+        if (!empty(body) && character_at(body, length(body) - 1) != '\n') { // nolint:PERF003
+            w |> write("\n")
+        }
+    }
+}
+
+// Serializes the document and writes it to `path`, replacing any existing
+// file. Returns false with `error` set when the file cannot be opened for
+// writing.
+def write_doc(path : string; fm : Frontmatter; body : string; var error : string&) : bool {
+    error = ""
+    let text = serialize_doc(fm, body)
+    var ok = false
+    fopen(path, "wb") $(f) {
+        if (f == null) {
+            return
+        }
+        f |> fprint(text)
+        ok = true
+    }
+    if (!ok) {
+        error = "could not write {path}"
+    }
+    return ok
+}
diff --git a/utils/mouse/tests/fixtures/malformed-frontmatter.md b/utils/mouse/tests/fixtures/malformed-frontmatter.md
new file mode 100644
index 0000000000..fd9ed38564
--- /dev/null
+++ b/utils/mouse/tests/fixtures/malformed-frontmatter.md
@@ -0,0 +1,9 @@
+---
+slug: malformed
+title: Frontmatter without closing fence
+
+Body that should still be readable as the body, but the parser should
+flag a parse_error because the closing `---` is missing.
+
+## Questions
+- This bullet should NOT be extracted because the frontmatter never closed.
diff --git a/utils/mouse/tests/fixtures/no-frontmatter.md b/utils/mouse/tests/fixtures/no-frontmatter.md
new file mode 100644
index 0000000000..03fb111a4f
--- /dev/null
+++ b/utils/mouse/tests/fixtures/no-frontmatter.md
@@ -0,0 +1,5 @@
+This file has no frontmatter at all. The whole text is the body.
+
+## Questions
+- Does parse still work without frontmatter?
+- no-frontmatter check
diff --git a/utils/mouse/tests/fixtures/sample-doc-minimal.md b/utils/mouse/tests/fixtures/sample-doc-minimal.md
new file mode 100644
index 0000000000..641b0ab20a
--- /dev/null
+++ b/utils/mouse/tests/fixtures/sample-doc-minimal.md
@@ -0,0 +1,6 @@
+---
+slug: sample-doc-minimal
+title: Minimal frontmatter fixture
+---
+
+Body with only the required frontmatter fields. No links, no last_verified, no created date, no Questions section.
diff --git a/utils/mouse/tests/fixtures/sample-doc.md b/utils/mouse/tests/fixtures/sample-doc.md new file mode 100644 index 0000000000..c1b2da433d --- /dev/null +++ b/utils/mouse/tests/fixtures/sample-doc.md @@ -0,0 +1,20 @@ +--- +slug: sample-doc +title: Canonical full doc fixture +created: 2026-05-08 +last_verified: 2026-05-08 +links: [other-slug, another-slug] +--- + +This is the canonical fixture body. It has prose, then a list: + +- one +- two +- three + +And then a `## Questions` section so the parser has something to extract. + +## Questions +- How do I do the canonical thing? +- canonical fixture +- sample doc question diff --git a/utils/mouse/tests/test_index.das b/utils/mouse/tests/test_index.das new file mode 100644 index 0000000000..248daf0e2c --- /dev/null +++ b/utils/mouse/tests/test_index.das @@ -0,0 +1,782 @@ +options gen2 +options no_unused_function_arguments = false +options no_unused_block_arguments = false + +require dastest/testing_boost public +require strings +require daslib/fio +require ../store.das +require ../index.das + +// ─── helpers ───────────────────────────────────────────────────────── + +def make_temp_root(t : T?) 
: string { + let r = create_temp_directory_result("mouse_test") + if (!(r is value)) { + t |> failure("could not create temp dir: {unsafe(r.error)}") + return "" + } + let root = unsafe(r.value) + mkdir_rec(path_join(root, "docs")) + return root +} + +def cleanup_root(root : string) { + if (!empty(root)) { + rmdir_rec(root) + } +} + +def seed_doc(t : T?; root : string; + slug : string; title : string; body : string; + links : array; questions : array) { + var fm = Frontmatter( + slug = slug, + title = title, + created = "2026-05-08", + last_verified = "2026-05-08") + fm.links |> reserve(length(links)) + for (l in links) { + fm.links |> push(l) + } + let composed = build_string() $(var w) { + w |> write(body) + if (!empty(body) && character_at(body, length(body) - 1) != '\n') { // nolint:PERF003 + w |> write("\n") + } + if (!empty(questions)) { + w |> write("\n## Questions\n") + for (q in questions) { + w |> write("- {q}\n") + } + } + } + let path = path_join(path_join(root, "docs"), "{slug}.md") + var err : string + let ok = write_doc(path, fm, composed, err) + t |> success(ok, "seed {slug}: {err}") +} + +// ─── rebuild ───────────────────────────────────────────────────────── + +[test] +def test_rebuild_empty(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + var n = -1 + with_index_db(root) $(db) { + n = rebuild(db, root) + } + t |> equal(n, 0) + cleanup_root(root) +} + +[test] +def test_rebuild_picks_up_files(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs1 <- ["one a", "one b"] + let qs2 <- ["two a"] + let qs3 <- ["three a"] + seed_doc(t, root, "alpha", "Alpha title", "alpha body", no_links, qs1) + seed_doc(t, root, "beta", "Beta title", "beta body", no_links, qs2) + seed_doc(t, root, "gamma", "Gamma title", "gamma body", no_links, qs3) + var n = -1 + var n_docs = -1 + with_index_db(root) $(db) { + n = rebuild(db, root) + let rows <- _sql(db |> select_from(type)) + n_docs = length(rows) + } + t |> equal(n, 3) + t |> equal(n_docs, 3) + cleanup_root(root) +} + +[test] +def test_rebuild_idempotent(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["q one"] + seed_doc(t, root, "alpha", "Alpha title", "alpha body", no_links, qs) + var n1 = -1 + var n2 = -1 + var doc_count_after = -1 + with_index_db(root) $(db) { + n1 = rebuild(db, root) + n2 = rebuild(db, root) + let rows <- _sql(db |> select_from(type)) + doc_count_after = length(rows) + } + t |> equal(n1, 1) + t |> equal(n2, 1) + t |> equal(doc_count_after, 1) + cleanup_root(root) +} + +[test] +def test_rebuild_removes_deleted(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs1 <- ["a"] + let qs2 <- ["b"] + seed_doc(t, root, "alpha", "Alpha", "alpha body", no_links, qs1) + seed_doc(t, root, "beta", "Beta", "beta body", no_links, qs2) + var n_first = -1 + var n_after_delete = -1 + var has_alpha_after = false + var has_beta_after = false + with_index_db(root) $(db) { + n_first = rebuild(db, root) + // Delete beta from disk and rebuild. 
+ remove(path_join(path_join(root, "docs"), "beta.md")) + n_after_delete = rebuild(db, root) + let rows <- _sql(db |> select_from(type) |> _select(_.slug)) + for (s in rows) { + if (s == "alpha") { + has_alpha_after = true + } + if (s == "beta") { + has_beta_after = true + } + } + } + t |> equal(n_first, 2) + t |> equal(n_after_delete, 1) + t |> success(has_alpha_after, "alpha kept") + t |> success(!has_beta_after, "beta removed") + cleanup_root(root) +} + +// ─── search ────────────────────────────────────────────────────────── + +[test] +def test_search_returns_top_match(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs1 <- ["how do typefunction macros work"] + let qs2 <- ["how do I install daspkg"] + seed_doc(t, root, "doc-a", "Typefunction macros", "details about typefunction", no_links, qs1) + seed_doc(t, root, "doc-b", "Daspkg install", "details about daspkg", no_links, qs2) + var top_slug : string + var n_results = 0 + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "typefunction", 5) + n_results = length(hits) + if (n_results > 0) { + top_slug = hits[0].slug + } + } + t |> success(n_results > 0, "non-empty results for 'typefunction'") + t |> equal(top_slug, "doc-a") + cleanup_root(root) +} + +[test] +def test_search_empty_corpus(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + var n_results = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "anything", 5) + n_results = length(hits) + } + t |> equal(n_results, 0) + cleanup_root(root) +} + +[test] +def test_search_no_match(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["the canonical question"] + seed_doc(t, root, "doc-a", "Some title", "some body", no_links, qs) + var n_results = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "zzqxxgibberish", 5) + n_results = length(hits) + } + t |> equal(n_results, 0) + cleanup_root(root) +} + +[test] +def test_search_respects_k(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + for (i in range(5)) { + let qs <- ["foo bar question {i}"] + seed_doc(t, root, "doc-{i}", "Title foo {i}", "body foo {i}", no_links, qs) + } + var n_results = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "foo", 2) + n_results = length(hits) + } + t |> equal(n_results, 2) + cleanup_root(root) +} + +// SQLite treats LIMIT -1 as "no limit", so a non-positive k must short- +// circuit before reaching the SQL layer. +[test] +def test_search_k_zero_returns_empty(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["foo question"] + seed_doc(t, root, "alpha", "Foo title", "foo body", no_links, qs) + var n_zero = -1 + var n_neg = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let h0 <- search(db, "foo", 0) + n_zero = length(h0) + let hn <- search(db, "foo", -3) + n_neg = length(hn) + } + t |> equal(n_zero, 0) + t |> equal(n_neg, 0) + cleanup_root(root) +} + +// Tabs in the query previously survived sanitize then sat inside a single +// `split(" ")` token, breaking matches. Verify a tab-separated query +// still matches a doc whose body contains the words. +[test] +def test_search_handles_tab_in_query(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["sample"] + seed_doc(t, root, "alpha", "Tab Match", "alpha gamma sample body", no_links, qs) + var hits_count = -1 + var top_slug = "" + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "alpha\tgamma", 5) + hits_count = length(hits) + if (!empty(hits)) { + top_slug = hits[0].slug + } + } + t |> success(hits_count >= 1, "tab-separated query produced a hit") + t |> equal(top_slug, "alpha") + cleanup_root(root) +} + +// ─── cross-refs ────────────────────────────────────────────────────── + +[test] +def test_links_table_populated(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let links_a <- ["other-slug"] + let no_q : array + seed_doc(t, root, "doc-a", "Title A", "body A", links_a, no_q) + var links : array> + with_index_db(root) $(db) { + rebuild(db, root) + let rows <- _sql(db |> select_from(type) |> _select((from_slug = _.from_slug, to_slug = _.to_slug))) + for (r in rows) { + links |> push(r) + } + } + t |> equal(length(links), 1) + if (!empty(links)) { + t |> equal(links[0].from_slug, "doc-a") + t |> equal(links[0].to_slug, "other-slug") + } + cleanup_root(root) +} + +[test] +def test_reverse_index(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let links_a <- ["target"] + let links_b <- ["target"] + let no_q : array + seed_doc(t, root, "alpha", "A", "a body", links_a, no_q) + seed_doc(t, root, "beta", "B", "b body", links_b, no_q) + var froms : array + with_index_db(root) $(db) { + rebuild(db, root) + froms <- linked_from(db, "target") + } + t |> equal(length(froms), 2) + if (length(froms) >= 2) { + t |> equal(froms[0], "alpha") + t |> equal(froms[1], "beta") + } + cleanup_root(root) +} + +// ─── dupe-on-add gate ──────────────────────────────────────────────── + +[test] +def test_dupe_check_finds_similar(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["how to write typefunction macros"] + seed_doc(t, root, "doc-a", "Writing typefunction macros", "details", no_links, qs) + var n_similar = -1 + var top_slug : string + with_index_db(root) $(db) { + rebuild(db, root) + let similar <- dupe_check(db, "typefunction", 5) + n_similar = length(similar) + if (n_similar > 0) { + top_slug = similar[0].slug + } + } + t |> success(n_similar > 0, "expected at least one match") + t |> equal(top_slug, "doc-a") + cleanup_root(root) +} + +[test] +def test_dupe_check_no_match(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["alpha question"] + seed_doc(t, root, "doc-a", "Alpha", "alpha body", no_links, qs) + var n_similar = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let similar <- dupe_check(db, "zzqxxgibberish", 5) + n_similar = length(similar) + } + t |> equal(n_similar, 0) + cleanup_root(root) +} + +[test] +def test_add_dupe_gate_no_force(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["how to typefunction"] + seed_doc(t, root, "doc-a", "Typefunction", "typefunction body", no_links, qs) + var created = true + var n_similar = -1 + var n_files_after = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let outcome <- add_doc(db, root, "typefunction question paraphrase", "answer body", "", false) + created = outcome.created + n_similar = length(outcome.similar) + let files <- list_doc_files(root) + n_files_after = length(files) + } + t |> success(!created, "should not create when similar exists") + t |> success(n_similar > 0, "should return at least one similar") + t |> equal(n_files_after, 1) // only doc-a, no new file written + cleanup_root(root) +} + +[test] +def test_add_doc_rejects_traversal_slug_hint(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + var created = false + var produced_slug : string + var written_under_docs = false + var n_files_after = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let outcome <- add_doc(db, root, "Some Question", "answer body", "../escape", true) + created = outcome.created + produced_slug = outcome.slug + let docs = docs_dir(root) + written_under_docs = starts_with(outcome.written_path, docs) + let files <- list_doc_files(root) + n_files_after = length(files) + } + t |> success(created, "fail-soft: bad slug_hint should still create from title") + t |> equal(produced_slug, "some-question") + t |> success(written_under_docs, "written path stays under docs dir: got {produced_slug}") + t |> equal(n_files_after, 1) + cleanup_root(root) +} + +// Newlines in `question` would otherwise split the YAML `title:` line and +// could inject sibling frontmatter keys. +[test] +def test_add_doc_sanitizes_question_newlines(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + var written_path : string + var produced_slug : string + with_index_db(root) $(db) { + rebuild(db, root) + let injected = "Hello\nslug: hijacked" + let outcome <- add_doc(db, root, injected, "answer", "", true) + written_path = outcome.written_path + produced_slug = outcome.slug + } + var doc : ParsedDoc + var err : string + let ok = read_doc(written_path, doc, err) + t |> success(ok, "read back: {err}") + t |> success(find(doc.frontmatter.title, "\n") < 0, "no newlines in title: {doc.frontmatter.title}") + t |> success(produced_slug != "hijacked", "slug not hijacked: {produced_slug}") + t |> success(doc.frontmatter.slug != "hijacked", "frontmatter slug not hijacked: {doc.frontmatter.slug}") + cleanup_root(root) +} + +[test] +def test_add_doc_rejects_blank_question(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + var created = false + var error : string + var n_files_after = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let outcome <- add_doc(db, root, " \n\r ", "answer", "", true) + created = outcome.created + error = outcome.error + let files <- list_doc_files(root) + n_files_after = length(files) + } + t |> success(!created, "must not create with blank question") + t |> success(!empty(error), "error message populated: {error}") + t |> equal(n_files_after, 0) + cleanup_root(root) +} + +// A doc accidentally listing the same link twice in frontmatter must not +// abort the rebuild transaction on UNIQUE(from_slug, to_slug). +[test] +def test_rebuild_dedupes_duplicate_links(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let dup_links <- ["target", "target", "target"] + let no_q : array + seed_doc(t, root, "alpha", "Alpha", "alpha body", dup_links, no_q) + var n_links_after = -1 + var n_docs = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let link_rows <- _sql(db |> select_from(type)) + n_links_after = length(link_rows) + let doc_rows <- _sql(db |> select_from(type)) + n_docs = length(doc_rows) + } + t |> equal(n_links_after, 1) + t |> equal(n_docs, 1) // doc itself still indexed + cleanup_root(root) +} + +// `cmd_get`/`tool_get` build the path as `/{slug}.md`, never reading +// `DocRow.path`. A file whose basename diverges from its frontmatter slug +// (rename without re-frontmatter, or hand-edited slug) would be searchable +// but unfetchable. Skip+warn during rebuild so search results never point +// at unfetchable docs. +[test] +def test_rebuild_skips_basename_slug_mismatch(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let no_q : array + seed_doc(t, root, "good", "Good", "good body", no_links, no_q) + // Frontmatter slug `actual-slug` written into a file named `weird.md`. 
+ let fm = Frontmatter( + slug = "actual-slug", + title = "Mismatch", + created = "2026-05-08", + last_verified = "2026-05-08") + var werr : string + let weird_path = path_join(path_join(root, "docs"), "weird.md") + let wrote = write_doc(weird_path, fm, "weird body\n", werr) + t |> success(wrote, "seed mismatched basename: {werr}") + var n = -1 + var slugs : array + with_index_db(root) $(db) { + n = rebuild(db, root) + let rows <- _sql(db |> select_from(type) |> _select(_.slug)) + for (s in rows) { + slugs |> push(s) + } + } + t |> equal(n, 1) // only `good`; mismatched-basename doc skipped + t |> equal(length(slugs), 1) + if (!empty(slugs)) { + t |> equal(slugs[0], "good") + } + cleanup_root(root) +} + +// FTS5 keywords (OR/AND/NOT/NEAR) are uppercase-only operators, and tokens +// are OR-joined raw (no quoting). `foo OR bar` would tokenize to +// [foo, OR, bar] and join to `foo OR OR OR bar` — invalid syntax, caught, +// empty result. Downcasing every token makes user-typed keywords inert. +[test] +def test_search_handles_uppercase_fts_keywords(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["alpha"] + seed_doc(t, root, "alpha", "Alpha title", "alpha bravo body", no_links, qs) + var hits_count = -1 + var top_slug = "" + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "alpha OR bravo", 5) + hits_count = length(hits) + if (!empty(hits)) { + top_slug = hits[0].slug + } + } + t |> success(hits_count >= 1, "uppercase OR token must not break query") + t |> equal(top_slug, "alpha") + cleanup_root(root) +} + +// A hand-edited `.md` with an invalid frontmatter slug (uppercase, dot, +// leading underscore, etc.) would otherwise be indexed but unretrievable +// via cmd_get/tool_get since they enforce is_valid_slug. Skip+warn so +// search results never point at unfetchable docs. +[test] +def test_rebuild_skips_invalid_frontmatter_slug(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let no_q : array + seed_doc(t, root, "good", "Good", "good body", no_links, no_q) + let fm = Frontmatter( + slug = "Bad.Slug", + title = "Bad", + created = "2026-05-08", + last_verified = "2026-05-08") + var werr : string + let bad_path = path_join(path_join(root, "docs"), "bad.md") + let wrote = write_doc(bad_path, fm, "bad body\n", werr) + t |> success(wrote, "seed bad slug: {werr}") + var n = -1 + var slugs : array + with_index_db(root) $(db) { + n = rebuild(db, root) + let rows <- _sql(db |> select_from(type) |> _select(_.slug)) + for (s in rows) { + slugs |> push(s) + } + } + t |> equal(n, 1) // only `good` indexed; bad-slug doc skipped + t |> equal(length(slugs), 1) + if (!empty(slugs)) { + t |> equal(slugs[0], "good") + } + cleanup_root(root) +} + +[test] +def test_add_doc_accepts_valid_slug_hint(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + var produced_slug : string + with_index_db(root) $(db) { + rebuild(db, root) + let outcome <- add_doc(db, root, "Some Question", "answer body", "my-slug", true) + produced_slug = outcome.slug + } + t |> equal(produced_slug, "my-slug") + cleanup_root(root) +} + +// ─── query log ─────────────────────────────────────────────────────── + +[test] +def test_log_query_records_hit(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["how to typefunction"] + seed_doc(t, root, "alpha", "Typefunction macros", "alpha body", no_links, qs) + var rows : array + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "typefunction", 5) + log_query(db, "typefunction", hits, "cli") + rows <- _sql(db |> select_from(type)) + } + t |> equal(length(rows), 1) + if (!empty(rows)) { + t |> equal(rows[0].question, "typefunction") + t |> equal(rows[0].source, "cli") + t |> success(rows[0].match_count >= 1, "match_count populated") + t |> equal(rows[0].top_slug, "alpha") + t |> success(!empty(rows[0].asked_at), "asked_at populated") + } + cleanup_root(root) +} + +[test] +def test_log_query_records_miss(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["alpha question"] + seed_doc(t, root, "alpha", "Alpha", "alpha body", no_links, qs) + var rows : array + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "zzqxxgibberish", 5) + log_query(db, "zzqxxgibberish", hits, "mcp") + rows <- _sql(db |> select_from(type)) + } + t |> equal(length(rows), 1) + if (!empty(rows)) { + t |> equal(rows[0].match_count, 0) + t |> equal(rows[0].top_slug, "") + t |> equal(rows[0].source, "mcp") + } + cleanup_root(root) +} + +[test] +def test_rebuild_preserves_log(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["alpha question"] + seed_doc(t, root, "alpha", "Alpha", "alpha body", no_links, qs) + var n_before = -1 + var n_after = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let hits <- search(db, "alpha", 5) + log_query(db, "alpha", hits, "cli") + let rows1 <- _sql(db |> select_from(type)) + n_before = length(rows1) + rebuild(db, root) + let rows2 <- _sql(db |> select_from(type)) + n_after = length(rows2) + } + t |> equal(n_before, 1) + t |> equal(n_after, 1) + cleanup_root(root) +} + +[test] +def test_recent_queries_filters_misses(t : T?) { + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["alpha question"] + seed_doc(t, root, "alpha", "Alpha", "alpha body", no_links, qs) + var n_all = -1 + var n_misses = -1 + with_index_db(root) $(db) { + rebuild(db, root) + let h1 <- search(db, "alpha", 5) + log_query(db, "alpha", h1, "cli") + let h2 <- search(db, "zzqxxgibberish", 5) + log_query(db, "zzqxxgibberish", h2, "cli") + let all_rows <- recent_queries(db, 10, false) + let miss_rows <- recent_queries(db, 10, true) + n_all = length(all_rows) + n_misses = length(miss_rows) + } + t |> equal(n_all, 2) + t |> equal(n_misses, 1) + cleanup_root(root) +} + +[test] +def test_add_dupe_gate_force(t : T?) 
{ + let root = make_temp_root(t) + if (empty(root)) { + return + } + let no_links : array + let qs <- ["how to typefunction"] + seed_doc(t, root, "doc-a", "Typefunction", "typefunction body", no_links, qs) + var created = false + var n_files_after = -1 + var found_via_search = false + with_index_db(root) $(db) { + rebuild(db, root) + let outcome <- add_doc(db, root, "completely different question xyz", "new answer body", "", true) + created = outcome.created + let files <- list_doc_files(root) + n_files_after = length(files) + let hits <- search(db, "completely different xyz", 5) + for (h in hits) { + if (h.slug == outcome.slug) { + found_via_search = true + } + } + } + t |> success(created, "should create with force=true") + t |> equal(n_files_after, 2) + t |> success(found_via_search, "newly added doc retrievable via search") + cleanup_root(root) +} diff --git a/utils/mouse/tests/test_store.das b/utils/mouse/tests/test_store.das new file mode 100644 index 0000000000..109632d1f1 --- /dev/null +++ b/utils/mouse/tests/test_store.das @@ -0,0 +1,316 @@ +options gen2 +options no_unused_function_arguments = false + +require dastest/testing_boost public +require strings +require daslib/fio +require ../store.das + + +def fixture_path(name : string) : string { + return "utils/mouse/tests/fixtures/{name}" +} + +def load_fixture(t : T?; name : string) : ParsedDoc { + var doc : ParsedDoc + var error : string + let ok = read_doc(fixture_path(name), doc, error) + t |> success(ok, "fixture {name} loads: {error}") + return <- doc +} + +// ─── frontmatter parsing ────────────────────────────────────────────── + +[test] +def test_parse_frontmatter_full(t : T?) 
{ + let doc <- load_fixture(t, "sample-doc.md") + t |> equal(doc.frontmatter.slug, "sample-doc") + t |> equal(doc.frontmatter.title, "Canonical full doc fixture") + t |> equal(doc.frontmatter.created, "2026-05-08") + t |> equal(doc.frontmatter.last_verified, "2026-05-08") + t |> equal(length(doc.frontmatter.links), 2) + if (length(doc.frontmatter.links) >= 2) { + t |> equal(doc.frontmatter.links[0], "other-slug") + t |> equal(doc.frontmatter.links[1], "another-slug") + } + t |> success(empty(doc.parse_error), "no parse error: {doc.parse_error}") +} + +[test] +def test_parse_frontmatter_minimal(t : T?) { + let doc <- load_fixture(t, "sample-doc-minimal.md") + t |> equal(doc.frontmatter.slug, "sample-doc-minimal") + t |> equal(doc.frontmatter.title, "Minimal frontmatter fixture") + t |> equal(doc.frontmatter.created, "") + t |> equal(doc.frontmatter.last_verified, "") + t |> equal(length(doc.frontmatter.links), 0) + t |> success(empty(doc.parse_error), "no parse error: {doc.parse_error}") +} + +[test] +def test_parse_frontmatter_missing_fence(t : T?) { + let doc <- load_fixture(t, "no-frontmatter.md") + t |> equal(doc.frontmatter.slug, "") + t |> equal(doc.frontmatter.title, "") + t |> success(empty(doc.parse_error), "no fence is not an error: {doc.parse_error}") + // Body should contain the whole file content. + t |> success(find(doc.body, "no frontmatter at all") >= 0, "body has the file content") +} + +[test] +def test_parse_frontmatter_malformed(t : T?) { + let doc <- load_fixture(t, "malformed-frontmatter.md") + t |> success(!empty(doc.parse_error), "missing closing fence sets parse_error") + // Per locked-in behavior: body is the whole text on parse error + t |> success(find(doc.body, "Frontmatter without closing fence") >= 0, "body contains the original text on error") +} + +[test] +def test_parse_frontmatter_inline_links(t : T?) 
{ + let text = "---\nslug: x\nlinks: [a, b, c]\n---\nbody" + let doc <- parse_doc_text(text) + t |> equal(length(doc.frontmatter.links), 3) + if (length(doc.frontmatter.links) >= 3) { + t |> equal(doc.frontmatter.links[0], "a") + t |> equal(doc.frontmatter.links[1], "b") + t |> equal(doc.frontmatter.links[2], "c") + } +} + +[test] +def test_parse_frontmatter_empty_inline_links(t : T?) { + let text = "---\nslug: x\nlinks: []\n---\nbody" + let doc <- parse_doc_text(text) + t |> equal(length(doc.frontmatter.links), 0) +} + +// ─── ## Questions section ───────────────────────────────────────────── + +[test] +def test_parse_questions_section(t : T?) { + let doc <- load_fixture(t, "sample-doc.md") + t |> equal(length(doc.questions), 3) + if (length(doc.questions) >= 3) { + t |> equal(doc.questions[0], "How do I do the canonical thing?") + t |> equal(doc.questions[1], "canonical fixture") + t |> equal(doc.questions[2], "sample doc question") + } +} + +[test] +def test_parse_questions_missing(t : T?) { + let doc <- load_fixture(t, "sample-doc-minimal.md") + t |> equal(length(doc.questions), 0) +} + +[test] +def test_parse_questions_stops_at_next_heading(t : T?) { + let text = "## Questions\n- one\n- two\n## Other\n- three\n" + let qs <- parse_questions(text) + t |> equal(length(qs), 2) + if (length(qs) >= 2) { + t |> equal(qs[0], "one") + t |> equal(qs[1], "two") + } +} + +// ─── slug generation ───────────────────────────────────────────────── + +[test] +def test_slug_from_title_basic(t : T?) { + t |> equal(slug_from_title("How do I X?"), "how-do-i-x") + t |> equal(slug_from_title("What's the pattern for foo?"), "what-s-the-pattern-for-foo") + t |> equal(slug_from_title("UPPERCASE"), "uppercase") + t |> equal(slug_from_title("multi spaces"), "multi-spaces") + t |> equal(slug_from_title("trim-trailing-"), "trim-trailing") + t |> equal(slug_from_title("-leading"), "leading") +} + +[test] +def test_slug_from_title_empty_returns_empty(t : T?) 
{ + t |> equal(slug_from_title(""), "") + t |> equal(slug_from_title("???"), "") + t |> equal(slug_from_title(" "), "") +} + +[test] +def test_slug_dedupe_collision(t : T?) { + var existing : table + existing |> insert("foo") + existing |> insert("foo-2") + let s = slug_from_title_dedupe("Foo", existing) + t |> equal(s, "foo-3") +} + +[test] +def test_slug_dedupe_no_collision(t : T?) { + let existing : table + let s = slug_from_title_dedupe("Bar", existing) + t |> equal(s, "bar") +} + +[test] +def test_dedupe_slug_preserves_underscore(t : T?) { + var existing : table + existing |> insert("my_doc") + let s = dedupe_slug("my_doc", existing) + t |> equal(s, "my_doc-2") +} + +[test] +def test_dedupe_slug_no_collision_returns_verbatim(t : T?) { + let existing : table + let s = dedupe_slug("any-shape_keeps", existing) + t |> equal(s, "any-shape_keeps") +} + +// Long titles otherwise produce slugs that `is_valid_slug` rejects, making +// the doc unfetchable via cmd_get/tool_get. +[test] +def test_slug_from_title_caps_at_max_len(t : T?) { + let s = slug_from_title(repeat("a", MAX_SLUG_LEN * 2)) + t |> equal(length(s), MAX_SLUG_LEN) + t |> success(is_valid_slug(s), "capped slug passes is_valid_slug") +} + +// If the cap lands on a `-` (dash inserted between word boundaries), the +// trailing dash must be trimmed so the slug doesn't end with `-`. +[test] +def test_slug_from_title_cap_trims_trailing_dash(t : T?) { + // "aaa...(MAX-1) bbb...(200)" → raw has `-` exactly at position MAX-1. + let prefix = repeat("a", MAX_SLUG_LEN - 1) + let suffix = repeat("b", 200) + let title = "{prefix} {suffix}" + let s = slug_from_title(title) + t |> success(length(s) <= MAX_SLUG_LEN, "len {length(s)} <= {MAX_SLUG_LEN}") + t |> success(!ends_with(s, "-"), "no trailing dash after cap: {s}") +} + +// On collision, base must be capped enough to leave room for `-N` suffix +// so candidate stays within MAX_SLUG_LEN. +[test] +def test_dedupe_slug_caps_long_base_when_suffixing(t : T?) 
{ + let huge = repeat("a", MAX_SLUG_LEN) + let huge_capped = repeat("a", MAX_SLUG_LEN - 6) + var existing : table + existing |> insert(huge) + existing |> insert(huge_capped) + let s = dedupe_slug(huge, existing) + t |> success(length(s) <= MAX_SLUG_LEN, "deduped len {length(s)} <= {MAX_SLUG_LEN}") + t |> success(is_valid_slug(s), "deduped slug passes is_valid_slug: {s}") + t |> equal(s, "{huge_capped}-2") +} + +// ─── question sanitization ────────────────────────────────────────── + +[test] +def test_sanitize_question_replaces_newlines(t : T?) { + t |> equal(sanitize_question("hello\nworld"), "hello world") + t |> equal(sanitize_question("a\r\nb"), "a b") + t |> equal(sanitize_question(" trim\nme "), "trim me") +} + +[test] +def test_sanitize_question_blank_returns_empty(t : T?) { + t |> equal(sanitize_question(" \n\r "), "") + t |> equal(sanitize_question(""), "") +} + +// ─── slug validation ───────────────────────────────────────────────── + +[test] +def test_is_valid_slug_accepts(t : T?) { + t |> success(is_valid_slug("alpha"), "alpha") + t |> success(is_valid_slug("alpha-beta"), "alpha-beta") + t |> success(is_valid_slug("a_b"), "a_b") + t |> success(is_valid_slug("a1"), "a1") + t |> success(is_valid_slug("why-darken-bd"), "why-darken-bd") + t |> success(is_valid_slug("0starts-with-digit"), "digit-start") +} + +[test] +def test_is_valid_slug_rejects(t : T?) 
{ + t |> success(!is_valid_slug(""), "empty") + t |> success(!is_valid_slug("../etc"), "path traversal") + t |> success(!is_valid_slug("foo/bar"), "forward slash") + t |> success(!is_valid_slug("foo\\bar"), "back slash") + t |> success(!is_valid_slug("-leading"), "leading dash") + t |> success(!is_valid_slug("_leading"), "leading underscore") + t |> success(!is_valid_slug("foo.md"), "contains dot") + t |> success(!is_valid_slug("foo bar"), "space") + t |> success(!is_valid_slug("FOO"), "uppercase") + let huge = repeat("a", 200) + t |> success(!is_valid_slug(huge), "over 128 chars") +} + +// ─── read/write error contracts ────────────────────────────────────── + +[test] +def test_read_doc_clears_stale_error(t : T?) { + let r = create_temp_directory_result("mouse_test") + if (!(r is value)) { + t |> failure("could not create temp dir: {unsafe(r.error)}") + return + } + let root = unsafe(r.value) + let fm = Frontmatter(slug = "alpha", title = "Alpha", created = "2026-05-08", last_verified = "2026-05-08") + let path = path_join(root, "alpha.md") + var werr : string + let wrote = write_doc(path, fm, "body content\n", werr) + t |> success(wrote, "seed write: {werr}") + var doc : ParsedDoc + var err = "leftover from previous call" + let ok = read_doc(path, doc, err) + t |> success(ok, "read_doc succeeded") + t |> equal(err, "") + t |> equal(doc.frontmatter.slug, "alpha") + rmdir_rec(root) +} + +[test] +def test_read_doc_sets_error_on_missing(t : T?) { + var doc : ParsedDoc + var err : string + let ok = read_doc("/nonexistent-mouse-test-path/missing.md", doc, err) + t |> success(!ok, "read of missing path returns false") + t |> success(!empty(err), "error message populated") + t |> success(find(err, "missing.md") >= 0, "error mentions the path: {err}") +} + +// ─── body hash ─────────────────────────────────────────────────────── + +[test] +def test_body_hash_stable(t : T?) 
{ + let h1 = hash_body("hello world") + let h2 = hash_body("hello world") + t |> equal(h1, h2) + let h3 = hash_body("hello worlD") // different last char + t |> success(h1 != h3, "different bodies produce different hashes") +} + +[test] +def test_body_hash_empty(t : T?) { + let h = hash_body("") + t |> success(!empty(h), "empty body still produces a hash string") +} + +// ─── round trip serialize/parse ────────────────────────────────────── + +[test] +def test_serialize_round_trip(t : T?) { + var fm = Frontmatter( + slug = "x", + title = "test title", + created = "2026-01-02", + last_verified = "2026-05-08") + fm.links |> push("a") + fm.links |> push("b") + let text = serialize_doc(fm, "body content here\n") + let doc <- parse_doc_text(text) + t |> equal(doc.frontmatter.slug, "x") + t |> equal(doc.frontmatter.title, "test title") + t |> equal(doc.frontmatter.created, "2026-01-02") + t |> equal(doc.frontmatter.last_verified, "2026-05-08") + t |> equal(length(doc.frontmatter.links), 2) + t |> success(find(doc.body, "body content here") >= 0, "body round-tripped") +}