Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions daslib/linq.das
Original file line number Diff line number Diff line change
Expand Up @@ -846,8 +846,7 @@ def skip_while_to_array(var src : iterator<auto(TT)>; predicate : block<(arg : T

def take(arr : array<auto(TT)>; var total : int) : array<TT -& -const> {
    //! Yields only the first `total` elements.
    //! When `total` exceeds the array length, the count is capped at `length(arr)`.
    // Single binding: the superseded `len` / ternary pair declared `taking` a second time.
    let taking = min(total, length(arr))
    return <- subarray(arr, 0..taking)
}

Expand Down
2 changes: 1 addition & 1 deletion modules/dasSQLITE/daslib/sqlite_linq.das
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ def _first_opt(arr : array<auto(TT)>) : Option<TT -const -#> {
//! Compat-mode fallback: returns ``some(arr[0])`` if non-empty, ``none`` otherwise.
//! Inside `_sql(...)` the macro intercepts this call before evaluation
//! and emits ` LIMIT 1` with the OneOpt materializer.
if (length(arr) > 0) {
if (!empty(arr)) {
return some(arr[0])
}
return none(type<TT -const -#>)
Expand Down
4 changes: 3 additions & 1 deletion skills/mouse.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,8 @@ Read this BEFORE asking, adding, or curating mouse cards. The hard rule (MOUSE F
|---|---|
| About to research a "how do I X?" / "what's the pattern for Y?" / "why does Z behave this way?" question | `mouse__ask` first |
| Just answered such a question through your own research | `mouse__add` before moving on |
| Wrap-up after a meaningful chunk of work | See `skills/task_wrap_up.md` (review `mouse log --misses` + un-asked questions) |
| `mouse__ask` returned cards but none of them address what you actually asked | `mouse__bad(queryId)` immediately, then research and `mouse__add` if the answer's worth caching |
| Wrap-up after a meaningful chunk of work | See `skills/task_wrap_up.md` (review `mouse log --misses` for zero-result asks, `mouse log --review` for unrated hits) |
| Symbol/field/type lookup ("where is X defined?", "all references to Y") | NOT mouse — use daslang MCP (`find_symbol`, `grep_usage`, `find_references`) |
| Categorical convention (gen2 syntax, build flags, formatting) | NOT mouse — `CLAUDE.md` / `skills/*.md` |
| Project state (in-progress branches, who's doing what) | NOT mouse — `git log` / memory |
Expand All @@ -23,6 +24,7 @@ The mouse MCP server is deferred — the call dance is `ToolSearch select:mcp__m
- **Free-form natural language is fine.** The retriever ORs words and ranks by BM25 + Jaccard title-similarity. Don't write FTS5 syntax unless you specifically need phrase matching (then pass `rawQuery=true`).
- **One question per call.** Three sub-questions → three asks. They hit different cards; compressing them into one query dilutes BM25 scoring.
- **Plan-mode sweep.** During planning, ask the mouse early and often — design questions, prior-art questions, gotcha-recall, trade-off recall. Each cached answer saves a research detour; each cache miss is one `mouse__add` away from being free next time.
- **Mark no-match (false-positive hits).** The response begins with `query_id: N`. If you scan the returned cards and **none** of them address the question — BM25 matched on shared tokens but the corpus has no real answer yet — call `mouse__bad(queryId=N)` before moving on. That converts a false-positive hit into the same actionable signal as `match_count = 0`. Skip when you're unsure whether a card kind-of helped — only the clear-negative signal carries information; we never mark hits as good (default-positive bias would make `useful=true` ratings noise).

## Adding

Expand Down
10 changes: 9 additions & 1 deletion skills/task_wrap_up.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,18 @@ bin/daslang utils/mouse/main.das -- log --misses

(On Windows MSVC layout the binary is `bin/Release/daslang.exe` — same args after.)

For each recent miss:
For each recent miss (`match_count = 0`):
- **Did this session answer it?** If yes — `mouse__add` (or `mouse add` from CLI). Next session won't redo the work.
- **Did you _almost_ ask mouse this session but didn't?** Try asking now — misses-you-skipped don't show up in `--misses`. If the work you just did has the answer, add it.

```bash
bin/daslang utils/mouse/main.das -- log --review
```

The `--review` queue lists hits the agent didn't rate at ask time: `match_count > 0 AND useful IS NULL`. Some are real false positives — BM25 matched on shared tokens but none of the returned cards actually answered the question. For each row, compare the listed top slug against the question:
- **Did the top slug actually address the question?** If clearly yes — leave it (implicit positive). If clearly no — call `mouse__bad` with the row's id (CLI: `mouse bad <id>`). That row joins the `--bad` queue. If this session has the real answer, also `mouse__add` so next session retrieves the right card instead of the false-positive one.
- **Unsure?** Skip — better to leave unrated than guess. Only the negative signal carries information; we never mark hits as good.

```bash
bin/daslang utils/mouse/main.das -- log
```
Expand Down
1 change: 1 addition & 0 deletions tests/json/safe.das
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ def safe_operators(t : T?) {

t |> equal((js?["a"] ?as _longint) ?? -1l, 1l)
t |> equal(js?.a ?? -1, 1)
t |> equal(js?.a ?? -1l, 1l) // JSON int (_longint) → int64 via ??
t |> equal(js?.b ?? false, true)
t |> equal(js?.c is _null, true)
t |> equal(js?.d ?? "", "str")
Expand Down
5 changes: 4 additions & 1 deletion utils/mouse/OVERVIEW.md
Original file line number Diff line number Diff line change
Expand Up @@ -55,10 +55,12 @@ Frontmatter fields: `slug` (stable ID, used for cross-refs), `title` (1-line des

| Operation | CLI | MCP tool | Notes |
|---|---|---|---|
| Retrieve | `mouse ask "<q>"` | `mouse__ask` | Top-K BM25 ranked, each annotated with a Jaccard title-similarity. Words OR-joined; `--raw-query` / `rawQuery=true` passes raw FTS5 syntax (phrases, NEAR, explicit AND/OR). |
| Retrieve | `mouse ask "<q>"` | `mouse__ask` | Top-K BM25 ranked, each annotated with a Jaccard title-similarity. Words OR-joined; `--raw-query` / `rawQuery=true` passes raw FTS5 syntax (phrases, NEAR, explicit AND/OR). Response begins with `query_id:` — capture for `mouse__bad`. |
| Add Q&A | `mouse add "<q>" --body "..."` | `mouse__add` | Advisory similar list always; hard-blocks only on Jaccard ≥ 0.7. `--force` / `force=true` overrides the block. |
| Get doc | `mouse get <slug>` | `mouse__get` | Body + frontmatter + reverse-link footer. |
| Rebuild | `mouse rebuild` | `mouse__rebuild` | Force full rescan + signature reset. Normally not needed — every entry point auto-reindexes via the git-staleness check. |
| Mark no-match | `mouse bad <id>` | `mouse__bad` | Flags a previous ask as a false-positive hit (BM25 returned cards but none addressed the question). Sets `query_log.useful = 0`. Idempotent. We never write `useful = 1` — implicit positive avoids sycophancy noise. |
| Recent log | `mouse log [--misses\|--bad\|--review]` | `mouse__log` | Browse the query log. `mode=misses` → zero-result asks (the existing add-candidate queue). `mode=bad` → already-marked false positives. `mode=review` → `match_count > 0 AND useful IS NULL` (the wrap-up rating queue). |
| Serve MCP | `mouse serve` | (this _is_ the server) | stdio JSON-RPC. |

**Dupe-on-add gate.** `add` always runs a Jaccard-scored similarity check against the corpus and surfaces the top matches (whether it created or not). With `force=false` (default), it hard-blocks only when the top match scores ≥ 0.7 — a near-paraphrase. Below that threshold, the add proceeds and the similar list is shown for awareness. The caller (LLM or human) is the actual decider; the threshold just stops obvious near-paraphrases from sneaking in. Below 0.5 nothing is surfaced unless content overlap is genuine.
Expand All @@ -79,6 +81,7 @@ The SQLite schema (managed via `[sql_migration]` from `sqlite/sqlite_migrate`):
- `links` — composite-PK pair `(from_slug, to_slug)` for cross-refs.
- `search_idx` — FTS5 virtual table; per-doc concatenation of title + question aliases + body. BM25 ranks via the `@sql_fts_rank` column.
- `index_meta` — `(key, value)` k/v table. Currently stores the staleness signature; future-proof for other persistent metadata.
- `query_log` — append-only log of every ask: `id` (PK), `asked_at`, `question`, `match_count`, `top_slug`, `source` (`cli` / `mcp`), `useful` (nullable: `NULL` = unrated, `0` = caller marked the hit irrelevant). Survives `rebuild` (which only wipes the doc cache). Two signals into curation: `match_count = 0` rows are the canonical miss-candidate queue (`mouse log --misses`); `match_count > 0 AND useful IS NULL` is the rating queue for retrospective review during wrap-up (`mouse log --review`).

Rebuild is whole-corpus delete+repopulate — simple, correct, fast for small corpora. Incremental update (re-index only changed `body_hash`) is a vNext optimization once the corpus is large enough that whole-rebuild matters.

Expand Down
57 changes: 51 additions & 6 deletions utils/mouse/index.das
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,10 @@ struct SearchRow {

// Append-only log of every ask. Survives rebuild() (which only wipes the
// doc cache). The miss list is the input signal for `mouse__add` —
// "questions asked but no doc covers them yet". `useful` captures the
// false-positive case: hit rows the caller marked as no-match (BM25
// returned cards but none addressed the question). NULL = unrated;
// 0 = no-match. We never write 1 — implicit positive avoids sycophancy.
[sql_table(name = "query_log")]
struct QueryLog {
@sql_primary_key id : int64
Expand All @@ -95,6 +98,7 @@ struct QueryLog {
match_count : int
top_slug : string
source : string // "cli" | "mcp"
@safe_when_uninitialized useful : Option<int>
}

[sql_migration(version = 1, description = "create docs/links/search_idx")]
Expand All @@ -106,7 +110,13 @@ def migration_001(db : SqlRunner) {

[sql_migration(version = 2, description = "add query_log")]
def migration_002(db : SqlRunner) {
    // Creates the query_log table with the v2-era column set.
    //
    // Raw SQL pinned to the v2-era schema. Originally this was
    // `db |> create_table(type<QueryLog>)`, but `create_table` regenerates
    // DDL from the current struct on every fresh-DB migration — so as soon
    // as a later migration evolves QueryLog (e.g. v4 adds `useful`), v2
    // would create the column too, and v4's add_column would duplicate.
    // Pinning v2's DDL here decouples migration history from struct shape.
    //
    // The struct-driven create_table call must not run alongside this
    // statement: both target the same `query_log` table.
    db |> exec("CREATE TABLE query_log (id INTEGER PRIMARY KEY, asked_at TEXT NOT NULL, question TEXT NOT NULL, match_count INTEGER NOT NULL, top_slug TEXT NOT NULL, source TEXT NOT NULL)")
}

// Persisted across cold opens: the staleness signature used by
Expand All @@ -123,6 +133,11 @@ def migration_003(db : SqlRunner) {
db |> create_table(type<IndexMeta>)
}

// Schema v4: adds the `useful` column of QueryLog to the existing
// query_log table. Per the QueryLog notes, the column is nullable:
// NULL = unrated, 0 = caller marked the hit as no-match.
[sql_migration(version = 4, description = "add useful flag to query_log")]
def migration_004(db : SqlRunner) {
db |> add_column(type<QueryLog>, "useful")
}

def with_index_db(root : string; blk : block<(db : SqlRunner) : void>) {
ensure_root(root)
with_latest_sqlite(db_path(root)) $(db) {
Expand Down Expand Up @@ -451,24 +466,54 @@ def now_iso(db : SqlRunner) : string {
}

// Appends one row to query_log describing this ask and returns the id of
// the inserted row (as reported by last_insert_rowid), so the caller can
// refer back to it later — e.g. to mark it as a no-match.
//   question - the text that was asked
//   hits     - search results for the question; its length is stored as
//              match_count and the first hit's slug as top_slug
//   source   - origin of the ask ("cli" | "mcp")
def log_query(db : SqlRunner; question : string;
              hits : array<SearchHit>; source : string) : int64 {
    // No hits → empty top_slug rather than indexing into an empty array.
    let top = !empty(hits) ? hits[0].slug : ""
    // `useful` starts out as none (unrated); it is only ever set afterwards.
    db |> insert(QueryLog(
        asked_at = now_iso(db),
        question = question,
        match_count = length(hits),
        top_slug = top,
        source = source,
        useful = none(type<int>)))
    return db |> last_insert_rowid()
}

// Flags the query_log row with the given id as a no-match by setting
// useful = 0. The result is the affected-row count: 1 when the row was
// updated, 0 when no row carries that id. Safe to repeat — marking an
// already-marked row just writes the same value again.
def mark_no_match(db : SqlRunner; id : int64) : int {
    let affected = db |> _sql_update(type<QueryLog>, _.id == id, (useful = some(0)))
    return affected
}

def recent_queries(db : SqlRunner; n : int; misses_only : bool) : array<QueryLog> {
if (misses_only) {
// Selects which query_log rows recent_queries returns. Each mode maps to
// one filter over the log; every mode lists newest rows first.
enum LogMode {
All //!< every row (newest first)
Misses //!< match_count == 0 (FTS5 returned nothing)
Bad //!< useful == 0 (caller marked the hit irrelevant)
Review //!< match_count > 0 AND useful IS NULL (queue for retrospective rating)
}

def recent_queries(db : SqlRunner; n : int; mode : LogMode) : array<QueryLog> {
if (mode == LogMode.Misses) {
var rows <- _sql(db |> select_from(type<QueryLog>)
|> _where(_.match_count == 0)
|> _order_by_descending(_.id)
|> take(n))
return <- rows
}
if (mode == LogMode.Bad) {
var rows <- _sql(db |> select_from(type<QueryLog>)
|> _where(_.useful |> unwrap_or(-1) == 0)
|> _order_by_descending(_.id)
|> take(n))
return <- rows
}
if (mode == LogMode.Review) {
var rows <- _sql(db |> select_from(type<QueryLog>)
|> _where(_.match_count > 0 && _.useful |> is_none)
|> _order_by_descending(_.id)
|> take(n))
return <- rows
}
var rows <- _sql(db |> select_from(type<QueryLog>)
|> _order_by_descending(_.id)
|> take(n))
Expand Down
Loading
Loading