diff --git a/README.md b/README.md
index 22b9ba1..0ceafdb 100644
--- a/README.md
+++ b/README.md
@@ -84,12 +84,13 @@ The web UI is a static Preact app served by `mind-map serve` over HTTP. It uses
Both modes use the same wiki engine and the same wiki directory (`~/.mind-map/wiki` by default). Multiple stdio processes can safely share the same wiki via SQLite page locking.
-## MCP Tools (10 total)
+## MCP Tools (11 total)
| Tool | Description |
|------|-------------|
| `search_pages` | Full-text search across page titles and content (SQLite FTS5) |
-| `get_wiki_context` | Wiki overview: page count, top-level directories, recent pages |
+| `get_wiki_digest` | Per-conversation orientation: page count, word/phrase cloud, active-use recents LRU, per-area counts, ~4 KB rendered markdown. Call this at the start of every new conversation. |
+| `get_wiki_context` | Wiki overview: page count, top-level directories, recent pages (mtime-sorted). Also returns the digest fields for new clients. |
| `get_page` | Read a page with parsed frontmatter, body, outgoing links, and backlinks |
| `create_page` | Create a new page (markdown with optional YAML frontmatter) |
| `update_page` | Update an existing page's content |
@@ -102,6 +103,7 @@ Both modes use the same wiki engine and the same wiki directory (`~/.mind-map/wi
## Wiki Features
+- **Per-conversation digest**: a compact orientation blob (cloud of top terms, recents LRU, area counts, rendered markdown) for LLMs to consume at conversation start. The recents LRU updates on every page operation; a background job rebuilds the term cloud every 5 minutes by default (configurable via `digest.cloud_refresh`). Both are persisted to SQLite across restarts.
- **YAML frontmatter**: structured metadata on every page (`title`, `type`, `status`, custom fields)
- **Wikilinks**: `[[target]]` and `[[display|target]]` syntax, resolved to clickable links
- **Backlink index**: every page knows what links to it
diff --git a/SKILL.md b/SKILL.md
index 97ac592..5845ab2 100644
--- a/SKILL.md
+++ b/SKILL.md
@@ -4,6 +4,7 @@ description: A wiki for AI agents and humans -- search, read, and write markdown
tools:
- search_pages
- get_wiki_context
+ - get_wiki_digest
- get_page
- create_page
- update_page
@@ -47,12 +48,29 @@ Use mind-map as your **persistent memory**:
## Getting Oriented
-**Always start by understanding what's already in the wiki:**
+**Always start a new conversation with the digest:**
+```
+get_wiki_digest()
+→ returns a compact markdown blob: page count, top word/phrase cloud
+ (what this wiki is about), pages you or other agents recently
+ touched (intent, not file-mtime), and per-area page counts.
+ ~4 KB cap, ~1K tokens — designed to fit any context budget.
+```
+
+The digest is always-current: a background job rebuilds the cloud
+every few minutes and the recents LRU updates on every page op.
+Persisted to SQLite so a fresh server restart already has signal.
+
+If you need the legacy mtime-sorted "recently modified pages" list
+or the filesystem-derived top-level directory list, call:
```
get_wiki_context()
-→ returns page count, top-level directories, and 20 most recently modified pages
+→ returns page count, top-level directories, and the 20 most recently modified pages (same shape as before), plus the digest fields layered on for free.
```
+New clients should prefer `get_wiki_digest`; `get_wiki_context`
+remains for backwards compatibility.
+
## Searching
```
diff --git a/cmd/mind-map/main.go b/cmd/mind-map/main.go
index 0090799..a0234c6 100644
--- a/cmd/mind-map/main.go
+++ b/cmd/mind-map/main.go
@@ -12,6 +12,7 @@ import (
"time"
"github.com/aniongithub/mind-map/internal/config"
+ "github.com/aniongithub/mind-map/internal/digest"
"github.com/aniongithub/mind-map/internal/httpapi"
"github.com/aniongithub/mind-map/internal/logging"
mindmcp "github.com/aniongithub/mind-map/internal/mcp"
@@ -87,17 +88,57 @@ func init() {
func runStdio(cmd *cobra.Command, args []string) error {
dir, _ := cmd.Flags().GetString("dir")
- w, err := wiki.Open(dir)
+ // Config is loaded before the wiki is opened because its digest
+ // section supplies the options passed to wiki.Open. A load failure
+ // is logged and the defaults are used instead of aborting.
+ cfgPath := config.DefaultPath()
+ cfg, err := config.Load(cfgPath)
+ if err != nil {
+ slog.Warn("failed to load config, using defaults", slog.Any("error", err))
+ cfg = config.DefaultConfig()
+ }
+
+ w, err := wiki.Open(dir, wiki.WithOptions(wikiOptionsFromConfig(cfg)))
if err != nil {
return fmt.Errorf("open wiki: %w", err)
}
defer w.Close()
+ // Spin up the digest's background maintenance (cloud rebuild +
+ // recents flush) for the duration of the stdio session. dm.Stop is
+ // deferred after w.Close and defers run last-in-first-out, so the
+ // manager stops before the wiki closes and a mid-rebuild ticker
+ // doesn't race the DB shutdown.
+ dm := digest.NewManager(w, digestOptionsFromConfig(cfg))
+ dm.Start(cmd.Context())
+ defer dm.Stop()
+
s := mindmcp.NewServer(w, nil, getVersion())
slog.Info("mind-map MCP server starting", slog.String("mode", "stdio"), slog.String("wiki", w.Root()))
return s.MCPServer().Run(cmd.Context(), &mcpsdk.StdioTransport{})
}
+// wikiOptionsFromConfig maps the digest section of config.Config to
+// the construction-time knobs the Wiki cares about (recents capacity,
+// render cap, stopword extras). Zero/missing values keep the Wiki's
+// own defaults — DigestConfig is documented as fully optional.
+func wikiOptionsFromConfig(cfg *config.Config) wiki.Options {
+ d := cfg.Digest
+ return wiki.Options{
+ RecentsSize: d.RecentsSize,
+ MaxRenderBytes: d.MaxRenderBytes,
+ StopwordsExtra: d.StopwordsExtra,
+ }
+}
+
+// digestOptionsFromConfig maps the digest section to the runtime
+// (ticker / rebuild) knobs the digest.Manager cares about. Same
+// "zero means default" contract.
+func digestOptionsFromConfig(cfg *config.Config) digest.Options {
+ d := cfg.Digest
+ return digest.Options{
+ CloudRefresh: d.ParseCloudRefresh(),
+ CloudSize: d.CloudSize,
+ StopwordsExtra: d.StopwordsExtra,
+ }
+}
+
func runServe(cmd *cobra.Command, args []string) error {
dir, _ := cmd.Flags().GetString("dir")
logFile, _ := cmd.Flags().GetString("log-file")
@@ -145,12 +186,6 @@ func runServe(cmd *cobra.Command, args []string) error {
// runHTTPServer wires the HTTP handler from internal/httpapi and serves it.
// Shared by the interactive `serve` command and the system service.
func runHTTPServer(addr, dir, webuiDir string, idleTimeout time.Duration, stopCh chan struct{}) error {
- w, err := wiki.Open(dir)
- if err != nil {
- return fmt.Errorf("open wiki: %w", err)
- }
- defer w.Close()
-
cfgPath := config.DefaultPath()
cfg, err := config.Load(cfgPath)
if err != nil {
@@ -158,6 +193,32 @@ func runHTTPServer(addr, dir, webuiDir string, idleTimeout time.Duration, stopCh
cfg = config.DefaultConfig()
}
+ w, err := wiki.Open(dir, wiki.WithOptions(wikiOptionsFromConfig(cfg)))
+ if err != nil {
+ return fmt.Errorf("open wiki: %w", err)
+ }
+ defer w.Close()
+
+ // Background digest maintenance runs for the lifetime of the
+ // HTTP server. We use a context derived from stopCh so that the
+ // graceful /api/restart path (which closes stopCh) also stops
+ // the tickers cleanly. Stopping before Close ensures the LRU
+ // flush in the manager's final tick doesn't race with db.Close.
+ dctx, dcancel := context.WithCancel(context.Background())
+ defer dcancel()
+ go func() {
+ select {
+ case <-stopCh:
+ dcancel()
+ case <-dctx.Done():
+ // Normal function return; the defer above cancelled us.
+ return
+ }
+ }()
+ dm := digest.NewManager(w, digestOptionsFromConfig(cfg))
+ dm.Start(dctx)
+ defer dm.Stop()
+
handler := httpapi.New(httpapi.Deps{
Wiki: w,
CfgPath: cfgPath,
diff --git a/internal/config/config.go b/internal/config/config.go
index 0e447a4..84dc10f 100644
--- a/internal/config/config.go
+++ b/internal/config/config.go
@@ -120,9 +120,55 @@ func (s *SyncConfig) Remotes() []string {
return remotes
}
+// DigestConfig holds tunables for the per-conversation orientation
+// digest (cloud rebuild, recents LRU, render cap, stopword extras).
+// All fields are optional; zero or invalid values fall back to the
+// built-in defaults. Documented in detail in mind-map/plans/digest.
+type DigestConfig struct {
+ // CloudSize caps the top-K terms surfaced in the word cloud.
+ // Default 50. Tunable up if your wiki is large enough that 50
+ // terms feels too sparse; down if context budget is tight.
+ CloudSize int `json:"cloud_size,omitempty"`
+
+ // RecentsSize caps the active-use LRU ring. Default 20. Applied
+ // at wiki Open; live changes via /api/settings take effect after
+ // the next server restart.
+ RecentsSize int `json:"recents_size,omitempty"`
+
+ // CloudRefresh controls how often the cloud rebuilds. Default 5m.
+ // Accepts any time.ParseDuration value. Values that fail to parse
+ // or are below 30 seconds are not clamped to 30s; ParseCloudRefresh
+ // replaces them with the 5m default so a busy wiki doesn't burn CPU.
+ CloudRefresh string `json:"cloud_refresh,omitempty"`
+
+ // StopwordsExtra extends the built-in English stopword list.
+ // Words are case-folded on load. Useful for domain-specific
+ // noise like "TODO" or "FIXME".
+ StopwordsExtra []string `json:"stopwords_extra,omitempty"`
+
+ // MaxRenderBytes caps the rendered markdown blob. Default 4096
+ // (~1K tokens for most LLMs). Trim discipline when over: drop
+ // recents, then cloud, never areas/header/footer.
+ MaxRenderBytes int `json:"max_render_bytes,omitempty"`
+}
+
+// ParseCloudRefresh returns the cloud rebuild interval. Returns the
+// default (5m) if empty or invalid. Floor at 30 seconds — anything
+// faster is wasted CPU for a signal nobody reads that often.
+func (d *DigestConfig) ParseCloudRefresh() time.Duration {
+ if d.CloudRefresh == "" {
+ return 5 * time.Minute
+ }
+ v, err := time.ParseDuration(d.CloudRefresh)
+ if err != nil || v < 30*time.Second {
+ return 5 * time.Minute
+ }
+ return v
+}
+
// Config holds all runtime settings.
type Config struct {
- Sync SyncConfig `json:"sync"`
+ Sync SyncConfig `json:"sync"`
+ // Digest tunes the orientation digest; a missing section decodes
+ // to the zero value, which consumers treat as "use defaults".
+ // NOTE(review): encoding/json's omitempty never omits a struct
+ // value, so Save still writes a "digest" key when nothing is set.
+ Digest DigestConfig `json:"digest,omitempty"`
}
// DefaultConfig returns a Config with sensible defaults.
diff --git a/internal/config/config_test.go b/internal/config/config_test.go
index 0af2e6a..bf7acb6 100644
--- a/internal/config/config_test.go
+++ b/internal/config/config_test.go
@@ -172,3 +172,81 @@ func TestSaveAndLoad(t *testing.T) {
t.Errorf("loaded mapping prefix = %q", loaded.Sync.Mappings[0].Prefix)
}
}
+
+// TestParseCloudRefresh checks that valid intervals pass through
+// unchanged and that empty, unparsable, or sub-30s inputs all yield
+// the 5-minute default.
+func TestParseCloudRefresh(t *testing.T) {
+ tests := []struct {
+ input string
+ want time.Duration
+ }{
+ {"5m", 5 * time.Minute},
+ {"10m", 10 * time.Minute},
+ {"1h", 1 * time.Hour},
+ // Floor: anything < 30s is replaced by the default (not raised
+ // to 30s) to protect a busy wiki from CPU churn.
+ {"1s", 5 * time.Minute},
+ {"", 5 * time.Minute}, // empty → default
+ {"junk", 5 * time.Minute}, // invalid → default
+ }
+ for _, tc := range tests {
+ d := DigestConfig{CloudRefresh: tc.input}
+ if got := d.ParseCloudRefresh(); got != tc.want {
+ t.Errorf("ParseCloudRefresh(%q) = %v, want %v", tc.input, got, tc.want)
+ }
+ }
+}
+
+// TestDigestConfig_RoundtripJSON sets every digest field, writes the
+// config with Save, reads it back with Load, and checks that each
+// value survived the round trip.
+func TestDigestConfig_RoundtripJSON(t *testing.T) {
+ dir := t.TempDir()
+ path := filepath.Join(dir, "config.json")
+
+ cfg := DefaultConfig()
+ cfg.Digest.CloudSize = 75
+ cfg.Digest.RecentsSize = 30
+ cfg.Digest.CloudRefresh = "10m"
+ cfg.Digest.StopwordsExtra = []string{"TODO", "FIXME"}
+ cfg.Digest.MaxRenderBytes = 8192
+
+ if err := Save(path, cfg); err != nil {
+ t.Fatalf("Save: %v", err)
+ }
+ loaded, err := Load(path)
+ if err != nil {
+ t.Fatalf("Load: %v", err)
+ }
+ if loaded.Digest.CloudSize != 75 {
+ t.Errorf("CloudSize = %d, want 75", loaded.Digest.CloudSize)
+ }
+ if loaded.Digest.RecentsSize != 30 {
+ t.Errorf("RecentsSize = %d, want 30", loaded.Digest.RecentsSize)
+ }
+ // CloudRefresh is compared through its parsed form, not the raw string.
+ if loaded.Digest.ParseCloudRefresh() != 10*time.Minute {
+ t.Errorf("CloudRefresh = %v, want 10m", loaded.Digest.ParseCloudRefresh())
+ }
+ // Only the length and the first element are checked here.
+ if len(loaded.Digest.StopwordsExtra) != 2 || loaded.Digest.StopwordsExtra[0] != "TODO" {
+ t.Errorf("StopwordsExtra = %v", loaded.Digest.StopwordsExtra)
+ }
+ if loaded.Digest.MaxRenderBytes != 8192 {
+ t.Errorf("MaxRenderBytes = %d, want 8192", loaded.Digest.MaxRenderBytes)
+ }
+}
+
+// TestDigestConfig_BackwardsCompatible loads a config file that has
+// only a "sync" section and checks the digest fields come back zero.
+func TestDigestConfig_BackwardsCompatible(t *testing.T) {
+ // A config file written before the digest section existed must
+ // still load without errors and yield zero-valued digest fields
+ // (which the consumers treat as "use defaults").
+ dir := t.TempDir()
+ path := filepath.Join(dir, "config.json")
+ if err := os.WriteFile(path, []byte(`{"sync":{"enabled":false,"interval":"30s"}}`), 0o600); err != nil {
+ t.Fatalf("write legacy config: %v", err)
+ }
+ loaded, err := Load(path)
+ if err != nil {
+ t.Fatalf("Load legacy config: %v", err)
+ }
+ if loaded.Digest.CloudSize != 0 {
+ t.Errorf("expected zero CloudSize on legacy config, got %d", loaded.Digest.CloudSize)
+ }
+ // An empty CloudRefresh string must resolve to the 5m default.
+ if loaded.Digest.ParseCloudRefresh() != 5*time.Minute {
+ t.Errorf("expected default 5m on legacy config, got %v", loaded.Digest.ParseCloudRefresh())
+ }
+}
diff --git a/internal/digest/manager.go b/internal/digest/manager.go
new file mode 100644
index 0000000..9282464
--- /dev/null
+++ b/internal/digest/manager.go
@@ -0,0 +1,203 @@
+// Package digest runs the background maintenance for a wiki's per-
+// conversation orientation digest: a periodic rebuild of the word/
+// phrase cloud and a periodic flush of the active-use recents LRU
+// to SQLite.
+//
+// The package mirrors internal/sync in shape: a Manager constructed
+// over a *wiki.Wiki, with Start(ctx) / Stop() lifecycle that the
+// embedder (cmd/mind-map, internal/httpapi) supervises. Keeping the
+// tickers out of the Wiki itself preserves the same separation sync
+// already established — the storage engine has no goroutines of its
+// own; lifecycle is the embedder's concern.
+package digest
+
+import (
+ "context"
+ "log/slog"
+ "sync"
+ "time"
+
+ "github.com/aniongithub/mind-map/internal/wiki"
+)
+
+// Defaults applied by NewManager when the matching Options field is
+// zero or negative. Callers may override any of them through
+// Options; these values are only the fallback.
+const (
+ defaultCloudRefresh = 5 * time.Minute
+ defaultRecentsRefresh = 30 * time.Second
+
+ // defaultCloudSize matches the plan's cloud_size default. The
+ // top-K selection is the only knob that materially affects the
+ // rendered digest's word density; everything else is plumbing.
+ defaultCloudSize = 50
+)
+
+// Manager runs the two background tickers (cloud rebuild + recents
+// flush) for a single wiki. Construct one with NewManager, hand its
+// Start a context tied to the process lifetime, and call Stop before
+// closing the wiki — closing the wiki out from under a mid-rebuild
+// ticker is a `sql: database is closed` race waiting to happen.
+//
+// Start and Stop are each idempotent (guarded by sync.Once). A
+// Manager is one-shot: after a Start/Stop cycle it cannot be started
+// again. Construct a fresh one if you need a restart.
+type Manager struct {
+ w *wiki.Wiki
+
+ // Resolved settings, fixed at construction by NewManager.
+ cloudRefresh time.Duration
+ recentsRefresh time.Duration
+ cloudSize int
+ stopwordsExtra []string
+
+ // Lifecycle state. cancel and done are set inside Start; Stop
+ // calls cancel and waits on done, which run closes on exit.
+ startOnce sync.Once
+ stopOnce sync.Once
+ cancel context.CancelFunc
+ done chan struct{}
+}
+
+// Options tunes Manager behavior. Zero or negative numeric fields
+// fall back to the package defaults (5m cloud rebuild, 30s recents
+// flush, top-50 cloud terms) when passed to NewManager.
+type Options struct {
+ CloudRefresh time.Duration
+ RecentsRefresh time.Duration
+ CloudSize int
+ // StopwordsExtra appends to the built-in English stopword list.
+ // Mirrors plan's digest.stopwords_extra config knob.
+ StopwordsExtra []string
+}
+
+// NewManager returns a Manager for w that has not been started yet.
+// Each numeric option that is zero or negative is replaced by its
+// package default; StopwordsExtra is stored as given. Pass a zero
+// Options to get every default.
+func NewManager(w *wiki.Wiki, opts Options) *Manager {
+	m := &Manager{
+		w:              w,
+		cloudRefresh:   defaultCloudRefresh,
+		recentsRefresh: defaultRecentsRefresh,
+		cloudSize:      defaultCloudSize,
+		stopwordsExtra: opts.StopwordsExtra,
+	}
+	// Start from the defaults and overlay only the positive overrides.
+	if opts.CloudRefresh > 0 {
+		m.cloudRefresh = opts.CloudRefresh
+	}
+	if opts.RecentsRefresh > 0 {
+		m.recentsRefresh = opts.RecentsRefresh
+	}
+	if opts.CloudSize > 0 {
+		m.cloudSize = opts.CloudSize
+	}
+	return m
+}
+
+// Start kicks off the two tickers. Idempotent: a second call is a
+// no-op. It performs one synchronous cloud rebuild, then spawns the
+// loop goroutine and returns; use Stop to wait for clean shutdown.
+//
+// The cloud is rebuilt synchronously once before the goroutine loop
+// starts so a freshly-opened wiki has cloud terms in its digest
+// without a 5-minute warm-up. On cold start over a 1k-page wiki this
+// takes < 100ms; we accept that latency on Start so the first
+// post-open digest read is useful.
+func (m *Manager) Start(ctx context.Context) {
+ m.startOnce.Do(func() {
+ // Derive a cancellable child so Stop can end the loop without
+ // the caller having to cancel its own context.
+ ctx, m.cancel = context.WithCancel(ctx)
+ m.done = make(chan struct{})
+
+ // Synchronous first build so cold-start digests have an
+ // About: line. We deliberately don't gate on whether a
+ // persisted cloud was loaded: even if it was, the on-disk
+ // content may have shifted while the server was off, and
+ // the cost is small. A failure here logs and continues —
+ // the tickers below will retry.
+ m.rebuildCloud(ctx)
+
+ go m.run(ctx)
+ slog.Info("digest manager started",
+ slog.Duration("cloud_refresh", m.cloudRefresh),
+ slog.Duration("recents_refresh", m.recentsRefresh),
+ slog.Int("cloud_size", m.cloudSize),
+ )
+ })
+}
+
+// Stop cancels the tickers and blocks until the loop goroutine has
+// exited. Idempotent, and safe to call after Start, concurrently
+// with Start, after another Stop, or without ever calling Start (in
+// which case it returns without waiting on anything).
+//
+// Stop also retires the Manager: it consumes startOnce, so a Start
+// that arrives after Stop is a no-op instead of launching a loop that
+// no later Stop could cancel. Because sync.Once.Do does not return
+// until the winning call's function has finished, the same call makes
+// Stop wait for an in-flight Start, so m.cancel and m.done are read
+// only after Start has finished writing them.
+//
+// A final recents flush runs as the loop exits (see run) so touches
+// made since the last tick aren't lost on shutdown. The original
+// author notes that the Wiki's own Close() also persists recents, so
+// the redundancy is harmless.
+func (m *Manager) Stop() {
+	m.stopOnce.Do(func() {
+		// Burn startOnce, or wait for a concurrent Start to finish.
+		m.startOnce.Do(func() {})
+		if m.cancel == nil {
+			return // Stop without Start: nothing to do.
+		}
+		m.cancel()
+		<-m.done
+		slog.Info("digest manager stopped")
+	})
+}
+
+// run is the goroutine that drives both tickers. The cloud rebuild
+// is much heavier than the recents flush, but both are well below the
+// 30s recents tick on any reasonable wiki size, so a shared goroutine
+// with two tickers is simpler than two goroutines and adequately
+// non-blocking for the workload.
+//
+// It returns when ctx is cancelled, after one last recents flush.
+func (m *Manager) run(ctx context.Context) {
+ // Closing done is what unblocks Stop.
+ defer close(m.done)
+
+ cloudTick := time.NewTicker(m.cloudRefresh)
+ defer cloudTick.Stop()
+ recentsTick := time.NewTicker(m.recentsRefresh)
+ defer recentsTick.Stop()
+
+ for {
+ select {
+ case <-ctx.Done():
+ // Final flush so we don't lose the last ~30s of
+ // touches. Use a detached background context: the
+ // loop's ctx is already cancelled, but the DB write
+ // itself should still get a chance to complete.
+ m.flushRecents(context.Background())
+ return
+ case <-cloudTick.C:
+ m.rebuildCloud(ctx)
+ case <-recentsTick.C:
+ m.flushRecents(ctx)
+ }
+ }
+}
+
+// rebuildCloud performs a single rebuild cycle: build the term list,
+// install it on the wiki, then persist it. Nothing is returned. A
+// build failure is logged and ends the cycle early; a persist failure
+// is logged but the success line is still emitted, since the
+// in-memory cloud has already been replaced.
+func (m *Manager) rebuildCloud(ctx context.Context) {
+	began := time.Now()
+
+	cloud, buildErr := m.w.BuildCloud(ctx, m.cloudSize, m.stopwordsExtra)
+	if buildErr != nil {
+		slog.Warn("digest cloud rebuild failed", slog.Any("error", buildErr))
+		return
+	}
+	m.w.SetCloud(cloud)
+
+	persistErr := m.w.PersistCloud(ctx)
+	if persistErr != nil {
+		slog.Warn("digest cloud persist failed", slog.Any("error", persistErr))
+	}
+
+	slog.Info("digest cloud rebuilt",
+		slog.Int("terms", len(cloud)),
+		slog.Duration("elapsed", time.Since(began)),
+	)
+}
+
+// flushRecents persists the recents LRU, but only when the wiki
+// reports it as dirty, so an idle server issues no writes. A persist
+// failure is logged and otherwise ignored.
+func (m *Manager) flushRecents(ctx context.Context) {
+	if m.w.RecentsDirty() {
+		if persistErr := m.w.PersistRecents(ctx); persistErr != nil {
+			slog.Warn("digest recents persist failed", slog.Any("error", persistErr))
+		}
+	}
+}
diff --git a/internal/digest/manager_test.go b/internal/digest/manager_test.go
new file mode 100644
index 0000000..d0f2759
--- /dev/null
+++ b/internal/digest/manager_test.go
@@ -0,0 +1,139 @@
+package digest
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+ "time"
+
+ "github.com/aniongithub/mind-map/internal/wiki"
+)
+
+// testWiki creates a temporary wiki with a few seed pages so the
+// cloud rebuild has something to count. Kept private to this test
+// file — the public wiki package has its own testWiki, but we can't
+// import test helpers across packages.
+//
+// The wiki is opened with wiki.Open's defaults and is closed through
+// t.Cleanup; any setup failure aborts the test.
+func testWiki(t *testing.T) *wiki.Wiki {
+ t.Helper()
+ dir := t.TempDir()
+
+ // Seed pages keyed by path relative to the wiki root.
+ pages := map[string]string{
+ "index.md": "# Home\n\nThis wiki is about mind-map, digest, and SQLite.\n",
+ "projects/mind-map.md": "# mind-map\n\nA wiki engine. SQLite-backed. Digest support.\n",
+ "notes/sqlite.md": "# SQLite\n\nSQLite is fast and embedded. mind-map uses SQLite.\n",
+ }
+ for name, content := range pages {
+ full := filepath.Join(dir, name)
+ if err := os.MkdirAll(filepath.Dir(full), 0o755); err != nil {
+ t.Fatalf("mkdir %s: %v", name, err)
+ }
+ if err := os.WriteFile(full, []byte(content), 0o644); err != nil {
+ t.Fatalf("seed %s: %v", name, err)
+ }
+ }
+
+ w, err := wiki.Open(dir)
+ if err != nil {
+ t.Fatalf("Open: %v", err)
+ }
+ t.Cleanup(func() { w.Close() })
+ return w
+}
+
+// TestManager_StartTriggersImmediateCloudRebuild checks that the
+// digest already has cloud terms and an About: line when Start
+// returns, without waiting for any ticker.
+func TestManager_StartTriggersImmediateCloudRebuild(t *testing.T) {
+ w := testWiki(t)
+
+ m := NewManager(w, Options{
+ // Long tick so the ticker doesn't fire during the test —
+ // we want to assert the *synchronous* initial build only.
+ CloudRefresh: time.Hour,
+ RecentsRefresh: time.Hour,
+ })
+ m.Start(context.Background())
+ defer m.Stop()
+
+ // After Start, the cloud cache should be populated and the digest
+ // markdown should contain an About: line.
+ d, err := w.Digest(context.Background())
+ if err != nil {
+ t.Fatalf("Digest: %v", err)
+ }
+ if len(d.Cloud) == 0 {
+ t.Fatalf("cloud should be populated after Start, got empty")
+ }
+ if !strings.Contains(d.Markdown, "About:") {
+ t.Fatalf("digest missing About: line:\n%s", d.Markdown)
+ }
+}
+
+// TestManager_StopIsIdempotent calls Stop twice on a started Manager;
+// the test passes if neither call panics or blocks.
+func TestManager_StopIsIdempotent(t *testing.T) {
+ w := testWiki(t)
+ m := NewManager(w, Options{CloudRefresh: time.Hour, RecentsRefresh: time.Hour})
+ m.Start(context.Background())
+
+ m.Stop()
+ m.Stop() // second Stop must not panic or block
+}
+
+// TestManager_StopWithoutStartIsNoOp calls Stop on a Manager that was
+// never started; the test passes if the call returns.
+func TestManager_StopWithoutStartIsNoOp(t *testing.T) {
+ w := testWiki(t)
+ m := NewManager(w, Options{})
+ m.Stop() // must not panic, must not hang
+}
+
+// TestManager_RecentsFlushOnTick dirties the recents LRU with a page
+// read and then polls for up to one second for the 50ms recents
+// ticker to clear the dirty flag.
+func TestManager_RecentsFlushOnTick(t *testing.T) {
+ w := testWiki(t)
+ ctx := context.Background()
+
+ m := NewManager(w, Options{
+ CloudRefresh: time.Hour,
+ RecentsRefresh: 50 * time.Millisecond,
+ })
+ m.Start(ctx)
+ defer m.Stop()
+
+ // Touch via a real Wiki op so dirty flips on.
+ if _, err := w.GetPage(ctx, "index"); err != nil {
+ t.Fatalf("GetPage: %v", err)
+ }
+ // NOTE(review): with a 50ms tick this check could in principle
+ // lose a race with the flush; widen the tick if it ever flakes.
+ if !w.RecentsDirty() {
+ t.Fatalf("LRU should be dirty after GetPage")
+ }
+
+ // Wait for the ticker to fire and flush.
+ deadline := time.Now().Add(time.Second)
+ for time.Now().Before(deadline) {
+ if !w.RecentsDirty() {
+ return // success: ticker flushed and cleared dirty
+ }
+ time.Sleep(20 * time.Millisecond)
+ }
+ t.Fatalf("LRU still dirty after 1s; ticker did not flush")
+}
+
+// TestManager_StopFlushesRecents dirties the recents LRU and checks
+// that Stop alone, with no ticker firing, clears the dirty flag.
+func TestManager_StopFlushesRecents(t *testing.T) {
+ w := testWiki(t)
+ ctx := context.Background()
+
+ m := NewManager(w, Options{
+ // Long ticks so only the Stop-time flush can save us.
+ CloudRefresh: time.Hour,
+ RecentsRefresh: time.Hour,
+ })
+ m.Start(ctx)
+
+ if _, err := w.GetPage(ctx, "index"); err != nil {
+ t.Fatalf("GetPage: %v", err)
+ }
+ if !w.RecentsDirty() {
+ t.Fatalf("LRU should be dirty after touch")
+ }
+
+ m.Stop()
+
+ if w.RecentsDirty() {
+ t.Fatalf("Stop should have flushed dirty LRU; still dirty")
+ }
+}
diff --git a/internal/httpapi/server.go b/internal/httpapi/server.go
index cf96774..8b4874a 100644
--- a/internal/httpapi/server.go
+++ b/internal/httpapi/server.go
@@ -161,6 +161,7 @@ func (s *Server) shutdown() {
func (s *Server) register(mux *http.ServeMux) {
mux.HandleFunc("GET /api/version", s.getVersion)
mux.HandleFunc("GET /api/context", s.getContext)
+ mux.HandleFunc("GET /api/digest", s.getDigest)
mux.HandleFunc("GET /api/pages", s.listPages)
mux.HandleFunc("GET /api/pages/{path...}", s.getPage)
mux.HandleFunc("POST /api/pages", s.createPage)
@@ -306,6 +307,27 @@ func (s *Server) getContext(rw http.ResponseWriter, r *http.Request) {
writeJSON(rw, wctx)
}
+// getDigest handles GET /api/digest. It asks the wiki for its Digest
+// (page count, cloud terms, recents LRU, per-area summaries, rendered
+// markdown) and writes it as JSON. If the wiki returns an error, the
+// response is a 500 carrying the error text.
+//
+// Intended callers:
+//
+//   - Agents / MCP clients that prefer HTTP over the MCP tool
+//     (e.g. tests, scripts, or alternate clients).
+//   - The WebUI, which can build widgets such as a word-cloud view
+//     from the structured fields instead of parsing the markdown.
+//
+// NOTE(review): the original comment describes this as cheap enough
+// for polling thanks to an in-memory digest cache; that cache is not
+// visible from this handler, so confirm before relying on it.
+func (s *Server) getDigest(rw http.ResponseWriter, r *http.Request) {
+	snapshot, err := s.deps.Wiki.Digest(r.Context())
+	if err == nil {
+		writeJSON(rw, snapshot)
+		return
+	}
+	http.Error(rw, err.Error(), http.StatusInternalServerError)
+}
+
func (s *Server) listPages(rw http.ResponseWriter, r *http.Request) {
prefix := r.URL.Query().Get("prefix")
pages, err := s.deps.Wiki.ListPages(r.Context(), prefix)
diff --git a/internal/httpapi/server_test.go b/internal/httpapi/server_test.go
index bbee42e..c08f747 100644
--- a/internal/httpapi/server_test.go
+++ b/internal/httpapi/server_test.go
@@ -358,3 +358,50 @@ func TestReindexDetectsDirectFilesystemChanges(t *testing.T) {
t.Errorf("page still not indexed after reindex (got %d body=%s)", rec.Code, rec.Body.String())
}
}
+
+// TestGetDigest creates one page over the HTTP API, fetches
+// /api/digest, and checks the decoded Digest: page count, markdown
+// header sentence, at least one area, and the new page in recents.
+func TestGetDigest(t *testing.T) {
+ h := newTestServer(t)
+
+ // Seed a page so the digest has something to summarize.
+ rec := doJSON(t, h, "POST", "/api/pages", map[string]string{
+ "path": "topics/sqlite",
+ "content": "# SQLite\n\nSQLite is a fast embedded database.\n",
+ })
+ if rec.Code != 201 {
+ t.Fatalf("seed: %d %s", rec.Code, rec.Body.String())
+ }
+
+ rec = doJSON(t, h, "GET", "/api/digest", nil)
+ if rec.Code != 200 {
+ t.Fatalf("digest: %d %s", rec.Code, rec.Body.String())
+ }
+
+ var d wiki.Digest
+ if err := json.Unmarshal(rec.Body.Bytes(), &d); err != nil {
+ t.Fatalf("unmarshal: %v\n%s", err, rec.Body.String())
+ }
+
+ if d.PageCount < 1 {
+ t.Errorf("page count = %d, want >= 1", d.PageCount)
+ }
+ if d.Markdown == "" {
+ t.Errorf("markdown empty")
+ }
+ if !strings.Contains(d.Markdown, "This wiki contains") {
+ t.Errorf("markdown missing header sentence:\n%s", d.Markdown)
+ }
+ if len(d.Areas) == 0 {
+ t.Errorf("expected at least one area, got none")
+ }
+ // Recently active should include the page we just created
+ // (CreatePage touches the LRU).
+ found := false
+ for _, p := range d.Recents {
+ if p == "topics/sqlite" {
+ found = true
+ }
+ }
+ if !found {
+ t.Errorf("recents missing topics/sqlite: %v", d.Recents)
+ }
+}
diff --git a/internal/mcp/server.go b/internal/mcp/server.go
index a58839c..86d622f 100644
--- a/internal/mcp/server.go
+++ b/internal/mcp/server.go
@@ -61,9 +61,14 @@ func (s *Server) registerTools() {
mcp.AddTool(s.server, &mcp.Tool{
Name: "get_wiki_context",
- Description: "Get wiki orientation: page count, top-level directories, and 20 most recently modified pages.",
+ Description: "Get wiki orientation: page count, top-level directories, and 20 most recently modified pages. Also returns the digest (cloud_terms, recents LRU, per-area counts, rendered markdown) for new clients — older clients can ignore the extra fields.",
}, s.getWikiContext)
+ mcp.AddTool(s.server, &mcp.Tool{
+ Name: "get_wiki_digest",
+ Description: "Get a compact, always-current per-conversation orientation of this wiki. Returns: a rendered markdown blob (suitable to paste into context), a word/phrase cloud across all page bodies (what this wiki is about), an LRU of pages the user or agent has actively touched (intent, not file-mtime), and per-area page counts. Call this at the start of every new conversation. Cheaper and more deterministic than searching blindly; complements search_pages once you know what to look for.",
+ }, s.getWikiDigest)
+
mcp.AddTool(s.server, &mcp.Tool{
Name: "get_page",
Description: "Read a wiki page with parsed frontmatter, body, outgoing links, and backlinks.",
@@ -175,6 +180,24 @@ func (s *Server) getWikiContext(ctx context.Context, _ *mcp.CallToolRequest, _ a
return textResult(wctx)
}
+// getWikiDigest implements the get_wiki_digest tool. It fetches the
+// wiki's digest, logs a one-line summary (entry counts, rendered
+// markdown size, elapsed time) and returns the digest via textResult.
+// A failure is logged and the error is returned unchanged.
+func (s *Server) getWikiDigest(ctx context.Context, _ *mcp.CallToolRequest, _ any) (*mcp.CallToolResult, any, error) {
+	began := time.Now()
+
+	dg, digestErr := s.wiki.Digest(ctx)
+	if digestErr != nil {
+		slog.Error("tool.get_wiki_digest failed", slog.Any("error", digestErr))
+		return nil, nil, digestErr
+	}
+
+	slog.Info("tool.get_wiki_digest",
+		slog.Int("page_count", dg.PageCount),
+		slog.Int("cloud_terms", len(dg.Cloud)),
+		slog.Int("recents", len(dg.Recents)),
+		slog.Int("areas", len(dg.Areas)),
+		slog.Int("bytes", len(dg.Markdown)),
+		slog.Duration("elapsed", time.Since(began)),
+	)
+	return textResult(dg)
+}
+
func (s *Server) getPage(ctx context.Context, _ *mcp.CallToolRequest, input pagePathInput) (*mcp.CallToolResult, any, error) {
start := time.Now()
page, err := s.wiki.GetPage(ctx, input.Path)
diff --git a/internal/mcp/server_test.go b/internal/mcp/server_test.go
index dd30bc8..8bd38b0 100644
--- a/internal/mcp/server_test.go
+++ b/internal/mcp/server_test.go
@@ -176,6 +176,7 @@ func TestListTools(t *testing.T) {
expected := map[string]bool{
"search_pages": false,
"get_wiki_context": false,
+ "get_wiki_digest": false,
"get_page": false,
"create_page": false,
"update_page": false,
@@ -207,6 +208,40 @@ func TestGetWikiContext(t *testing.T) {
if ctx.PageCount != 4 {
t.Errorf("PageCount = %d, want 4", ctx.PageCount)
}
+ // New digest fields should be populated on the same response so
+ // existing get_wiki_context callers get the orientation upgrade
+ // for free (plan open question #4 — keep old shape, add fields).
+ if ctx.Markdown == "" {
+ t.Errorf("expected digest markdown to be populated on get_wiki_context")
+ }
+ if len(ctx.Areas) == 0 {
+ t.Errorf("expected areas to be populated on get_wiki_context")
+ }
+}
+
+// TestGetWikiDigest calls the get_wiki_digest tool and checks the
+// decoded Digest: page count, header sentence, Areas section in the
+// markdown, and at least one structured area.
+func TestGetWikiDigest(t *testing.T) {
+ session := setupTestServer(t)
+ text := callTool(t, session, "get_wiki_digest", nil)
+
+ var d wiki.Digest
+ if err := json.Unmarshal([]byte(text), &d); err != nil {
+ t.Fatalf("unmarshal: %v\n%s", err, text)
+ }
+ // Same page count that TestGetWikiContext expects from this fixture.
+ if d.PageCount != 4 {
+ t.Errorf("PageCount = %d, want 4", d.PageCount)
+ }
+ if d.Markdown == "" {
+ t.Errorf("Markdown empty")
+ }
+ if !strings.Contains(d.Markdown, "This wiki contains") {
+ t.Errorf("markdown missing header sentence:\n%s", d.Markdown)
+ }
+ if !strings.Contains(d.Markdown, "## Areas") {
+ t.Errorf("markdown missing Areas section:\n%s", d.Markdown)
+ }
+ if len(d.Areas) == 0 {
+ t.Errorf("expected at least one area in structured output")
+ }
}
func TestGetPage(t *testing.T) {
diff --git a/internal/wiki/cloud.go b/internal/wiki/cloud.go
new file mode 100644
index 0000000..5fab2ba
--- /dev/null
+++ b/internal/wiki/cloud.go
@@ -0,0 +1,318 @@
+package wiki
+
+import (
+ "context"
+ "sort"
+ "strings"
+ "sync"
+ "unicode"
+)
+
+// CloudTerm is a single entry in the rendered word/phrase cloud.
+type CloudTerm struct {
+ Term string `json:"term"`
+ Count int `json:"count"`
+}
+
+// defaultStopwords is the built-in English stopword list applied to
+// every wiki's cloud. Users add domain-specific extras via config
+// (digest.stopwords_extra) which are merged on top.
+//
+// Kept intentionally conservative: only true function words and the
+// most generic English filler. Domain terms (even common ones like
+// "wiki" or "page") are left to the per-wiki frequency signal to
+// dampen — a wiki that's literally *about* wikis should be allowed
+// to say so.
+var defaultStopwords = map[string]struct{}{
+ "a": {}, "an": {}, "and": {}, "are": {}, "as": {}, "at": {},
+ "be": {}, "but": {}, "by": {}, "can": {},
+ "do": {}, "does": {}, "for": {}, "from": {},
+ "had": {}, "has": {}, "have": {}, "he": {}, "her": {}, "here": {},
+ "hers": {}, "him": {}, "his": {}, "how": {},
+ "i": {}, "if": {}, "in": {}, "into": {}, "is": {}, "it": {}, "its": {},
+ "just": {}, "may": {}, "might": {}, "must": {},
+ "no": {}, "not": {}, "now": {}, "of": {}, "off": {}, "on": {}, "one": {},
+ "only": {}, "or": {}, "other": {}, "our": {}, "ours": {}, "out": {},
+ "over": {}, "own": {},
+ "s": {}, "she": {}, "should": {}, "so": {}, "some": {}, "such": {},
+ "t": {}, "than": {}, "that": {}, "the": {}, "their": {}, "them": {},
+ "then": {}, "there": {}, "these": {}, "they": {}, "this": {}, "those": {},
+ "to": {}, "too": {},
+ "under": {}, "until": {}, "up": {}, "upon": {},
+ "was": {}, "we": {}, "were": {}, "what": {}, "when": {}, "where": {},
+ "which": {}, "while": {}, "who": {}, "whom": {}, "why": {}, "will": {},
+ "with": {}, "would": {},
+ "you": {}, "your": {}, "yours": {},
+}
+
+// cloudBuilder holds the effective stopword set plus the tokenizing
+// and counting logic for the cloud. It keeps no counts itself: addPage
+// folds each page into unigram and bigram maps the caller supplies.
+type cloudBuilder struct {
+ stopwords map[string]struct{}
+}
+
+// newCloudBuilder constructs a builder with the default stopword set
+// merged with the user's extras. Extras are case-folded to match the
+// tokenizer's lowercase output.
+func newCloudBuilder(extra []string) *cloudBuilder {
+ sw := make(map[string]struct{}, len(defaultStopwords)+len(extra))
+ for k := range defaultStopwords {
+ sw[k] = struct{}{}
+ }
+ for _, w := range extra {
+ w = strings.ToLower(strings.TrimSpace(w))
+ if w != "" {
+ sw[w] = struct{}{}
+ }
+ }
+ return &cloudBuilder{stopwords: sw}
+}
+
+// isStopword reports whether t is filtered out of the cloud. Besides
+// the configured stopword set, tokens shorter than two characters
+// (counted in runes, so "я" is as short as "x") and all-digit tokens
+// are dropped: neither carries "about" signal, both inflate the tail.
+func (b *cloudBuilder) isStopword(t string) bool {
+ if len([]rune(t)) < 2 { // rune count, not byte count: tokens may be non-ASCII
+ return true
+ }
+ if _, ok := b.stopwords[t]; ok {
+ return true
+ }
+ allDigit := true
+ for _, r := range t {
+ if !unicode.IsDigit(r) {
+ allDigit = false
+ break
+ }
+ }
+ return allDigit
+}
+
+// tokenize splits a body into lowercase word tokens. The rules are
+// deliberately simple and deterministic:
+//
+// - Lowercase everything.
+// - A token is a maximal run of letters / digits / underscores /
+// hyphens, so identifiers like "mind-map" or "page_count" surface
+// intact. Hyphens and underscores at a token's edges are trimmed;
+// a run made only of them (e.g. "---") yields an empty token.
+// - Wikilink markup ([[...]]) is stripped but the target text
+// inside is tokenized normally — a link to [[projects/mind-map]]
+// contributes "projects" and "mind-map" to the page's tokens.
+// - Other markdown punctuation (#, *, `, etc.) becomes a separator.
+// - Code fences and inline code are NOT stripped: code identifiers
+// are part of what a technical wiki is about, and dropping them
+// flattens the cloud.
+func (b *cloudBuilder) tokenize(body string) []string {
+ // Cheaply strip the wikilink delimiters so [[a/b]] surfaces both
+ // "a" and "b" without us having to special-case the parser. The
+ // pipe form [[display|target]] is left as-is; the tokenizer's
+ // non-alnum-split will handle both halves.
+ body = strings.ReplaceAll(body, "[[", " ")
+ body = strings.ReplaceAll(body, "]]", " ")
+
+ tokens := make([]string, 0, len(body)/6)
+ var cur strings.Builder
+ flush := func() {
+ if cur.Len() > 0 {
+ tokens = append(tokens, cur.String())
+ cur.Reset()
+ }
+ }
+ for _, r := range body {
+ switch {
+ case unicode.IsLetter(r) || unicode.IsDigit(r):
+ cur.WriteRune(unicode.ToLower(r))
+ case r == '-' || r == '_':
+ // Kept unconditionally while scanning; any that end up at
+ // a token's leading/trailing edge are trimmed below.
+ cur.WriteRune(r)
+ default:
+ flush()
+ }
+ }
+ flush()
+
+ // Trim leading/trailing hyphens and underscores (e.g. "--foo").
+ // A token that was nothing else becomes "" and stays in the slice.
+ for i, t := range tokens {
+ tokens[i] = strings.Trim(t, "-_")
+ }
+ return tokens
+}
+
+// addPage folds one page's tokens into the running unigram and bigram
+// counts.
+//
+// Bigrams require BOTH ends to pass the stopword filter (plan open
+// question #2 lean): otherwise common phrases like "the wiki" would
+// dominate purely because "the" is high-frequency, even though the
+// pair is no more informative than "wiki" alone.
+func (b *cloudBuilder) addPage(body string, unigrams, bigrams map[string]int) {
+ tokens := b.tokenize(body)
+
+ var prev string
+ for _, t := range tokens {
+ if t == "" {
+ prev = ""
+ continue
+ }
+ stop := b.isStopword(t)
+ if !stop {
+ unigrams[t]++
+ }
+ if prev != "" && !stop && !b.isStopword(prev) {
+ bigrams[prev+" "+t]++
+ }
+ prev = t
+ }
+}
+
+// topK selects the K highest-count entries from the given map. Ties
+// break alphabetically so the output is stable across rebuilds —
+// otherwise a digest cache invalidation could shuffle the cloud for
+// no reason a user would understand.
+func topK(counts map[string]int, k int) []CloudTerm {
+ if k <= 0 || len(counts) == 0 {
+ return nil
+ }
+ terms := make([]CloudTerm, 0, len(counts))
+ for t, n := range counts {
+ terms = append(terms, CloudTerm{Term: t, Count: n})
+ }
+ sort.Slice(terms, func(i, j int) bool {
+ if terms[i].Count != terms[j].Count {
+ return terms[i].Count > terms[j].Count
+ }
+ return terms[i].Term < terms[j].Term
+ })
+ if len(terms) > k {
+ terms = terms[:k]
+ }
+ return terms
+}
+
+// BuildCloud computes the top-K most frequent terms across all page
+// bodies. It is the exported entry point for the digest.Manager
+// ticker and simply delegates to buildCloud: the implementation
+// lives on the Wiki because it reads `pages` directly, while the
+// supervisor owns the scheduling.
+//
+// The result mixes unigrams and bigrams: bigrams are scored by their
+// own frequency (no boost), so a phrase only beats a single word when
+// it genuinely occurs more often.
+//
+// The caller owns the goroutine and the slot the result is stored
+// in; this function just does the work. k is the maximum number of
+// terms returned and stopwordsExtra is merged on top of the default
+// stopword list. A cancelled ctx or a query failure is returned as
+// the error.
+func (w *Wiki) BuildCloud(ctx context.Context, k int, stopwordsExtra []string) ([]CloudTerm, error) {
+ return w.buildCloud(ctx, k, stopwordsExtra)
+}
+
+// SetCloud installs a freshly-built cloud into the in-memory cache.
+// Pairs with BuildCloud; the supervisor calls Build → Set → Persist.
+func (w *Wiki) SetCloud(terms []CloudTerm) {
+ w.cloud.Set(terms)
+}
+
+// PersistCloud writes the current cloud cache to wiki_state. Called
+// by the digest.Manager after a successful rebuild.
+func (w *Wiki) PersistCloud(ctx context.Context) error {
+ return w.persistCloud(ctx)
+}
+
+func (w *Wiki) buildCloud(ctx context.Context, k int, stopwordsExtra []string) ([]CloudTerm, error) {
+ if err := ctx.Err(); err != nil {
+ return nil, err
+ }
+
+ rows, err := w.db.QueryContext(ctx, "SELECT body FROM pages")
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ b := newCloudBuilder(stopwordsExtra)
+ unigrams := make(map[string]int)
+ bigrams := make(map[string]int)
+
+ for rows.Next() {
+ if err := ctx.Err(); err != nil { // re-check per row so a long scan stops promptly
+ return nil, err
+ }
+ var body string
+ if err := rows.Scan(&body); err != nil {
+ continue // NOTE(review): scan errors are skipped silently; confirm this best-effort is intended
+ }
+ b.addPage(body, unigrams, bigrams)
+ }
+ if err := rows.Err(); err != nil {
+ return nil, err
+ }
+
+ // Merge the two count maps before selecting top-K so a strong
+ // bigram can outrank a weak unigram (and vice versa). Bigram keys
+ // contain a space, so they never collide with unigram keys.
+ merged := make(map[string]int, len(unigrams)+len(bigrams))
+ for t, n := range unigrams {
+ merged[t] = n
+ }
+ for t, n := range bigrams {
+ merged[t] = n
+ }
+ return topK(merged, k), nil
+}
+
+// cloudCache is a single-slot cache for the rebuilt cloud. The
+// 5-minute ticker (Step 6) calls Set; readers (digest renderer) call
+// Get. A read returns whatever was last set even if the ticker is
+// behind — the digest's job is "good orientation," not "perfectly
+// fresh stats."
+type cloudCache struct {
+ mu sync.RWMutex
+ terms []CloudTerm
+ // set is true once Set has been called at least once. Readers
+ // distinguish "no cloud yet" (cold start) from "cloud is empty"
+ // (truly empty wiki) by checking set.
+ set bool
+ // version is bumped on each Set. The digest cache uses it as a
+ // change signal so it can invalidate rendered output without
+ // re-comparing slices.
+ version uint64
+}
+
+func (c *cloudCache) Set(terms []CloudTerm) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ // Defensive copy: caller may continue to mutate the slice.
+ cp := make([]CloudTerm, len(terms))
+ copy(cp, terms)
+ c.terms = cp
+ c.set = true
+ c.version++
+}
+
+// Get returns a copy of the current cloud and whether one has been
+// computed yet.
+func (c *cloudCache) Get() ([]CloudTerm, bool) {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+ if !c.set {
+ return nil, false
+ }
+ cp := make([]CloudTerm, len(c.terms))
+ copy(cp, c.terms)
+ return cp, true
+}
+
+// Version returns the monotonic change counter. Pairs with
+// recentsLRU.version() for digest cache invalidation.
+func (c *cloudCache) Version() uint64 {
+ c.mu.RLock()
+ defer c.mu.RUnlock()
+ return c.version
+}
diff --git a/internal/wiki/cloud_test.go b/internal/wiki/cloud_test.go
new file mode 100644
index 0000000..208ccae
--- /dev/null
+++ b/internal/wiki/cloud_test.go
@@ -0,0 +1,212 @@
+package wiki
+
+import (
+ "context"
+ "reflect"
+ "strings"
+ "testing"
+)
+
+func TestTokenize_Basic(t *testing.T) {
+ b := newCloudBuilder(nil)
+ got := b.tokenize("Hello, world! This is mind-map.")
+ want := []string{"hello", "world", "this", "is", "mind-map"}
+ if !reflect.DeepEqual(got, want) {
+ t.Fatalf("tokenize: got %v, want %v", got, want)
+ }
+}
+
+func TestTokenize_KeepsHyphensAndUnderscores(t *testing.T) {
+ b := newCloudBuilder(nil)
+ got := b.tokenize("page_count and mind-map are tokens")
+ if !contains(got, "page_count") {
+ t.Fatalf("expected page_count intact: %v", got)
+ }
+ if !contains(got, "mind-map") {
+ t.Fatalf("expected mind-map intact: %v", got)
+ }
+}
+
+func TestTokenize_StripsWikilinkBrackets(t *testing.T) {
+ b := newCloudBuilder(nil)
+ got := b.tokenize("see [[projects/mind-map]] for details")
+ // '/' is a separator, so we get the segments individually.
+ if !contains(got, "projects") || !contains(got, "mind-map") {
+ t.Fatalf("wikilink target words missing: %v", got)
+ }
+ for _, tok := range got {
+ if strings.ContainsAny(tok, "[]") {
+ t.Fatalf("bracket leaked into token %q", tok)
+ }
+ }
+}
+
+func TestTokenize_LowercasesUnicode(t *testing.T) {
+ b := newCloudBuilder(nil)
+ got := b.tokenize("Привет МИР")
+ want := []string{"привет", "мир"}
+ if !reflect.DeepEqual(got, want) {
+ t.Fatalf("tokenize unicode: got %v, want %v", got, want)
+ }
+}
+
+func TestIsStopword(t *testing.T) {
+ b := newCloudBuilder([]string{"TODO"})
+ cases := map[string]bool{
+ "the": true, // default
+ "todo": true, // user-added, case-folded
+ "wiki": false, // domain term, not filtered
+ "a": true, // length<2 short-circuit (and in defaults)
+ "x": true, // length<2
+ "42": true, // all-digit
+ "v1": false, // alnum mix, keep
+ "mind": false,
+ }
+ for tok, want := range cases {
+ if got := b.isStopword(tok); got != want {
+ t.Errorf("isStopword(%q) = %v, want %v", tok, got, want)
+ }
+ }
+}
+
+func TestAddPage_UnigramAndBigramCounts(t *testing.T) {
+ b := newCloudBuilder(nil)
+ uni := map[string]int{}
+ bi := map[string]int{}
+ b.addPage("wiki engine. wiki engine.", uni, bi)
+
+ if uni["wiki"] != 2 || uni["engine"] != 2 {
+ t.Fatalf("unigram counts wrong: %v", uni)
+ }
+ if bi["wiki engine"] != 2 {
+ t.Fatalf("bigram count wrong: %v", bi)
+ }
+ // "engine wiki" crosses a sentence boundary but our tokenizer
+ // treats '.' as a separator, not a sentence-aware split. The
+ // resulting bigram across "." is intentional — we don't have
+ // sentence info and a bigram across punctuation is still a
+ // real adjacent-token pair in the text.
+ if bi["engine wiki"] != 1 {
+ t.Fatalf("expected one engine->wiki bigram: %v", bi)
+ }
+}
+
+func TestAddPage_StopwordsFilterBothBigramEnds(t *testing.T) {
+ b := newCloudBuilder(nil)
+ uni := map[string]int{}
+ bi := map[string]int{}
+ // "the wiki" → unigram "wiki" counts (the is stopword),
+ // but bigram "the wiki" must NOT be recorded.
+ b.addPage("the wiki is here. the wiki is here.", uni, bi)
+
+ if _, ok := bi["the wiki"]; ok {
+ t.Fatalf("stopword-led bigram leaked: %v", bi)
+ }
+ if _, ok := bi["wiki is"]; ok {
+ t.Fatalf("stopword-tailed bigram leaked: %v", bi)
+ }
+ if uni["wiki"] != 2 {
+ t.Fatalf("unigram counts off: %v", uni)
+ }
+}
+
+func TestTopK_OrderingAndTieBreak(t *testing.T) {
+ counts := map[string]int{
+ "banana": 5,
+ "apple": 5,
+ "cherry": 3,
+ "date": 1,
+ }
+ got := topK(counts, 3)
+ want := []CloudTerm{
+ {Term: "apple", Count: 5},
+ {Term: "banana", Count: 5},
+ {Term: "cherry", Count: 3},
+ }
+ if !reflect.DeepEqual(got, want) {
+ t.Fatalf("topK: got %v, want %v", got, want)
+ }
+}
+
+func TestTopK_Empty(t *testing.T) {
+ if got := topK(nil, 5); got != nil {
+ t.Fatalf("nil input should return nil, got %v", got)
+ }
+ if got := topK(map[string]int{"a": 1}, 0); got != nil {
+ t.Fatalf("k=0 should return nil, got %v", got)
+ }
+}
+
+func TestBuildCloud_EndToEnd(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ // Seed extra content that should dominate the cloud.
+ if err := w.CreatePage(ctx, "topics/sqlite",
+ "# SQLite\n\nSQLite is a database. SQLite is fast. SQLite is small.\n"); err != nil {
+ t.Fatalf("seed: %v", err)
+ }
+
+ terms, err := w.buildCloud(ctx, 10, nil)
+ if err != nil {
+ t.Fatalf("buildCloud: %v", err)
+ }
+ if len(terms) == 0 {
+ t.Fatalf("expected non-empty cloud")
+ }
+
+ // "sqlite" should be the top unigram now (4+ occurrences across pages).
+ found := false
+ for _, term := range terms {
+ if term.Term == "sqlite" {
+ found = true
+ if term.Count < 3 {
+ t.Errorf("sqlite count surprisingly low: %d", term.Count)
+ }
+ }
+ }
+ if !found {
+ t.Fatalf("sqlite missing from top-10: %v", terms)
+ }
+
+ // No stopwords leaked.
+ for _, term := range terms {
+ for _, sw := range []string{"the", "is", "a", "and"} {
+ if term.Term == sw {
+ t.Errorf("stopword %q in cloud", sw)
+ }
+ }
+ }
+}
+
+func TestCloudCache_RoundTrip(t *testing.T) {
+ c := &cloudCache{}
+ if got, ok := c.Get(); ok {
+ t.Fatalf("uninitialized cache should report not-set; got %v", got)
+ }
+ c.Set([]CloudTerm{{Term: "x", Count: 1}})
+ got, ok := c.Get()
+ if !ok {
+ t.Fatalf("after Set, Get should report set")
+ }
+ if !reflect.DeepEqual(got, []CloudTerm{{Term: "x", Count: 1}}) {
+ t.Fatalf("roundtrip mismatch: %v", got)
+ }
+
+ // Mutating the returned slice must not affect the cache.
+ got[0].Term = "MUTATED"
+ again, _ := c.Get()
+ if again[0].Term != "x" {
+ t.Fatalf("cache leaked internal state: %v", again)
+ }
+}
+
+func contains(ss []string, s string) bool {
+ for _, x := range ss {
+ if x == s {
+ return true
+ }
+ }
+ return false
+}
diff --git a/internal/wiki/digest.go b/internal/wiki/digest.go
new file mode 100644
index 0000000..468a98c
--- /dev/null
+++ b/internal/wiki/digest.go
@@ -0,0 +1,309 @@
+package wiki
+
+import (
+ "context"
+ "fmt"
+ "sort"
+ "strings"
+ "sync"
+)
+
+// AreaSummary is one entry under `## Areas` in the rendered digest:
+// a top-level directory, how many pages live under it, and (if the
+// directory has an `index.md`) that index page's title as a one-line
+// description.
+type AreaSummary struct {
+ Path string `json:"path"`
+ PageCount int `json:"page_count"`
+ IndexTitle string `json:"index_title,omitempty"`
+}
+
+// Digest is the structured form of the per-conversation orientation
+// blob. The MCP `get_wiki_digest` tool and HTTP `/api/digest` endpoint
+// return this — the markdown is what an LLM consumes; the typed fields
+// let the WebUI render its own views (e.g. a word-cloud widget) without
+// re-parsing the markdown.
+type Digest struct {
+ PageCount int `json:"page_count"`
+ Cloud []CloudTerm `json:"cloud_terms"`
+ Recents []string `json:"recents"`
+ Areas []AreaSummary `json:"areas"`
+ Markdown string `json:"markdown"`
+}
+
+// defaultMaxRenderBytes is the soft cap on the rendered markdown.
+// Trim order when over: recents -> cloud -> areas (never). Matches
+// the plan's ~4 KB target. Tunable via config (Step 7).
+const defaultMaxRenderBytes = 4096
+
+// digestCache is a single-slot cache for the rendered Digest, keyed
+// by the (cloud version, recents seq, page count) tuple at render
+// time. The digest itself is a few-hundred-byte structure; what we're
+// saving is the SQL roundtrip for area counts and the render loop,
+// not the allocation.
+type digestCache struct {
+ mu sync.Mutex
+ cloudVer uint64
+ recentsSeq uint64
+ pageCount int
+ cached *Digest
+}
+
+// get returns the cached digest if (cloudVer, recentsSeq, pageCount)
+// match the supplied values. pageCount is part of the key because a
+// page added or removed without touching the LRU (rare, but happens
+// on reindex for pure-content-change pages) still changes the header
+// sentence ("This wiki contains N pages...").
+//
+// Returns (nil, false) on a miss.
+func (c *digestCache) get(cloudVer, recentsSeq uint64, pageCount int) (*Digest, bool) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ if c.cached == nil {
+ return nil, false
+ }
+ if c.cloudVer != cloudVer || c.recentsSeq != recentsSeq || c.pageCount != pageCount {
+ return nil, false
+ }
+ return c.cached, true
+}
+
+func (c *digestCache) set(cloudVer, recentsSeq uint64, pageCount int, d *Digest) {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ c.cloudVer = cloudVer
+ c.recentsSeq = recentsSeq
+ c.pageCount = pageCount
+ c.cached = d
+}
+
+// invalidate clears the cache. Used in tests and on schema rebuilds
+// (Step 4); CRUD doesn't need to call this because version bumps
+// already cover the cache invalidation contract.
+func (c *digestCache) invalidate() {
+ c.mu.Lock()
+ defer c.mu.Unlock()
+ c.cached = nil
+}
+
+// Digest returns the current orientation digest. Cheap on cache hit;
+// on miss, builds in O(pages) for the area counts and O(K) for the
+// render. Safe for concurrent callers.
+//
+// This is the function HTTP `/api/digest` and the MCP `get_wiki_digest`
+// tool call. It is also called transitively from the existing
+// `get_wiki_context` (see Step 5) so old clients see the new data
+// shape without breakage.
+func (w *Wiki) Digest(ctx context.Context) (*Digest, error) {
+ if err := ctx.Err(); err != nil {
+ return nil, err
+ }
+
+ pageCount, err := w.pageCount(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("digest page count: %w", err)
+ }
+
+ cloudVer := w.cloud.Version()
+ recentsSeq := w.recents.version()
+
+ if d, ok := w.digest.get(cloudVer, recentsSeq, pageCount); ok {
+ return d, nil
+ }
+
+ areas, err := w.areaSummaries(ctx)
+ if err != nil {
+ return nil, fmt.Errorf("digest areas: %w", err)
+ }
+
+ cloudTerms, _ := w.cloud.Get() // ok == false → empty, render copes
+ recents := w.recents.snapshot()
+
+ d := &Digest{
+ PageCount: pageCount,
+ Cloud: cloudTerms,
+ Recents: recents,
+ Areas: areas,
+ }
+ d.Markdown = renderDigestMarkdown(d, w.renderCap())
+
+ w.digest.set(cloudVer, recentsSeq, pageCount, d)
+ return d, nil
+}
+
+// renderCap returns the effective byte cap to pass into the markdown
+// renderer. Normalized in Open() to:
+//
+// > 0 → trim to that size
+// == 0 → defaulted, never observed here
+// < 0 → no trimming
+//
+// The renderer treats <= 0 uniformly as "no trim," so we forward
+// negative values straight through.
+func (w *Wiki) renderCap() int {
+ return w.maxRenderBytes
+}
+
+// pageCount runs the same SELECT COUNT(*) the Context handler uses.
+// Lifted into a helper so Digest can share it.
+func (w *Wiki) pageCount(ctx context.Context) (int, error) {
+ var n int
+ if err := w.db.QueryRowContext(ctx, "SELECT COUNT(*) FROM pages").Scan(&n); err != nil {
+ return 0, err
+ }
+ return n, nil
+}
+
+// areaSummaries returns one entry per top-level directory in the wiki,
+// with the page count and (if the directory has an `area/index` page)
+// the title of that index page. Sorted by descending page count, then
+// by name — same shape as the rendered markdown.
+//
+// An area with no pages under it cannot exist (the source data is the
+// `pages` table; empty dirs aren't tracked). A flat-rooted page like
+// "readme" with no slash is not an area; only paths containing `/`
+// contribute. This matches what topLevelDirs() exposes via filesystem
+// listing — the two should agree, but areaSummaries is the source of
+// truth for the digest because it's driven by indexed content, not
+// filesystem state.
+func (w *Wiki) areaSummaries(ctx context.Context) ([]AreaSummary, error) {
+ rows, err := w.db.QueryContext(ctx, "SELECT path, title FROM pages")
+ if err != nil {
+ return nil, err
+ }
+ defer rows.Close()
+
+ type acc struct {
+ count int
+ indexTitle string
+ }
+ bucket := map[string]*acc{}
+
+ for rows.Next() {
+ var path, title string
+ if err := rows.Scan(&path, &title); err != nil {
+ continue // NOTE(review): scan errors are skipped silently; confirm this best-effort is intended
+ }
+ slash := strings.IndexByte(path, '/')
+ if slash < 0 {
+ continue // flat-rooted, not an area
+ }
+ area := path[:slash]
+ a, ok := bucket[area]
+ if !ok {
+ a = &acc{}
+ bucket[area] = a
+ }
+ a.count++
+ // The area's index page is `area/index`. Record its title
+ // once; if for some reason there are multiple (shouldn't be,
+ // PRIMARY KEY on path prevents it), the last one wins.
+ if path == area+"/index" {
+ a.indexTitle = title
+ }
+ }
+ if err := rows.Err(); err != nil {
+ return nil, err
+ }
+
+ out := make([]AreaSummary, 0, len(bucket))
+ for name, a := range bucket {
+ out = append(out, AreaSummary{
+ Path: name,
+ PageCount: a.count,
+ IndexTitle: a.indexTitle,
+ })
+ }
+ sort.Slice(out, func(i, j int) bool {
+ if out[i].PageCount != out[j].PageCount {
+ return out[i].PageCount > out[j].PageCount
+ }
+ return out[i].Path < out[j].Path
+ })
+ return out, nil
+}
+
+// renderDigestMarkdown produces the markdown blob shown to LLMs. The
+// shape mirrors the example in the plan; ordering of sections is
+// header -> cloud line -> Areas -> Recently active -> footer.
+//
+// When the assembled body exceeds maxBytes the renderer trims:
+// 1. drop recents from the tail until under cap, then
+// 2. drop cloud terms from the tail until under cap.
+//
+// Areas are never trimmed — they are the smallest section and the
+// most structurally important: an agent that loses the area list
+// loses the map of the wiki. The footer hint is also preserved.
+//
+// If maxBytes <= 0 no trimming is applied. Useful for tests that want
+// to verify full content.
+func renderDigestMarkdown(d *Digest, maxBytes int) string {
+ cloud := d.Cloud
+ recents := d.Recents
+
+ for {
+ var sb strings.Builder
+ writeDigestBody(&sb, d.PageCount, cloud, d.Areas, recents)
+ out := sb.String()
+ if maxBytes <= 0 || len(out) <= maxBytes {
+ return out
+ }
+ // Trim recents first.
+ if len(recents) > 0 {
+ recents = recents[:len(recents)-1]
+ continue
+ }
+ // Then trim cloud.
+ if len(cloud) > 0 {
+ cloud = cloud[:len(cloud)-1]
+ continue
+ }
+ // Already minimal; return what we have, even if over cap.
+ // Areas + header alone exceeding 4 KB would require a
+ // wiki with hundreds of top-level dirs — unlikely, but
+ // truncating areas would be a worse failure mode.
+ return out
+ }
+}
+
+func writeDigestBody(sb *strings.Builder, pageCount int, cloud []CloudTerm, areas []AreaSummary, recents []string) {
+ areaCount := len(areas)
+ if areaCount == 1 {
+ fmt.Fprintf(sb, "This wiki contains %d pages across 1 area.", pageCount)
+ } else {
+ fmt.Fprintf(sb, "This wiki contains %d pages across %d areas.", pageCount, areaCount)
+ }
+
+ if len(cloud) > 0 {
+ sb.WriteString(" About:\n")
+ for i, t := range cloud {
+ if i > 0 {
+ sb.WriteString(", ")
+ }
+ sb.WriteString(t.Term)
+ }
+ sb.WriteString("\n")
+ } else {
+ sb.WriteString("\n")
+ }
+
+ if len(areas) > 0 {
+ sb.WriteString("\n## Areas\n")
+ for _, a := range areas {
+ fmt.Fprintf(sb, "- %s (%d)", a.Path, a.PageCount)
+ if a.IndexTitle != "" {
+ fmt.Fprintf(sb, " — %s/index: %q", a.Path, a.IndexTitle)
+ }
+ sb.WriteString("\n")
+ }
+ }
+
+ if len(recents) > 0 {
+ sb.WriteString("\n## Recently active\n")
+ for _, p := range recents {
+ fmt.Fprintf(sb, "- %s\n", p)
+ }
+ }
+
+ sb.WriteString("\nFull skill: SKILL.md. Use `get_wiki_digest` for the live version.\n")
+}
diff --git a/internal/wiki/digest_test.go b/internal/wiki/digest_test.go
new file mode 100644
index 0000000..1f2ab37
--- /dev/null
+++ b/internal/wiki/digest_test.go
@@ -0,0 +1,297 @@
+package wiki
+
+import (
+ "context"
+ "os"
+ "path/filepath"
+ "strings"
+ "testing"
+)
+
+func TestDigest_StructuralFields(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ d, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("Digest: %v", err)
+ }
+
+ if d.PageCount == 0 {
+ t.Fatal("page count should be > 0")
+ }
+ if d.Markdown == "" {
+ t.Fatal("markdown should not be empty")
+ }
+ // testWiki creates pages under projects/ and people/ — at least
+ // two areas should surface.
+ if len(d.Areas) < 2 {
+ t.Fatalf("expected >= 2 areas, got %d: %v", len(d.Areas), d.Areas)
+ }
+ // Cloud is empty because the ticker hasn't run yet (cold start).
+ // len of a nil slice is 0, so one check covers nil and empty.
+ if len(d.Cloud) != 0 {
+ t.Fatalf("cloud should be empty on cold start, got %v", d.Cloud)
+ }
+}
+
+func TestDigest_MarkdownShape(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ // Seed cloud so we exercise the "About:" line too.
+ w.cloud.Set([]CloudTerm{
+ {Term: "wiki", Count: 10},
+ {Term: "mind-map", Count: 7},
+ })
+ // Seed recents.
+ w.recents.touch("projects/mind-map")
+ w.recents.touch("index")
+ // Bust the digest cache because we mutated state directly.
+ w.digest.invalidate()
+
+ d, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("Digest: %v", err)
+ }
+
+ md := d.Markdown
+ t.Logf("rendered:\n%s", md)
+
+ mustContain := []string{
+ "This wiki contains",
+ "About:",
+ "wiki, mind-map",
+ "## Areas",
+ "## Recently active",
+ "- index",
+ "- projects/mind-map",
+ "Full skill: SKILL.md",
+ "get_wiki_digest",
+ }
+ for _, s := range mustContain {
+ if !strings.Contains(md, s) {
+ t.Errorf("markdown missing %q\n---\n%s\n---", s, md)
+ }
+ }
+}
+
+func TestDigest_AreaCountsAndIndexTitle(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ // Add an index page under "projects" with a known title.
+ if err := w.CreatePage(ctx, "projects/index", `---
+title: Active Projects
+---
+# Active Projects
+`); err != nil {
+ t.Fatalf("create projects/index: %v", err)
+ }
+
+ d, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("Digest: %v", err)
+ }
+
+ var found *AreaSummary
+ for i := range d.Areas {
+ if d.Areas[i].Path == "projects" {
+ found = &d.Areas[i]
+ break
+ }
+ }
+ if found == nil {
+ t.Fatalf("projects area missing: %+v", d.Areas)
+ }
+ if found.IndexTitle != "Active Projects" {
+ t.Errorf("expected index title 'Active Projects', got %q", found.IndexTitle)
+ }
+ if found.PageCount < 2 {
+ t.Errorf("projects should have >=2 pages (mind-map + index), got %d", found.PageCount)
+ }
+
+ // The rendered area line should include the index title quoted.
+ if !strings.Contains(d.Markdown, `projects/index: "Active Projects"`) {
+ t.Errorf("markdown missing index title:\n%s", d.Markdown)
+ }
+}
+
+func TestDigest_CacheHit(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ // First call populates the cache.
+ first, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("first Digest: %v", err)
+ }
+
+ // Second call with no state change returns the *same* pointer
+ // (the cache stores the *Digest; a hit returns it as-is).
+ second, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("second Digest: %v", err)
+ }
+ if first != second {
+ t.Errorf("expected cache hit to return same *Digest pointer")
+ }
+}
+
+func TestDigest_CacheInvalidatedByLRUChange(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ first, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("first: %v", err)
+ }
+
+ // Touching the LRU bumps recents seq → cache miss next read.
+ w.recents.touch("index")
+
+ second, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("second: %v", err)
+ }
+ if first == second {
+ t.Errorf("expected fresh *Digest after recents change")
+ }
+ if !strings.Contains(second.Markdown, "- index") {
+ t.Errorf("new recents not reflected in markdown:\n%s", second.Markdown)
+ }
+}
+
+func TestDigest_CacheInvalidatedByCloudChange(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ first, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("first: %v", err)
+ }
+
+ w.cloud.Set([]CloudTerm{{Term: "wiki", Count: 1}})
+
+ second, err := w.Digest(ctx)
+ if err != nil {
+ t.Fatalf("second: %v", err)
+ }
+ if first == second {
+ t.Errorf("expected fresh *Digest after cloud set")
+ }
+ if !strings.Contains(second.Markdown, "About:") {
+ t.Errorf("cloud not reflected in markdown:\n%s", second.Markdown)
+ }
+}
+
+func TestRenderDigest_TrimToMaxBytes(t *testing.T) {
+ // Build a digest that's deliberately over-cap.
+ cloud := make([]CloudTerm, 50)
+ for i := range cloud {
+ cloud[i] = CloudTerm{Term: strings.Repeat("x", 20), Count: 1}
+ }
+ recents := make([]string, 50)
+ for i := range recents {
+ recents[i] = strings.Repeat("path", 20)
+ }
+ d := &Digest{
+ PageCount: 100,
+ Cloud: cloud,
+ Recents: recents,
+ Areas: []AreaSummary{{Path: "a", PageCount: 5}},
+ }
+
+ const maxBytes = 512 // not named "cap": that would shadow the builtin
+ md := renderDigestMarkdown(d, maxBytes)
+ if len(md) > maxBytes {
+ // The trimmer is best-effort: if the unavoidable parts
+ // (areas + header + footer) already exceed cap we accept
+ // being over. But in this test those are tiny, so we
+ // should be under.
+ t.Errorf("rendered len=%d > cap=%d", len(md), maxBytes)
+ }
+ // Areas + header + footer must still be intact.
+ mustContain := []string{"## Areas", "- a (5)", "Full skill"}
+ for _, s := range mustContain {
+ if !strings.Contains(md, s) {
+ t.Errorf("trim dropped required section %q:\n%s", s, md)
+ }
+ }
+}
+
+func TestRenderDigest_NoCloudNoRecents(t *testing.T) {
+ d := &Digest{
+ PageCount: 3,
+ Areas: []AreaSummary{
+ {Path: "notes", PageCount: 3},
+ },
+ }
+ md := renderDigestMarkdown(d, 0)
+ if strings.Contains(md, "About:") {
+ t.Errorf("empty cloud should not produce About: line:\n%s", md)
+ }
+ if strings.Contains(md, "## Recently active") {
+ t.Errorf("empty recents should not produce section:\n%s", md)
+ }
+ if !strings.Contains(md, "## Areas") || !strings.Contains(md, "- notes (3)") {
+ t.Errorf("areas missing:\n%s", md)
+ }
+}
+
+func TestAreaSummaries_FlatRootedPagesIgnored(t *testing.T) {
+ w, _ := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ // `index` is flat-rooted; should not produce an "index" area.
+ areas, err := w.areaSummaries(ctx)
+ if err != nil {
+ t.Fatalf("areaSummaries: %v", err)
+ }
+ for _, a := range areas {
+ if a.Path == "index" {
+ t.Fatalf("flat-rooted page leaked into areas: %+v", areas)
+ }
+ }
+}
+
+func TestReindex_RemovesFromLRU(t *testing.T) {
+ w, dir := testWiki(t)
+ defer w.Close()
+ ctx := context.Background()
+
+ // Touch and verify presence.
+ if _, err := w.GetPage(ctx, "index"); err != nil {
+ t.Fatalf("GetPage: %v", err)
+ }
+ found := false
+ for _, p := range w.recents.snapshot() {
+ if p == "index" {
+ found = true
+ }
+ }
+ if !found {
+ t.Fatalf("index should be in LRU after GetPage")
+ }
+
+ // Raw-filesystem delete + reindex (simulating sync removing a file).
+ if err := os.Remove(filepath.Join(dir, "index.md")); err != nil {
+ t.Fatalf("remove file: %v", err)
+ }
+ if _, err := w.Reindex(ctx); err != nil {
+ t.Fatalf("reindex: %v", err)
+ }
+
+ for _, p := range w.recents.snapshot() {
+ if p == "index" {
+ t.Fatalf("reindex should have purged stale LRU entry: %v", w.recents.snapshot())
+ }
+ }
+}
diff --git a/internal/wiki/index.go b/internal/wiki/index.go
index e847f92..0379dc6 100644
--- a/internal/wiki/index.go
+++ b/internal/wiki/index.go
@@ -159,6 +159,13 @@ func (w *Wiki) Reindex(ctx context.Context) (ReindexStats, error) {
slog.Warn("reindex remove error", slog.String("page", pagePath), slog.Any("error", err))
continue
}
+ // Keep the recents LRU consistent with `pages`: a page
+ // that vanishes via raw-filesystem delete + reindex
+ // (common after `git pull` in sync) must drop from the
+ // LRU here, since DeletePage() was never called. Without
+ // this hook the digest's "recently active" can point at
+ // a 404.
+ w.recents.remove(pagePath)
removed++
}
}
diff --git a/internal/wiki/pages.go b/internal/wiki/pages.go
index 38291ec..d030cbd 100644
--- a/internal/wiki/pages.go
+++ b/internal/wiki/pages.go
@@ -50,6 +50,11 @@ func (w *Wiki) GetPage(ctx context.Context, pagePath string) (*Page, error) {
slog.Warn("failed to get backlinks", slog.String("page", pagePath), slog.Any("error", err))
}
+ // LRU touch reflects that the agent actually saw this page. We
+ // only reach here on a successful row scan, so a typo'd path that
+ // hit the "page not found" branch above will not pollute recents.
+ w.recents.touch(pagePath)
+
return &Page{
Path: pagePath,
Title: title,
@@ -148,7 +153,11 @@ func (w *Wiki) CreatePage(ctx context.Context, pagePath string, content string)
}
slog.Info("page created", slog.String("page", pagePath))
- return w.indexPage(ctx, pagePath)
+ if err := w.indexPage(ctx, pagePath); err != nil {
+ return err
+ }
+ w.recents.touch(pagePath)
+ return nil
}
// UpdatePage replaces the content of an existing page.
@@ -178,7 +187,11 @@ func (w *Wiki) UpdatePage(ctx context.Context, pagePath string, content string)
}
slog.Info("page updated", slog.String("page", pagePath))
- return w.indexPage(ctx, pagePath)
+ if err := w.indexPage(ctx, pagePath); err != nil {
+ return err
+ }
+ w.recents.touch(pagePath)
+ return nil
}
// DeletePage removes a page from the filesystem and index.
@@ -204,7 +217,13 @@ func (w *Wiki) DeletePage(ctx context.Context, pagePath string) error {
}
slog.Info("page deleted", slog.String("page", pagePath))
- return w.removePageIndex(ctx, pagePath)
+ if err := w.removePageIndex(ctx, pagePath); err != nil {
+ return err
+ }
+ // The page is gone; leaving it in recents would point the agent
+ // at a 404. Drop the entry rather than promote it.
+ w.recents.remove(pagePath)
+ return nil
}
// ErrDestinationExists is returned by MovePage when the destination
@@ -310,6 +329,10 @@ func (w *Wiki) MovePage(ctx context.Context, fromPath, toPath string, opts MoveO
return fmt.Errorf("index new page: %w", err)
}
+ // Treat a move as one continuous "active use" rather than dropping
+ // the old name and freshly inserting the new one. See recentsLRU.rename.
+ w.recents.rename(from, to)
+
slog.Info("page moved",
slog.String("from", from),
slog.String("to", to),
@@ -359,7 +382,14 @@ func (w *Wiki) GetBacklinks(ctx context.Context, pagePath string) ([]string, err
return nil, err
}
- return w.getBacklinks(ctx, pagePath)
+ backlinks, err := w.getBacklinks(ctx, pagePath)
+ if err != nil {
+ return nil, err
+ }
+ // GetBacklinks is "I'm looking at this page's incoming links" —
+ // an active use of the target page, even if its body wasn't read.
+ w.recents.touch(pagePath)
+ return backlinks, nil
}
// Link is a single source→target edge between two pages.
@@ -393,7 +423,13 @@ func (w *Wiki) AllLinks(ctx context.Context) ([]Link, error) {
return links, nil
}
-// Context returns a WikiContext overview.
+// Context returns a WikiContext overview. The legacy fields
+// (PageCount, RecentPages, TopLevelDirs) come from disk — recent_pages
+// is mtime-sorted, top_level_dirs is read from the filesystem — and
+// preserve the shape clients in the wild already depend on. The new
+// fields (Cloud, Recents, Areas, Markdown) come from the digest so
+// existing get_wiki_context callers get the orientation upgrade
+// without switching tool names.
func (w *Wiki) Context(ctx context.Context) (*WikiContext, error) {
if err := ctx.Err(); err != nil {
return nil, err
@@ -430,11 +466,25 @@ func (w *Wiki) Context(ctx context.Context) (*WikiContext, error) {
// Top-level dirs
dirs := w.topLevelDirs()
- return &WikiContext{
+ wctx := &WikiContext{
PageCount: count,
RecentPages: recent,
TopLevelDirs: dirs,
- }, nil
+ }
+
+ // Layer the digest's signals on top. A failure here doesn't fail
+ // the whole Context() call — the legacy fields are still valuable
+ // on their own, and the digest is an enhancement, not a contract.
+ if d, err := w.Digest(ctx); err == nil {
+ wctx.Cloud = d.Cloud
+ wctx.Recents = d.Recents
+ wctx.Areas = d.Areas
+ wctx.Markdown = d.Markdown
+ } else {
+ slog.Warn("context digest enrichment failed", slog.Any("error", err))
+ }
+
+ return wctx, nil
}
// --- locking ---
diff --git a/internal/wiki/recents.go b/internal/wiki/recents.go
new file mode 100644
index 0000000..2a45055
--- /dev/null
+++ b/internal/wiki/recents.go
@@ -0,0 +1,230 @@
+package wiki
+
+import (
+ "container/list"
+ "sync"
+)
+
+// recentsLRU is a fixed-capacity, most-recently-used-first list of page
+// paths. It tracks pages the user or agent has *actively* used rather
+// than what disk mtime says was changed most recently. Within this
+// package, GetPage, CreatePage, UpdatePage and GetBacklinks touch an
+// entry, MovePage renames it, and DeletePage / Reindex remove it. The
+// distinction from mtime matters when sync's copyToWiki bumps mtimes
+// for files the agent never looked at; an mtime-based "recents" would
+// surface those, an LRU reflects intent.
+//
+// The structure is a doubly-linked list plus a path->element index, so
+// touch / remove / rename are all O(1). Every method takes mu, so it is
+// safe for concurrent use.
+//
+// Persistence (snapshot to SQLite on a slow ticker) lives in state.go
+// and the ticker lives in the wiki lifecycle code; recentsLRU itself
+// is intentionally storage-agnostic.
+type recentsLRU struct {
+ // mu guards every field below.
+ mu sync.Mutex
+ // cap is the maximum number of tracked paths; newRecentsLRU clamps
+ // non-positive values to 20.
+ cap int
+ // ll holds paths with the most recently used at the front.
+ ll *list.List
+ // idx maps path -> list element for O(1) promote/remove.
+ idx map[string]*list.Element
+ // dirty is true when the in-memory state has diverged from the last
+ // persisted snapshot. The persistence ticker reads + clears it.
+ dirty bool
+ // seq is a monotonic counter bumped on every state-changing
+ // operation (touch / remove / rename / load). The digest cache
+ // uses it as a cheap "did anything change?" signal so it can
+ // invalidate rendered output without re-comparing snapshots.
+ // Wraps at uint64 max — irrelevant in practice.
+ seq uint64
+}
+
+// newRecentsLRU constructs an empty LRU that tracks at most capacity
+// paths. A non-positive capacity is clamped to the default (20).
+//
+// The parameter is named capacity rather than cap so the predeclared
+// builtin cap() is not shadowed inside the function.
+func newRecentsLRU(capacity int) *recentsLRU {
+	if capacity <= 0 {
+		capacity = 20
+	}
+	return &recentsLRU{
+		cap: capacity,
+		ll:  list.New(),
+		idx: make(map[string]*list.Element),
+	}
+}
+
+// touch marks path as the most recently used entry. A path that is
+// already tracked moves to the front; a new path is pushed to the front
+// and, if that exceeds the capacity, the entry at the back is evicted.
+// Either way the ring is marked dirty and the change counter advances.
+//
+// An empty path is a no-op, so call sites need no guard.
+func (r *recentsLRU) touch(path string) {
+	if path == "" {
+		return
+	}
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if existing, tracked := r.idx[path]; tracked {
+		r.ll.MoveToFront(existing)
+	} else {
+		r.idx[path] = r.ll.PushFront(path)
+		if r.ll.Len() > r.cap {
+			if tail := r.ll.Back(); tail != nil {
+				// Remove hands back the evicted element's value.
+				delete(r.idx, r.ll.Remove(tail).(string))
+			}
+		}
+	}
+	r.dirty = true
+	r.seq++
+}
+
+// remove forgets path, e.g. because the page was deleted and listing it
+// in recents would mislead the agent. An empty or untracked path leaves
+// the ring, the dirty flag and the change counter untouched.
+func (r *recentsLRU) remove(path string) {
+	if path == "" {
+		return
+	}
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	if elem, tracked := r.idx[path]; tracked {
+		r.ll.Remove(elem)
+		delete(r.idx, path)
+		r.dirty = true
+		r.seq++
+	}
+}
+
+// rename records that the page at `from` now lives at `to`. Called on
+// MovePage so the move shows up as one active use of the new name
+// rather than a drop of the old name plus an unrelated fresh insert.
+//
+// In every case the `to` entry ends up at the front of the ring:
+//
+//   - only `from` tracked: the existing element is relabelled to `to`
+//     (no allocation, no eviction) and promoted;
+//   - both tracked: the `from` entry is removed and `to` is promoted;
+//   - only `to` tracked: `to` is promoted;
+//   - neither tracked: `to` is inserted fresh, evicting the oldest
+//     entry if the ring is over capacity.
+//
+// If from == to this is exactly touch(to). An empty `to` never enters
+// the ring (mirroring touch); the `from` entry is simply dropped since
+// that path no longer names a page.
+func (r *recentsLRU) rename(from, to string) {
+	if to == "" {
+		r.remove(from)
+		return
+	}
+	if from == to {
+		r.touch(to)
+		return
+	}
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	fromElem, hasFrom := r.idx[from]
+	toElem, hasTo := r.idx[to]
+
+	switch {
+	case hasFrom && hasTo:
+		// Both present: drop `from`, promote `to`.
+		r.ll.Remove(fromElem)
+		delete(r.idx, from)
+		r.ll.MoveToFront(toElem)
+	case hasFrom:
+		// Reuse the existing element under the new name, then promote it.
+		fromElem.Value = to
+		delete(r.idx, from)
+		r.idx[to] = fromElem
+		r.ll.MoveToFront(fromElem)
+	case hasTo:
+		r.ll.MoveToFront(toElem)
+	default:
+		// Neither tracked: insert `to` fresh.
+		r.idx[to] = r.ll.PushFront(to)
+		if r.ll.Len() > r.cap {
+			if oldest := r.ll.Back(); oldest != nil {
+				r.ll.Remove(oldest)
+				delete(r.idx, oldest.Value.(string))
+			}
+		}
+	}
+	r.dirty = true
+	r.seq++
+}
+
+// snapshot copies the tracked paths into a new slice, most recent
+// first. The caller owns the result and may mutate it freely.
+func (r *recentsLRU) snapshot() []string {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	paths := make([]string, r.ll.Len())
+	i := 0
+	for elem := r.ll.Front(); elem != nil; elem = elem.Next() {
+		paths[i] = elem.Value.(string)
+		i++
+	}
+	return paths
+}
+
+// load discards the current contents and refills the ring from paths,
+// which are interpreted as most-recent-first. Empty strings and
+// duplicates are skipped, and filling stops once the capacity is
+// reached. The persistence layer calls this on Wiki.Open to restore the
+// last snapshot, so the dirty flag is cleared; the change counter still
+// advances.
+func (r *recentsLRU) load(paths []string) {
+	r.mu.Lock()
+	defer r.mu.Unlock()
+
+	r.ll.Init()
+	r.idx = make(map[string]*list.Element, len(paths))
+	for _, candidate := range paths {
+		if _, seen := r.idx[candidate]; candidate == "" || seen {
+			continue
+		}
+		// PushBack keeps the input order: first path stays at the front.
+		r.idx[candidate] = r.ll.PushBack(candidate)
+		if r.ll.Len() >= r.cap {
+			break
+		}
+	}
+	r.dirty = false
+	r.seq++
+}
+
+// version reports the current value of the change counter. The digest
+// cache keys rendered output on (cloudVersion, recentsVersion) and
+// rebuilds when either one advances.
+//
+// It costs one lock acquisition and one read, so it is cheap enough to
+// call on every digest read.
+func (r *recentsLRU) version() (current uint64) {
+	r.mu.Lock()
+	current = r.seq
+	r.mu.Unlock()
+	return current
+}
+
+// takeDirty atomically reads and clears the dirty flag, returning the
+// value it held. The persistence ticker uses it to skip writes when
+// nothing has changed since the last flush.
+func (r *recentsLRU) takeDirty() (was bool) {
+	r.mu.Lock()
+	was, r.dirty = r.dirty, false
+	r.mu.Unlock()
+	return was
+}
+
+// peekDirty reports the dirty flag without clearing it. The
+// digest.Manager's tick gate uses it so that probing for changes does
+// not race with the write that follows.
+func (r *recentsLRU) peekDirty() (dirty bool) {
+	r.mu.Lock()
+	dirty = r.dirty
+	r.mu.Unlock()
+	return dirty
+}
+
+// len reports how many paths are currently tracked. Test helper.
+func (r *recentsLRU) len() (n int) {
+	r.mu.Lock()
+	n = r.ll.Len()
+	r.mu.Unlock()
+	return n
+}
diff --git a/internal/wiki/recents_test.go b/internal/wiki/recents_test.go
new file mode 100644
index 0000000..e016a9e
--- /dev/null
+++ b/internal/wiki/recents_test.go
@@ -0,0 +1,230 @@
+package wiki
+
+import (
+ "context"
+ "reflect"
+ "testing"
+)
+
+// TestRecentsLRU_TouchAndOrder checks most-recent-first ordering and
+// that re-touching a tracked path promotes it to the front.
+func TestRecentsLRU_TouchAndOrder(t *testing.T) {
+	r := newRecentsLRU(3)
+	for _, p := range []string{"a", "b", "c"} {
+		r.touch(p)
+	}
+
+	want := []string{"c", "b", "a"}
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after initial touches: got %v, want %v", got, want)
+	}
+
+	// Touching the oldest entry again moves it to the front.
+	r.touch("a")
+	want = []string{"a", "c", "b"}
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after promote: got %v, want %v", got, want)
+	}
+}
+
+// TestRecentsLRU_Eviction checks that inserting beyond capacity drops
+// the least recently used entry.
+func TestRecentsLRU_Eviction(t *testing.T) {
+	r := newRecentsLRU(2)
+	for _, p := range []string{"a", "b", "c"} { // "c" pushes "a" out
+		r.touch(p)
+	}
+
+	if got := r.snapshot(); !reflect.DeepEqual(got, []string{"c", "b"}) {
+		t.Fatalf("expected [c b], got %v", got)
+	}
+}
+
+// TestRecentsLRU_EmptyTouchIgnored checks that touching the empty path
+// does not add an entry.
+func TestRecentsLRU_EmptyTouchIgnored(t *testing.T) {
+	r := newRecentsLRU(5)
+	r.touch("")
+	if n := r.len(); n != 0 {
+		t.Fatalf("empty path should not be tracked, len=%d", n)
+	}
+}
+
+// TestRecentsLRU_Remove checks that remove drops only the named entry
+// and that removing an untracked path changes nothing.
+func TestRecentsLRU_Remove(t *testing.T) {
+	r := newRecentsLRU(5)
+	for _, p := range []string{"a", "b", "c"} {
+		r.touch(p)
+	}
+	want := []string{"c", "a"}
+
+	r.remove("b")
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after remove b: got %v, want %v", got, want)
+	}
+
+	// An untracked path must leave the ring as it was.
+	r.remove("zzz")
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("noop remove changed state: got %v, want %v", got, want)
+	}
+}
+
+// TestRecentsLRU_RenameInPlace checks that renaming a tracked entry
+// relabels it and promotes it to the front, treating the move as an
+// active use of the new name.
+func TestRecentsLRU_RenameInPlace(t *testing.T) {
+	r := newRecentsLRU(5)
+	for _, p := range []string{"a", "b", "c"} { // resulting order: c b a
+		r.touch(p)
+	}
+
+	r.rename("b", "x")
+
+	want := []string{"x", "c", "a"}
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after rename b->x: got %v, want %v", got, want)
+	}
+}
+
+// TestRecentsLRU_RenameDestExists checks the overwrite-move case: when
+// both names are tracked the source entry disappears and the
+// destination is promoted.
+func TestRecentsLRU_RenameDestExists(t *testing.T) {
+	r := newRecentsLRU(5)
+	for _, p := range []string{"a", "b", "c"} { // resulting order: c b a
+		r.touch(p)
+	}
+
+	r.rename("a", "c")
+
+	want := []string{"c", "b"}
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after rename a->c (dest exists): got %v, want %v", got, want)
+	}
+}
+
+// TestRecentsLRU_RenameFromMissing checks that renaming an untracked
+// source behaves like touching the destination.
+func TestRecentsLRU_RenameFromMissing(t *testing.T) {
+	r := newRecentsLRU(5)
+	r.touch("a")
+
+	r.rename("zzz", "b")
+
+	want := []string{"b", "a"}
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after rename zzz->b: got %v, want %v", got, want)
+	}
+}
+
+// TestRecentsLRU_LoadSnapshotRoundtrip checks that load preserves the
+// given order and leaves the ring clean (not dirty).
+func TestRecentsLRU_LoadSnapshotRoundtrip(t *testing.T) {
+	want := []string{"a", "b", "c"}
+	r := newRecentsLRU(5)
+	r.load([]string{"a", "b", "c"})
+
+	if got := r.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("after load: got %v, want %v", got, want)
+	}
+	if dirty := r.takeDirty(); dirty {
+		t.Fatalf("load should clear dirty flag")
+	}
+}
+
+// TestRecentsLRU_LoadRespectsCapacity checks that load truncates its
+// input at the ring capacity and keeps the most recent entries (the
+// head of the input), not an arbitrary subset.
+func TestRecentsLRU_LoadRespectsCapacity(t *testing.T) {
+	r := newRecentsLRU(2)
+	r.load([]string{"a", "b", "c", "d"})
+	if r.len() != 2 {
+		t.Fatalf("load should stop at cap; len=%d", r.len())
+	}
+	// Input is most-recent-first, so truncation must keep the first two.
+	if got, want := r.snapshot(), []string{"a", "b"}; !reflect.DeepEqual(got, want) {
+		t.Fatalf("load kept wrong entries: got %v, want %v", got, want)
+	}
+}
+
+// TestRecentsLRU_DirtyTracking checks the dirty-flag lifecycle: clean
+// when new, set by touch, and cleared by takeDirty.
+func TestRecentsLRU_DirtyTracking(t *testing.T) {
+	r := newRecentsLRU(3)
+	if dirty := r.takeDirty(); dirty {
+		t.Fatalf("fresh ring should not be dirty")
+	}
+
+	r.touch("a")
+	if dirty := r.takeDirty(); !dirty {
+		t.Fatalf("touch should mark dirty")
+	}
+	if dirty := r.takeDirty(); dirty {
+		t.Fatalf("takeDirty should clear the flag")
+	}
+}
+
+// --- integration: touches fire on the right Wiki ops ---
+
+// TestWiki_LRUIntegration walks through the Wiki operations that should
+// affect the recents LRU and checks each one: GetPage, CreatePage,
+// UpdatePage and GetBacklinks touch; MovePage renames; DeletePage
+// removes; a failed GetPage leaves the LRU alone.
+//
+// Every front-of-list check guards against an empty snapshot so a
+// regression fails with a message instead of an index-out-of-range
+// panic.
+func TestWiki_LRUIntegration(t *testing.T) {
+	w, _ := testWiki(t)
+	defer w.Close()
+	ctx := context.Background()
+
+	// testWiki() seeded the wiki, and Reindex on Open() does not touch
+	// the LRU (indexing is plumbing, not "user used the page").
+	if got := w.recents.snapshot(); len(got) != 0 {
+		t.Fatalf("LRU should be empty after Open; got %v", got)
+	}
+
+	// GetPage touches.
+	if _, err := w.GetPage(ctx, "index"); err != nil {
+		t.Fatalf("GetPage: %v", err)
+	}
+	if got := w.recents.snapshot(); !reflect.DeepEqual(got, []string{"index"}) {
+		t.Fatalf("after GetPage: got %v", got)
+	}
+
+	// Failed GetPage does NOT touch.
+	if _, err := w.GetPage(ctx, "does/not/exist"); err == nil {
+		t.Fatalf("expected error on missing page")
+	}
+	if got := w.recents.snapshot(); !reflect.DeepEqual(got, []string{"index"}) {
+		t.Fatalf("failed GetPage polluted LRU: %v", got)
+	}
+
+	// CreatePage touches.
+	if err := w.CreatePage(ctx, "scratch", "# Scratch\n"); err != nil {
+		t.Fatalf("CreatePage: %v", err)
+	}
+	if got := w.recents.snapshot(); len(got) == 0 || got[0] != "scratch" {
+		t.Fatalf("CreatePage should put scratch at front: %v", got)
+	}
+
+	// UpdatePage touches.
+	if err := w.UpdatePage(ctx, "index", "# Welcome (updated)\n"); err != nil {
+		t.Fatalf("UpdatePage: %v", err)
+	}
+	if got := w.recents.snapshot(); len(got) == 0 || got[0] != "index" {
+		t.Fatalf("UpdatePage should promote index: %v", got)
+	}
+
+	// GetBacklinks touches.
+	if _, err := w.GetBacklinks(ctx, "projects/mind-map"); err != nil {
+		t.Fatalf("GetBacklinks: %v", err)
+	}
+	if got := w.recents.snapshot(); len(got) == 0 || got[0] != "projects/mind-map" {
+		t.Fatalf("GetBacklinks should promote target: %v", got)
+	}
+
+	// MovePage renames in the LRU.
+	if err := w.MovePage(ctx, "scratch", "notes/scratch", MoveOptions{}); err != nil {
+		t.Fatalf("MovePage: %v", err)
+	}
+	snap := w.recents.snapshot()
+	for _, p := range snap {
+		if p == "scratch" {
+			t.Fatalf("old name still in LRU after move: %v", snap)
+		}
+	}
+	if len(snap) == 0 || snap[0] != "notes/scratch" {
+		t.Fatalf("move dest should be at front: %v", snap)
+	}
+
+	// DeletePage removes.
+	if err := w.DeletePage(ctx, "notes/scratch"); err != nil {
+		t.Fatalf("DeletePage: %v", err)
+	}
+	for _, p := range w.recents.snapshot() {
+		if p == "notes/scratch" {
+			t.Fatalf("deleted page still in LRU: %v", w.recents.snapshot())
+		}
+	}
+}
+
+// TestWiki_LRUNoTouchOnFailedCreate checks that a CreatePage call
+// rejected because the page already exists leaves the LRU untouched.
+func TestWiki_LRUNoTouchOnFailedCreate(t *testing.T) {
+	w, _ := testWiki(t)
+	defer w.Close()
+
+	// Start from a known-empty LRU.
+	w.recents.load(nil)
+
+	// testWiki already seeded an "index" page, so this create must fail.
+	err := w.CreatePage(context.Background(), "index", "# dup\n")
+	if err == nil {
+		t.Fatalf("expected CreatePage to fail on existing page")
+	}
+	if tracked := w.recents.snapshot(); len(tracked) != 0 {
+		t.Fatalf("failed CreatePage polluted LRU: %v", tracked)
+	}
+}
diff --git a/internal/wiki/state.go b/internal/wiki/state.go
new file mode 100644
index 0000000..79bb030
--- /dev/null
+++ b/internal/wiki/state.go
@@ -0,0 +1,225 @@
+package wiki
+
+import (
+ "context"
+ "database/sql"
+ "encoding/json"
+ "errors"
+ "fmt"
+ "log/slog"
+ "time"
+)
+
+// wiki_state schema: a small key/value table for cross-restart persistence
+// of derived structures (recents LRU, word/phrase cloud). Distinct from
+// the `pages` index, which is rebuildable from disk — wiki_state holds
+// signals that *can't* be recovered from the markdown files alone:
+//
+// - "recent_lru" — the active-use ring (intent, not mtime). Lost on
+// restart without persistence; that's exactly the case the digest
+// plan is designed to avoid.
+// - "cloud" — the word/phrase cloud is rebuildable but expensive
+// (one full table scan + tokenization). Persisting it means a
+// freshly-restarted server has a digest immediately, not after
+// the first ticker tick (up to 5 minutes later).
+//
+// We intentionally do NOT persist the rendered digest markdown: it's
+// sub-ms to re-assemble from cloud + LRU, and the in-memory
+// digestCache already covers "don't re-format on every hit".
+
+// Keys of the rows stored in the wiki_state table.
+const (
+ // stateKeyRecentLRU holds the JSON-encoded recentsState snapshot.
+ stateKeyRecentLRU = "recent_lru"
+ // stateKeyCloud holds the JSON-encoded cloudState snapshot.
+ stateKeyCloud = "cloud"
+)
+
+// initStateSchema creates the wiki_state table. Called from initSchema.
+// Idempotent: the statement uses CREATE TABLE IF NOT EXISTS.
+//
+// Columns: key (primary key), value (the payload; the rows written by
+// this file are JSON documents) and updated (a timestamp string;
+// writeStateKey stores UTC RFC 3339 with nanoseconds).
+//
+// Returns the error from Exec, if any.
+func (w *Wiki) initStateSchema() error {
+ _, err := w.db.Exec(`
+ CREATE TABLE IF NOT EXISTS wiki_state (
+ key TEXT PRIMARY KEY,
+ value TEXT NOT NULL,
+ updated TEXT NOT NULL
+ );`)
+ return err
+}
+
+// recentsState is the on-disk shape of the persisted LRU. Stored as a
+// JSON document under wiki_state["recent_lru"].value. Items are listed
+// most-recent-first, matching recentsLRU.snapshot().
+type recentsState struct {
+ // Items are page paths, most recently used first.
+ Items []string `json:"items"`
+}
+
+// cloudState is the on-disk shape of the persisted cloud. Stored as a
+// JSON document under wiki_state["cloud"].value.
+type cloudState struct {
+ // Terms is the cloud as returned by w.cloud.Get() at persist time.
+ Terms []CloudTerm `json:"terms"`
+}
+
+// loadState restores the persisted recents LRU and cloud from
+// wiki_state into memory. It runs once at the end of Open(), after
+// Reindex. Nothing here is fatal: a missing or unparsable row is logged
+// and the process simply starts with an empty signal, exactly as a
+// brand-new wiki would.
+func (w *Wiki) loadState(ctx context.Context) {
+	if raw, found := w.readStateKey(ctx, stateKeyRecentLRU); found {
+		var persisted recentsState
+		if err := json.Unmarshal([]byte(raw), &persisted); err == nil {
+			// Reindex has already run, so `pages` is authoritative.
+			// Drop paths that vanished while the server was off
+			// (deleted, renamed on the raw filesystem, or sync-pulled
+			// away) so they don't come back as 404 candidates.
+			kept := w.filterAgainstIndex(ctx, persisted.Items)
+			w.recents.load(kept)
+			slog.Info("recents loaded from wiki_state",
+				slog.Int("persisted", len(persisted.Items)),
+				slog.Int("kept", len(kept)),
+			)
+		} else {
+			slog.Warn("wiki_state recent_lru parse failed", slog.Any("error", err))
+		}
+	}
+
+	if raw, found := w.readStateKey(ctx, stateKeyCloud); found {
+		var persisted cloudState
+		if err := json.Unmarshal([]byte(raw), &persisted); err == nil {
+			// The cloud is global frequency counts rather than per-page
+			// references, so the previous distribution is used as-is;
+			// it stays a reasonable approximation until the next
+			// rebuild ticker fires (default: within 5 minutes of
+			// startup).
+			w.cloud.Set(persisted.Terms)
+			slog.Info("cloud loaded from wiki_state", slog.Int("terms", len(persisted.Terms)))
+		} else {
+			slog.Warn("wiki_state cloud parse failed", slog.Any("error", err))
+		}
+	}
+}
+
+// filterAgainstIndex returns only those paths that currently exist in
+// the `pages` table, preserving input order. Used on Open() to drop
+// stale persisted recents whose underlying pages vanished while the
+// server was off.
+//
+// It reads every path from `pages` into a set and probes it, rather
+// than building a variable-length SQL IN-clause: the input slice is
+// small (~20 entries by default) and a single unparameterised query is
+// simpler.
+//
+// The function fails open. If the query, a row scan, or the row
+// iteration fails, the input is returned unfiltered. A partially
+// populated set would otherwise silently drop valid recents; stale
+// entries are the lesser evil and get reconciled by the next
+// delete/reindex.
+//
+// Returns nil for empty input.
+func (w *Wiki) filterAgainstIndex(ctx context.Context, paths []string) []string {
+	if len(paths) == 0 {
+		return nil
+	}
+	rows, err := w.db.QueryContext(ctx, "SELECT path FROM pages")
+	if err != nil {
+		slog.Warn("filterAgainstIndex query failed", slog.Any("error", err))
+		return paths // fail open: keep all, let the next CRUD reconcile
+	}
+	defer rows.Close()
+
+	present := make(map[string]struct{})
+	for rows.Next() {
+		var p string
+		if err := rows.Scan(&p); err != nil {
+			slog.Warn("filterAgainstIndex scan failed", slog.Any("error", err))
+			return paths
+		}
+		present[p] = struct{}{}
+	}
+	// Next() returning false can mean "done" or "failed"; only Err() tells.
+	if err := rows.Err(); err != nil {
+		slog.Warn("filterAgainstIndex iteration failed", slog.Any("error", err))
+		return paths
+	}
+
+	out := make([]string, 0, len(paths))
+	for _, p := range paths {
+		if _, ok := present[p]; ok {
+			out = append(out, p)
+		}
+	}
+	return out
+}
+
+// readStateKey looks up a single wiki_state row. It returns the stored
+// value and true on success, or "" and false when the key is absent or
+// the read failed. A missing row (sql.ErrNoRows, the normal first-run
+// case) is silent; any other error is logged so a real DB problem
+// doesn't quietly degrade the digest.
+func (w *Wiki) readStateKey(ctx context.Context, key string) (string, bool) {
+	var stored string
+	row := w.db.QueryRowContext(ctx, "SELECT value FROM wiki_state WHERE key = ?", key)
+	switch err := row.Scan(&stored); {
+	case err == nil:
+		return stored, true
+	case errors.Is(err, sql.ErrNoRows):
+		return "", false
+	default:
+		slog.Warn("wiki_state read failed", slog.String("key", key), slog.Any("error", err))
+		return "", false
+	}
+}
+
+// writeStateKey upserts a wiki_state row. The (key, value, updated)
+// triple is atomic via INSERT OR REPLACE — readers either see the old
+// or the new value, never a torn write.
+func (w *Wiki) writeStateKey(ctx context.Context, key, value string) error {
+ now := time.Now().UTC().Format(time.RFC3339Nano)
+ _, err := w.db.ExecContext(ctx,
+ "INSERT OR REPLACE INTO wiki_state (key, value, updated) VALUES (?, ?, ?)",
+ key, value, now,
+ )
+ return err
+}
+
+// persistRecents serialises the current LRU snapshot as JSON and stores
+// it under wiki_state["recent_lru"]. It is called by the 30s
+// persistence ticker (Step 6) and from Close() for a clean shutdown.
+// It is safe to call concurrently with reads: the snapshot is taken
+// under the LRU's own lock and the SQLite write is a single statement.
+//
+// The dirty flag is neither consulted nor modified here. Calling this
+// on a clean LRU just rewrites the same bytes; callers that want to
+// skip a redundant write should gate on takeDirty() first.
+func (w *Wiki) persistRecents(ctx context.Context) error {
+	payload, err := json.Marshal(recentsState{Items: w.recents.snapshot()})
+	if err != nil {
+		return fmt.Errorf("marshal recents: %w", err)
+	}
+	return w.writeStateKey(ctx, stateKeyRecentLRU, string(payload))
+}
+
+// persistCloud serialises the current cloud cache as JSON and stores it
+// under wiki_state["cloud"]. It is called after a successful rebuild
+// (Step 6). When the cloud has never been populated it does nothing and
+// returns nil, so an empty placeholder never clobbers a previously
+// good persisted copy.
+func (w *Wiki) persistCloud(ctx context.Context) error {
+	terms, populated := w.cloud.Get()
+	if !populated {
+		return nil
+	}
+	payload, err := json.Marshal(cloudState{Terms: terms})
+	if err != nil {
+		return fmt.Errorf("marshal cloud: %w", err)
+	}
+	return w.writeStateKey(ctx, stateKeyCloud, string(payload))
+}
+
+// PersistRecents is the exported entry point for the digest.Manager's
+// 30-second flush ticker. The internal persistRecents helper is also
+// called by Close() for a clean shutdown flush.
+//
+// PersistRecents always writes; callers that want to skip a redundant
+// write should peek with RecentsDirty first. On a successful write the
+// LRU's dirty flag is cleared, but only if the LRU did not change while
+// the write was in flight. A touch that lands after the snapshot was
+// taken is not in the persisted bytes, so its dirty mark is left in
+// place and the next tick flushes it. On a failed write the flag is
+// left as-is so the next tick retries.
+//
+// Returns the marshal or database error from persistRecents, if any.
+func (w *Wiki) PersistRecents(ctx context.Context) error {
+	// Read the change counter *before* the snapshot: if a touch slips in
+	// between, the worst case is one redundant write next tick, never a
+	// lost update.
+	seqBefore := w.recents.version()
+	if err := w.persistRecents(ctx); err != nil {
+		return err
+	}
+	// Compare-and-clear under the LRU's lock so no touch can interleave
+	// between the check and the reset.
+	w.recents.mu.Lock()
+	if w.recents.seq == seqBefore {
+		w.recents.dirty = false
+	}
+	w.recents.mu.Unlock()
+	return nil
+}
+
+// RecentsDirty tells the caller whether the LRU has changes that have
+// not been flushed by a successful PersistRecents. It only peeks: the
+// flag is left untouched. The digest.Manager uses it to avoid redundant
+// writes on an idle server.
+func (w *Wiki) RecentsDirty() bool {
+	unsaved := w.recents.peekDirty()
+	return unsaved
+}
diff --git a/internal/wiki/state_test.go b/internal/wiki/state_test.go
new file mode 100644
index 0000000..59e809c
--- /dev/null
+++ b/internal/wiki/state_test.go
@@ -0,0 +1,171 @@
+package wiki
+
+import (
+ "context"
+ "reflect"
+ "testing"
+)
+
+// TestState_PersistAndLoadRecents checks that the LRU survives a
+// Close/Open cycle: Close flushes it and Open rehydrates it in the
+// same order.
+func TestState_PersistAndLoadRecents(t *testing.T) {
+	w, dir := testWiki(t)
+	ctx := context.Background()
+
+	// Touch a couple of pages so there is something to persist.
+	for _, page := range []string{"projects/mind-map", "people/alice"} {
+		if _, err := w.GetPage(ctx, page); err != nil {
+			t.Fatalf("GetPage: %v", err)
+		}
+	}
+	want := w.recents.snapshot()
+	if err := w.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+
+	// Reopen the same directory and compare.
+	reopened, err := Open(dir)
+	if err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+	defer reopened.Close()
+
+	if got := reopened.recents.snapshot(); !reflect.DeepEqual(got, want) {
+		t.Fatalf("LRU not restored:\n before: %v\n after: %v", want, got)
+	}
+}
+
+// TestState_PersistAndLoadCloud checks that a cloud persisted with
+// persistCloud is restored verbatim by the next Open.
+func TestState_PersistAndLoadCloud(t *testing.T) {
+	w, dir := testWiki(t)
+
+	// The rebuild ticker isn't running in tests (Step 6 owns that
+	// wiring), so seed and persist the cloud by hand.
+	want := []CloudTerm{
+		{Term: "wiki", Count: 5},
+		{Term: "mind-map", Count: 3},
+	}
+	w.cloud.Set(want)
+	if err := w.persistCloud(context.Background()); err != nil {
+		t.Fatalf("persistCloud: %v", err)
+	}
+	if err := w.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+
+	reopened, err := Open(dir)
+	if err != nil {
+		t.Fatalf("reopen: %v", err)
+	}
+	defer reopened.Close()
+
+	got, populated := reopened.cloud.Get()
+	if !populated {
+		t.Fatalf("cloud not restored (ok=false)")
+	}
+	if !reflect.DeepEqual(got, want) {
+		t.Fatalf("cloud roundtrip mismatch:\n before: %v\n after: %v", want, got)
+	}
+}
+
+func TestState_LoadFiltersStalePaths(t *testing.T) {
+ w, dir := testWiki(t)
+ ctx := context.Background()
+
+ // Touch a real page and a fake one. We can't get a fake into the
+ // LRU via Wiki methods (they validate), so use the LRU directly.
+ if _, err := w.GetPage(ctx, "index"); err != nil {
+ t.Fatalf("GetPage: %v", err)
+ }
+ w.recents.touch("ghost/page/that/does/not/exist")
+
+ if err := w.persistRecents(ctx); err != nil {
+ t.Fatalf("persistRecents: %v", err)
+ }
+ if err := w.Close(); err != nil {
+ t.Fatalf("Close: %v", err)
+ }
+
+ // Reopen; the ghost path should be dropped on load because it
+ // isn't in `pages`.
+ w2, err := Open(dir)
+ if err != nil {
+ t.Fatalf("reopen: %v", err)
+ }
+ defer w2.Close()
+
+ for _, p := range w2.recents.snapshot() {
+ if p == "ghost/page/that/does/not/exist" {
+ t.Fatalf("stale path leaked through filter: %v", w2.recents.snapshot())
+ }
+ }
+ // The real one survives.
+ found := false
+ for _, p := range w2.recents.snapshot() {
+ if p == "index" {
+ found = true
+ }
+ }
+ if !found {
+ t.Fatalf("real path dropped by filter: %v", w2.recents.snapshot())
+ }
+}
+
+// TestState_EmptyWikiNoErrors checks that opening a brand-new wiki,
+// which has no wiki_state rows, succeeds and leaves both the LRU and
+// the cloud empty.
+func TestState_EmptyWikiNoErrors(t *testing.T) {
+	w, err := Open(t.TempDir())
+	if err != nil {
+		t.Fatalf("Open empty wiki: %v", err)
+	}
+	defer w.Close()
+
+	if n := w.recents.len(); n != 0 {
+		t.Fatalf("expected empty LRU on fresh wiki, got %v", w.recents.snapshot())
+	}
+	if _, populated := w.cloud.Get(); populated {
+		t.Fatalf("expected unpopulated cloud on fresh wiki")
+	}
+}
+
+// TestState_CorruptRecentsRowFallsBack checks that a malformed
+// recent_lru row does not break Open: the wiki opens normally with an
+// empty LRU.
+func TestState_CorruptRecentsRowFallsBack(t *testing.T) {
+	w, dir := testWiki(t)
+
+	// Plant a row that is not valid JSON.
+	err := w.writeStateKey(context.Background(), stateKeyRecentLRU, "{not valid json")
+	if err != nil {
+		t.Fatalf("writeStateKey: %v", err)
+	}
+	if err := w.Close(); err != nil {
+		t.Fatalf("Close: %v", err)
+	}
+
+	// Reopen must succeed with an empty LRU (the parse failure is only
+	// logged). Close flushes the LRU, so the corrupt row may already
+	// have been overwritten by a valid one on shutdown; either way the
+	// expectation below holds.
+	reopened, err := Open(dir)
+	if err != nil {
+		t.Fatalf("reopen with corrupt row: %v", err)
+	}
+	defer reopened.Close()
+
+	if n := reopened.recents.len(); n != 0 {
+		t.Fatalf("expected empty LRU after corrupt row; got %v", reopened.recents.snapshot())
+	}
+}
+
+// TestState_PersistCloudNoOpWhenUnset checks that persistCloud writes
+// nothing while the cloud has never been Set, so it cannot clobber a
+// previously good persisted copy with a placeholder.
+func TestState_PersistCloudNoOpWhenUnset(t *testing.T) {
+	w, _ := testWiki(t)
+	defer w.Close()
+	ctx := context.Background()
+
+	err := w.persistCloud(ctx)
+	if err != nil {
+		t.Fatalf("persistCloud unset: %v", err)
+	}
+	if _, present := w.readStateKey(ctx, stateKeyCloud); present {
+		t.Fatalf("expected no wiki_state[cloud] row when cloud is unset")
+	}
+}
diff --git a/internal/wiki/wiki.go b/internal/wiki/wiki.go
index 84b4778..2c668ac 100644
--- a/internal/wiki/wiki.go
+++ b/internal/wiki/wiki.go
@@ -13,6 +13,7 @@ import (
"log/slog"
"os"
"path/filepath"
+ "sync"
"time"
_ "modernc.org/sqlite" // pure-Go SQLite driver (no CGO required)
@@ -43,23 +44,130 @@ type SearchResult struct {
Snippet string `json:"snippet"`
}
-// WikiContext provides an overview of the wiki for orientation.
+// WikiContext provides an overview of the wiki for orientation. The
+// legacy fields (PageCount, RecentPages, TopLevelDirs) reflect disk
+// state — recent_pages is sorted by file mtime, top_level_dirs is read
+// from the filesystem — and remain available for clients that already
+// depend on that shape (opencode, Claude Code in the wild, per the
+// plan's open question #4).
+//
+// The newer fields (Cloud, Recents, Areas, Markdown) are the digest
+// signals: cloud terms across all page bodies, the active-use LRU
+// (intent, not mtime), per-area page counts pulled from the index,
+// and the rendered markdown an LLM can use directly. New clients
+// should prefer `get_wiki_digest` for these, but `get_wiki_context`
+// returns them too so existing tool wiring still benefits from the
+// orientation upgrade without a client change.
type WikiContext struct {
PageCount int `json:"page_count"`
RecentPages []Page `json:"recent_pages"`
TopLevelDirs []string `json:"top_level_dirs"`
+
+ // Cloud is the top-K word/phrase cloud across all page bodies.
+ // Empty until a cloud is loaded from persisted state or first rebuilt.
+ Cloud []CloudTerm `json:"cloud_terms,omitempty"`
+ // Recents is the active-use LRU — paths the user/agent actually
+ // touched (Create/Update/Get/Move/GetBacklinks). Distinct from
+ // RecentPages which is mtime-based.
+ Recents []string `json:"recents,omitempty"`
+ // Areas is the per-top-level-directory page count + index title.
+ // Driven by the indexed `pages` table, not filesystem listing.
+ Areas []AreaSummary `json:"areas,omitempty"`
+ // Markdown is the rendered digest blob — the same string an LLM
+ // would consume from `get_wiki_digest`. Included here so the
+ // existing get_wiki_context call gives clients an upgrade path
+ // without a tool-name change.
+ Markdown string `json:"markdown,omitempty"`
+}
+
+// Options tunes Wiki construction. All fields are optional; the zero
+// value gives the built-in defaults (recents capacity 20, render cap
+// 4 KB, no extra stopwords). Pass with WithOptions to Open():
+//
+// w, err := wiki.Open(dir, wiki.WithOptions(wiki.Options{
+// RecentsSize: 50,
+// MaxRenderBytes: 8192,
+// }))
+//
+// Options is value-passed; mutating it after Open has no effect.
+type Options struct {
+ // RecentsSize is the active-use LRU capacity. Default 20.
+ RecentsSize int
+ // MaxRenderBytes caps the rendered digest markdown. 0 → default 4096; <0 → no trim.
+ MaxRenderBytes int
+ // StopwordsExtra is forwarded to the cloud builder when invoked
+ // directly via BuildCloud. The digest.Manager passes its own
+ // copy through Options on its Manager; this field is here so
+ // non-Manager callers (tests, ad-hoc tools) get the same set.
+ StopwordsExtra []string
+}
+
+// OpenOption configures wiki.Open. Use WithOptions or future targeted
+// helpers; the variadic form keeps Open(dir) source-compatible.
+type OpenOption func(*Options)
+
+// WithOptions sets the entire Options struct in one call. The most
+// common embedder path: read config, build Options, pass to Open.
+func WithOptions(opts Options) OpenOption {
+ return func(o *Options) { *o = opts }
}
// Wiki is the core engine. Create one with Open().
type Wiki struct {
- root string // absolute path to wiki directory
- db *sql.DB // SQLite database with FTS5
- sessionID string // unique ID for this process, used for page locks
+ root string // absolute path to wiki directory
+ db *sql.DB // SQLite database with FTS5
+ sessionID string // unique ID for this process, used for page locks
+ // recents tracks pages the user/agent has actively touched. See
+ // recents.go for the rationale (intent vs. disk mtime). Persistence
+ // to SQLite is layered on in state.go; here it just lives in memory.
+ recents *recentsLRU
+ // cloud holds the most recent word/phrase cloud. Restored from persisted
+ // state at Open and replaced by each periodic rebuild; empty until then.
+ cloud *cloudCache
+ // digest caches the rendered markdown blob, invalidated by cloud
+ // version + recents seq changes. See digest.go.
+ digest *digestCache
+ // maxRenderBytes is the soft cap applied by Digest(). Open maps 0 to the
+ // default, so via Open only a negative value means no trim (used by tests).
+ maxRenderBytes int
+ // stopwordsExtra is forwarded to buildCloud when called directly
+ // without an explicit extras list.
+ stopwordsExtra []string
+ // closeOnce guards Close() against double-invocation: testWiki and
+ // other callers commonly stack defer Close on top of t.Cleanup.
+ // Without it, a second Close() would run persistRecents on a closed DB
+ // and log a spurious warning. closeErr keeps the first call's result.
+ closeOnce sync.Once
+ closeErr error
}
// Open opens (or creates) a wiki rooted at the given directory.
// It initializes the SQLite index and performs an initial scan.
-func Open(root string) (*Wiki, error) {
+// Pass OpenOption values (typically a single WithOptions) to tune the
+// digest signals; the default options match the digest plan's
+// recommended values (LRU=20, render cap=4096, no extra stopwords).
+func Open(root string, opts ...OpenOption) (*Wiki, error) {
+ o := Options{
+ RecentsSize: 20,
+ MaxRenderBytes: defaultMaxRenderBytes,
+ }
+ for _, fn := range opts {
+ fn(&o)
+ }
+ if o.RecentsSize <= 0 {
+ o.RecentsSize = 20
+ }
+ // MaxRenderBytes semantics:
+ // > 0 → trim to that many bytes
+ // == 0 → fall back to default (4096) — most likely an
+ // uninitialized Options struct
+ // < 0 → no trimming (tests / power users)
+ // The field is normalized to those three states here so digest
+ // rendering can just check the sign without re-deriving intent.
+ if o.MaxRenderBytes == 0 {
+ o.MaxRenderBytes = defaultMaxRenderBytes
+ }
+
absRoot, err := filepath.Abs(root)
if err != nil {
return nil, fmt.Errorf("resolve wiki root: %w", err)
@@ -79,7 +187,16 @@ func Open(root string) (*Wiki, error) {
}
sessionID := fmt.Sprintf("pid-%d-%d", os.Getpid(), time.Now().UnixNano())
- w := &Wiki{root: absRoot, db: db, sessionID: sessionID}
+ w := &Wiki{
+ root: absRoot,
+ db: db,
+ sessionID: sessionID,
+ recents: newRecentsLRU(o.RecentsSize),
+ cloud: &cloudCache{},
+ digest: &digestCache{},
+ maxRenderBytes: o.MaxRenderBytes,
+ stopwordsExtra: o.StopwordsExtra,
+ }
if err := w.initSchema(); err != nil {
db.Close()
return nil, fmt.Errorf("init schema: %w", err)
@@ -97,15 +214,35 @@ func Open(root string) (*Wiki, error) {
return nil, fmt.Errorf("initial index: %w", err)
}
+ // Load persisted derived state (recents LRU, word cloud) after
+ // reindex so any stale entries pointing at pages that vanished
+ // while the server was off get filtered against the fresh index.
+ // Failures are logged but non-fatal — a corrupt state row just
+ // degrades to "fresh-wiki" behavior, not a crash.
+ w.loadState(context.Background())
+
slog.Info("wiki opened", slog.String("root", absRoot))
return w, nil
}
// Close releases page locks held by this session and closes the database.
+// It flushes the recents LRU first. Idempotent: only the first call does
+// work; later calls return the first call's db.Close result.
func (w *Wiki) Close() error {
- slog.Info("wiki closing", slog.String("root", w.root))
- w.db.Exec("DELETE FROM page_locks WHERE holder = ?", w.sessionID)
- return w.db.Close()
+	w.closeOnce.Do(func() {
+		slog.Info("wiki closing", slog.String("root", w.root))
+		// Flush the LRU so a clean shutdown keeps touches made since
+		// the last persist. Flush and lock release are best-effort:
+		// failures are logged and the DB handle is still closed.
+		if err := w.persistRecents(context.Background()); err != nil {
+			slog.Warn("recents flush on close failed", slog.Any("error", err))
+		}
+		if _, err := w.db.Exec("DELETE FROM page_locks WHERE holder = ?", w.sessionID); err != nil {
+			slog.Warn("page lock release on close failed", slog.Any("error", err))
+		}
+		w.closeErr = w.db.Close()
+	})
+	return w.closeErr
}
// Root returns the wiki's root directory.
@@ -165,6 +302,10 @@ func (w *Wiki) initSchema() error {
return err
}
+ if err := w.initStateSchema(); err != nil {
+ return fmt.Errorf("wiki_state schema: %w", err)
+ }
+
// Clean up stale locks (older than 5 minutes) from crashed processes
_, err := w.db.Exec("DELETE FROM page_locks WHERE acquired < ?",
time.Now().Add(-5*time.Minute).UTC().Format(time.RFC3339))
diff --git a/webui/src/App.tsx b/webui/src/App.tsx
index 4d6c126..e293d34 100644
--- a/webui/src/App.tsx
+++ b/webui/src/App.tsx
@@ -4,6 +4,7 @@ import { Logo } from './Logo';
import { PageBrowser } from './PageBrowser';
import { GraphView } from './GraphView';
import { searchTokens, searchRegex, Highlighted } from './search';
+import { TagInput } from './TagInput';
import { marked } from 'marked';
import mermaid from 'mermaid';
@@ -16,8 +17,22 @@ interface SyncSettings {
mappings?: { prefix: string; remote: string }[];
}
+// DigestSettings mirrors internal/config.DigestConfig. All fields are
+// optional on the wire — a config file without a digest section
+// loads with zero values, which the consumers interpret as "use the
+// built-in defaults". The UI surfaces the same contract: empty
+// numeric inputs and an empty tag list keep server defaults intact.
+interface DigestSettings {
+ cloud_size?: number;
+ recents_size?: number;
+ cloud_refresh?: string;
+ stopwords_extra?: string[];
+ max_render_bytes?: number;
+}
+
interface Settings {
sync: SyncSettings;
+ digest?: DigestSettings;
}
async function loadSettings(): Promise {
@@ -339,6 +354,21 @@ export function App() {
setSettingsSaved(false);
};
+ // updateDigest is the per-field mutator for the Digest section.
+ // K ties `value` to the type of the named field (number for numeric
+ // knobs, string for cloud_refresh, string[] for stopwords_extra).
+ // The server omits the digest section entirely when it has never
+ // been set, so an empty object is materialized on first touch.
+ const updateDigest = <K extends keyof DigestSettings>(field: K, value: DigestSettings[K]) => {
+   if (!settings) return;
+   setSettings({
+     ...settings,
+     digest: { ...(settings.digest ?? {}), [field]: value },
+   });
+   setSettingsDirty(true);
+   setSettingsSaved(false);
+ };
+
const renderMarkdown = (body: string): string => {
// Convert [[wikilinks]] to clickable links before rendering
const withLinks = body.replace(/\[\[([^\]|]+)(?:\|([^\]]+))?\]\]/g, (_, target, display) => {
@@ -562,6 +592,81 @@ export function App() {
)}
+
+
Digest
+
+ The per-conversation orientation digest summarizes what this wiki is about. A background job rebuilds the word/phrase cloud on a schedule; the recents LRU updates on every page op. All fields are optional — leave blank to use defaults.
+
+
+
+
+
+ Domain-specific noise to exclude from the cloud (e.g. TODO, FIXME, see, also). Comma, space, or Enter to add a tag; Backspace on empty input removes the last one.
+
+ updateDigest('stopwords_extra', next)}
+ placeholder="Type a word and press space, comma, or Enter"
+ />
+
+
+
+
+
Top-K terms in the word/phrase cloud. Default 50.
+ {
+ const v = (e.target as HTMLInputElement).value;
+ updateDigest('cloud_size', v === '' ? undefined : parseInt(v, 10));
+ }}
+ placeholder="50"
+ />
+
+
+
+
+
Active-use LRU capacity. Default 20. Applied on next restart.
+ {
+ const v = (e.target as HTMLInputElement).value;
+ updateDigest('recents_size', v === '' ? undefined : parseInt(v, 10));
+ }}
+ placeholder="20"
+ />
+
+
+
+
+
How often the cloud rebuilds (e.g. 5m, 10m). Floor: 30s. Default 5m.
Soft cap on the rendered markdown blob. Default 4096 (~1K tokens). Set to 0 to disable trimming.
+ {
+ const v = (e.target as HTMLInputElement).value;
+ updateDigest('max_render_bytes', v === '' ? undefined : parseInt(v, 10));
+ }}
+ placeholder="4096"
+ />
+
+
+
Index
diff --git a/webui/src/TagInput.tsx b/webui/src/TagInput.tsx
new file mode 100644
index 0000000..96048e0
--- /dev/null
+++ b/webui/src/TagInput.tsx
@@ -0,0 +1,130 @@
+import { useState, useRef } from 'preact/hooks';
+
+interface TagInputProps {
+ value: string[];
+ onChange: (next: string[]) => void;
+ placeholder?: string;
+ // Maximum number of tags. When reached, further input is blocked
+ // until the user removes a tag. Omitted = no limit.
+ maxTags?: number;
+}
+
+// TagInput is a controlled "chips + textbox" control: type a word,
+// hit comma, space, or Enter, and it becomes a tag. Backspace on an
+// empty input deletes the previous tag (standard chip-input UX —
+// matches Gmail's To: line, GitHub's labels, etc.). Pasting a
+// comma- or whitespace-separated string creates multiple tags in
+// one shot.
+//
+// Values are de-duplicated case-insensitively but preserved in the
+// case the user typed — we don't want to fold "JWT" into "jwt" on
+// the way back to the server. The consumer of the values (the cloud
+// builder) is the one that case-folds for matching; storing the
+// user's intent verbatim respects what they typed.
+export function TagInput({ value, onChange, placeholder, maxTags }: TagInputProps) {
+ const [draft, setDraft] = useState('');
+ const inputRef = useRef(null);
+
+ const commit = (raw: string) => {
+ // Split on commas and whitespace so pasting a list works
+ // even if the user pasted "TODO, FIXME see" (mixed
+ // separators). Empty fragments are filtered out by trim.
+ const fragments = raw
+ .split(/[\s,]+/)
+ .map(s => s.trim())
+ .filter(Boolean);
+ if (fragments.length === 0) return;
+
+ const lowerExisting = new Set(value.map(v => v.toLowerCase()));
+ const additions: string[] = [];
+ for (const f of fragments) {
+ if (lowerExisting.has(f.toLowerCase())) continue;
+ if (maxTags && value.length + additions.length >= maxTags) break;
+ lowerExisting.add(f.toLowerCase());
+ additions.push(f);
+ }
+ if (additions.length > 0) onChange([...value, ...additions]);
+ setDraft('');
+ };
+
+ const removeAt = (idx: number) => {
+ const next = value.slice();
+ next.splice(idx, 1);
+ onChange(next);
+ // Keep focus on the input so the user can keep editing.
+ inputRef.current?.focus();
+ };
+
+ const onKeyDown = (e: KeyboardEvent) => {
+     // Ignore keys pressed during an IME composition: Enter and space
+     // there select a candidate and must not be treated as commits.
+     if (e.isComposing) return;
+     // Commit triggers: Enter, comma, space (swallowed when handled).
+     if (e.key === 'Enter' || e.key === ',' || e.key === ' ') {
+         // Only commit when there is something to commit; a paste is
+         // handled separately by the paste handler.
+         if (draft.trim() !== '') {
+             e.preventDefault();
+             commit(draft);
+         } else if (e.key === ',' || e.key === ' ') {
+             // Swallow stray separators on an empty input so the
+             // box doesn't fill with whitespace.
+             e.preventDefault();
+         }
+         return;
+     }
+     if (e.key === 'Backspace' && draft === '' && value.length > 0) {
+         e.preventDefault();
+         removeAt(value.length - 1);
+     }
+ };
+
+ const onPaste = (e: ClipboardEvent) => {
+ const pasted = e.clipboardData?.getData('text') ?? '';
+ if (/[\s,]/.test(pasted)) {
+ // The paste contains separators — handle the whole
+ // string as tags rather than letting it land in the
+ // input field where the user would have to manually
+ // split it.
+ e.preventDefault();
+ commit(pasted);
+ }
+ };
+
+ return (
+
inputRef.current?.focus()}>
+ {value.map((tag, idx) => (
+
+ {tag}
+
+
+ ))}
+ setDraft((e.target as HTMLInputElement).value)}
+ onKeyDown={onKeyDown}
+ onPaste={onPaste}
+ onBlur={() => {
+ // Commit any in-progress draft on blur so the user
+ // doesn't have to remember the keyboard ritual when
+ // they tab away or click Save.
+ if (draft.trim() !== '') commit(draft);
+ }}
+ />
+
+ );
+}
diff --git a/webui/src/styles.css b/webui/src/styles.css
index 97e54ce..a9abb7e 100644
--- a/webui/src/styles.css
+++ b/webui/src/styles.css
@@ -817,3 +817,74 @@ mark {
@media (prefers-color-scheme: dark) {
.settings-reindex-error { color: #ff8080; }
}
+
+/* Tag input (Digest > Extra Stopwords).
+ * Looks and behaves like a single .settings-field input: same border,
+ * same focus accent, same width cap. The internal chips wrap and the
+ * input field stretches to fill the remaining row. */
+.tag-input {
+ width: 100%;
+ max-width: 480px;
+ min-height: 36px;
+ padding: 4px 6px;
+ border: 1px solid var(--border);
+ background: var(--bg);
+ display: flex;
+ flex-wrap: wrap;
+ align-items: center;
+ gap: 4px;
+ cursor: text;
+}
+
+.tag-input:focus-within {
+ border-color: var(--accent);
+}
+
+.tag {
+ display: inline-flex;
+ align-items: center;
+ gap: 4px;
+ padding: 2px 4px 2px 8px;
+ background: var(--accent);
+ color: white;
+ font-family: var(--font-mono);
+ font-size: 12px;
+ line-height: 1.4;
+ border-radius: 2px;
+ user-select: none;
+}
+
+.tag-label {
+ /* Allow long tags to wrap or truncate gracefully if someone
+ * pastes a paragraph by mistake. Word-break here keeps the tag
+ * pill compact in the row. */
+ overflow-wrap: anywhere;
+ max-width: 200px;
+}
+
+.tag-remove {
+ background: transparent;
+ border: none;
+ color: white;
+ cursor: pointer;
+ padding: 0 4px;
+ font-size: 14px;
+ line-height: 1;
+ opacity: 0.8;
+}
+
+.tag-remove:hover {
+ opacity: 1;
+}
+
+.tag-input-field {
+ flex: 1;
+ min-width: 120px;
+ border: none;
+ outline: none;
+ background: transparent;
+ color: var(--fg);
+ font-family: var(--font-mono);
+ font-size: 13px;
+ padding: 4px 2px;
+}