diff --git a/.agents/skills/cli-cdp/SKILL.md b/.agents/skills/cli-cdp/SKILL.md new file mode 100644 index 00000000..8560acc9 --- /dev/null +++ b/.agents/skills/cli-cdp/SKILL.md @@ -0,0 +1,187 @@ +--- +name: cli-cdp +description: 'Chrome DevTools CLI for browser automation via shell commands. Use when interacting with web pages from the command line — navigating, clicking, filling forms, inspecting console/network, taking screenshots, or extracting page content. Triggers on: browse a page, automate Chrome, inspect console, check network requests, take a screenshot, fill a form, click a button.' +--- + +# Chrome DevTools CLI (cdp-cli) + +CLI for Chrome DevTools Protocol. All list commands output **NDJSON** (one JSON +object per line) — grep/tail/jq friendly. Requires Chrome running with remote +debugging. + +## Prerequisites + +Chrome must be running with `--remote-debugging-port`: + +```bash +cdp-cli launch # macOS: launches Chrome with debugging on :9223 +``` + +Or start Chrome manually with `--remote-debugging-port=9222` and pass +`--cdp-url http://localhost:9222`. + +## Page Identification + +All commands taking `<page>` accept a **page ID** or a **title substring**. +Use `cdp-cli tabs` to discover pages. + +```bash +cdp-cli tabs # List all open pages (NDJSON) +cdp-cli tabs | grep "example" # Find by title +``` + +## Token-Conscious Defaults (Critical) + +cdp-cli is designed to minimize token usage by default: + +- **console** returns bare strings (not JSON objects) and only the last 10 + messages. Add `--with-type`, `--with-timestamp`, `--with-source` only when + needed. Use `-v` for all fields. +- **snapshot** defaults to accessibility tree (`--format ax`) which is compact. + Use `--format text` for plain text, `--format dom` for full DOM (heavy). +- **network** collects for only 0.1s by default. Increase with `--duration`. + +**Strategy:** Start with defaults, add verbosity flags only when needed. 
+ +## Commands + +### Page Management + +```bash +cdp-cli tabs # List all pages (NDJSON) +cdp-cli new "https://example.com" # Open new tab +cdp-cli new # Open empty tab +cdp-cli go "<page>" "https://url.com" # Navigate to URL +cdp-cli go "<page>" back # History back +cdp-cli go "<page>" forward # History forward +cdp-cli go "<page>" reload # Reload +cdp-cli close "<page>" # Close tab +``` + +### Snapshot & Evaluation + +```bash +cdp-cli snapshot "<page>" # Accessibility tree (default, compact) +cdp-cli snapshot "<page>" -f text # Plain text content +cdp-cli snapshot "<page>" -f dom # Full DOM tree (JSON, heavy) + +cdp-cli eval "<page>" "document.title" # Evaluate JS expression +cdp-cli eval "<page>" "Array.from(document.querySelectorAll('h1')).map(h => h.textContent)" +``` + +Use the accessibility tree to discover elements by role/name, then construct +CSS selectors for click/fill. + +### Console + +```bash +cdp-cli console "<page>" # Last 10 messages, bare strings +cdp-cli console "<page>" -n 50 # Last 50 messages +cdp-cli console "<page>" --all # All messages (or -n -1) +cdp-cli console "<page>" -t error # Filter by type (log/error/warn/info) +cdp-cli console "<page>" -d 2 # Collect for 2 seconds +cdp-cli console "<page>" -i # --inspect: expand nested objects/arrays +cdp-cli console "<page>" -v # --verbose: all fields (type, timestamp, source) +cdp-cli console "<page>" --with-type # Add type field only +cdp-cli console "<page>" --with-source # Add source location (url, line) +``` + +When truncated, stderr shows: `(N messages skipped. 
Use --tail M or --all to see more)` + +### Network + +```bash +cdp-cli network "<page>" # Requests in last 0.1s (NDJSON) +cdp-cli network "<page>" -d 5 # Collect for 5 seconds +cdp-cli network "<page>" -t fetch # Filter by type (xhr, fetch, script, etc) +cdp-cli network "<page>" -d 5 -t fetch # Combine filters +``` + +Filter results with grep: + +```bash +cdp-cli network "<page>" | grep '"status":4' # 4xx errors +cdp-cli network "<page>" | grep '"status":404' # 404s specifically +``` + +### Input Automation + +```bash +cdp-cli click "<page>" "button" # Click by CSS selector +cdp-cli click "<page>" "button" -d # Double click +cdp-cli click "<page>" "button" -g # --user-gesture: required for + # WebXR, fullscreen, and other + # activation-gated APIs +cdp-cli click "<page>" --node 42 # Click by backend DOM node ID +cdp-cli click "<page>" "button" -w 5000 # Wait up to 5s for selector + +cdp-cli fill "<page>" "user@example.com" "input[name='email']" +cdp-cli fill "<page>" "secret" "input[type='password']" + +cdp-cli key "<page>" enter +cdp-cli key "<page>" tab +cdp-cli key "<page>" escape +``` + +### Screenshots + +```bash +cdp-cli screenshot "<page>" screenshot.jpg # Save JPEG (default) +cdp-cli screenshot "<page>" out.png -f png # PNG format +cdp-cli screenshot "<page>" out.webp -f webp # WebP format +cdp-cli screenshot "<page>" out.jpg -q 50 # Lower quality (0-100) +``` + +## Error Handling + +All errors output NDJSON with `"error": true`: + +```json +{ "error": true, "message": "Page not found: example", "code": "PAGE_NOT_FOUND" } +``` + +## Common Workflows + +### Inspect and Interact + +```bash +cdp-cli tabs | grep "example" # Find page +cdp-cli snapshot "example" # Get accessibility tree +# Identify selectors from the tree, then: +cdp-cli fill "example" "query" "input" +cdp-cli click "example" "button[type='submit']" +cdp-cli screenshot "example" result.jpg +``` + +### Debug Console Errors + +```bash +cdp-cli console "localhost" -t error -d 10 # Errors over 10 seconds +cdp-cli console "localhost" -t error -v # With full source location +``` + +### Monitor Network + +```bash 
+cdp-cli network "localhost" -d 5 | grep '"status":4' # Failed requests +cdp-cli network "localhost" -d 5 -t fetch # API calls only +``` + +### Form Automation + +```bash +cdp-cli new "http://localhost:3000/login" +cdp-cli fill "localhost" "user@example.com" "#email" +cdp-cli fill "localhost" "password123" "#password" +cdp-cli click "localhost" "button[type=submit]" +sleep 2 +cdp-cli eval "localhost" "document.querySelector('.success')?.textContent" +``` + +## Global Options + +| Option | Default | Description | +| ----------- | ----------------------- | ---------------------------- | +| `--cdp-url` | `http://localhost:9223` | Chrome DevTools Protocol URL | +| `--help` | — | Show help | +| `--version` | — | Show version | diff --git a/.agents/skills/cli-cmux/SKILL.md b/.agents/skills/cli-cmux/SKILL.md new file mode 100644 index 00000000..3d1e5005 --- /dev/null +++ b/.agents/skills/cli-cmux/SKILL.md @@ -0,0 +1,212 @@ +--- +name: cli-cmux +description: 'Deep expertise in cmux — the terminal multiplexer with native browser views. Use when managing panes, reading terminal output, sending keystrokes, opening browser views, or manually testing web UIs and TUIs inside cmux. Triggers on: cmux, open a browser pane, split terminal, read screen, send keys, test this UI in cmux, preview in cmux.' +--- + +# cmux — Terminal Multiplexer with Native Browser + +cmux manages terminal panes and browser views through a Unix socket CLI. +You are already running inside cmux — your current pane has env vars +`CMUX_WORKSPACE_ID` and `CMUX_SURFACE_ID` set automatically. + +## Mental Model + +``` +window → workspace(s) → pane(s) → surface(s) + ↑ + terminal OR browser +``` + +- **Window**: OS window +- **Workspace**: A tab within a window (like tmux sessions) +- **Pane**: A visual split region +- **Surface**: The content inside a pane (terminal or browser); panes can have multiple surfaces as tabs + +## Addressing + +cmux uses **short refs** by default: `workspace:1`, `pane:2`, `surface:3`. 
+Most commands auto-target the caller's workspace/surface via env vars. + +```bash +# Discover current layout +cmux identify # What workspace/pane/surface am I in? +cmux list-panes # Panes in current workspace +cmux list-workspaces # All workspaces +``` + +## Core Operations + +### Layout — Splits and Panes + +```bash +# Create a new terminal pane (split from current) +cmux new-pane --direction right +cmux new-pane --direction down + +# Create a browser pane +cmux new-pane --type browser --direction right --url http://localhost:3000 + +# Focus a pane +cmux focus-pane --pane pane:3 + +# Resize +cmux resize-pane --pane pane:2 -R --amount 20 # grow rightward +cmux resize-pane --pane pane:2 -D --amount 10 # grow downward +``` + +### Terminal I/O — Read and Send + +```bash +# Read what's on screen in another pane +cmux read-screen --surface surface:2 +cmux read-screen --surface surface:2 --scrollback --lines 100 + +# Send text (like typing) +cmux send --surface surface:2 "npm run dev" +cmux send --surface surface:2 $'\n' # press Enter + +# Send special keys +cmux send-key --surface surface:2 Enter +cmux send-key --surface surface:2 Up +cmux send-key --surface surface:2 Ctrl+C +``` + +### Browser — Navigate and Interact + +```bash +# Open URL in an existing browser surface +cmux browser --surface surface:3 navigate http://localhost:3000 + +# Snapshot the DOM (interactive elements) +cmux browser --surface surface:3 snapshot --interactive + +# Click, fill, type using CSS selectors +cmux browser --surface surface:3 click "button.submit" +cmux browser --surface surface:3 fill "input[name=email]" "test@example.com" +cmux browser --surface surface:3 type "input[name=search]" "query" + +# Press keys, scroll +cmux browser --surface surface:3 press Enter +cmux browser --surface surface:3 scroll --dy 500 + +# Read state +cmux browser --surface surface:3 get url +cmux browser --surface surface:3 get title +cmux browser --surface surface:3 get text "h1" + +# Wait for conditions 
+cmux browser --surface surface:3 wait --selector ".loaded" +cmux browser --surface surface:3 wait --load-state complete +cmux browser --surface surface:3 wait --text "Dashboard" +``` + +--- + +## Workflow: Testing a Web UI + +The pattern: start the dev server in a terminal pane, open a browser pane +pointing at it, then interact via snapshot → act → re-snapshot. + +```bash +# 1. Start dev server in a new pane +cmux new-pane --direction down +# note the surface ref from output, e.g. surface:2 +cmux send --surface surface:2 "npm run dev" +cmux send-key --surface surface:2 Enter + +# 2. Wait for server to be ready +# (read screen until you see the ready message) +cmux read-screen --surface surface:2 + +# 3. Open browser pane pointing at dev server +cmux new-pane --type browser --direction right --url http://localhost:3000 +# note the browser surface ref, e.g. surface:3 + +# 4. Snapshot → interact → re-snapshot loop +cmux browser --surface surface:3 snapshot --interactive +cmux browser --surface surface:3 click "nav a[href='/settings']" +cmux browser --surface surface:3 wait --load-state complete +cmux browser --surface surface:3 snapshot --interactive + +# 5. Fill a form +cmux browser --surface surface:3 fill "input[name=username]" "testuser" +cmux browser --surface surface:3 fill "input[name=password]" "secret" +cmux browser --surface surface:3 click "button[type=submit]" +cmux browser --surface surface:3 wait --text "Welcome" +cmux browser --surface surface:3 snapshot --interactive +``` + +### Snapshot Tips + +- `--interactive` returns only interactive elements — much smaller output +- `--compact` reduces whitespace for smaller snapshots +- `--selector "main"` scopes to a region of the page +- `--max-depth 5` limits DOM tree depth +- Always re-snapshot after navigation or DOM mutations + +## Workflow: Testing a TUI + +The pattern: launch the TUI in a new pane, read-screen to observe, send/send-key to interact. + +```bash +# 1. 
Launch the TUI +cmux new-pane --direction right +# note surface ref, e.g. surface:2 +cmux send --surface surface:2 "my-cli-wizard" +cmux send-key --surface surface:2 Enter + +# 2. Read → interact → read loop +cmux read-screen --surface surface:2 +cmux send-key --surface surface:2 Down +cmux send-key --surface surface:2 Down +cmux send-key --surface surface:2 Enter +cmux read-screen --surface surface:2 + +# 3. Type text input +cmux send --surface surface:2 "my-project-name" +cmux send-key --surface surface:2 Enter +cmux read-screen --surface surface:2 + +# 4. Test cancellation +cmux send-key --surface surface:2 Ctrl+C +cmux read-screen --surface surface:2 + +# 5. Clean up +cmux send-key --surface surface:2 Ctrl+C +``` + +### TUI Tips + +- Use `read-screen` (not `read-screen --scrollback`) for current viewport +- Add `--lines 50` with `--scrollback` to limit output size +- Send `Ctrl+C` to kill the TUI when done +- Read the screen after each interaction to confirm state changes + +## Context Management + +cmux interactions are context-expensive. Minimize round-trips: + +1. **Batch related commands** — don't read-screen between every single keystroke +2. **Use `--interactive` and `--compact`** for browser snapshots +3. **Scope snapshots** with `--selector` when you only care about part of the page +4. **Delegate long test sessions to a Task subagent** — the subagent runs the + full interaction loop and returns only findings + +## Sidebar Metadata + +cmux provides sidebar status, progress, and logging for visibility: + +```bash +cmux set-status "phase" "testing" --icon "🧪" +cmux set-progress 0.5 --label "Running tests..." 
+cmux log "Found 3 issues" --level warn +cmux clear-progress +``` + +## Reference + +For full command details, see: + +- [reference/browser-commands.md](reference/browser-commands.md) — All browser subcommands +- [reference/terminal-io.md](reference/terminal-io.md) — Terminal read/send/key commands +- [reference/layout-commands.md](reference/layout-commands.md) — Pane, workspace, window management diff --git a/.agents/skills/cli-cmux/reference/browser-commands.md b/.agents/skills/cli-cmux/reference/browser-commands.md new file mode 100644 index 00000000..797dfd2f --- /dev/null +++ b/.agents/skills/cli-cmux/reference/browser-commands.md @@ -0,0 +1,211 @@ +# cmux Browser Commands Reference + +All browser commands target a browser surface: +```bash +cmux browser --surface [args...] +``` + +If `--surface` is omitted, cmux uses `CMUX_SURFACE_ID` (only works if the +caller IS a browser surface, which is rare for agents). + +## Opening a Browser + +```bash +# Open as new pane (most common) +cmux new-pane --type browser --direction right --url http://localhost:3000 + +# Open from the caller's workspace (creates a split) +cmux browser open http://localhost:3000 +cmux browser open-split http://localhost:3000 + +# Add a browser surface as a tab in an existing pane +cmux new-surface --type browser --pane pane:2 --url http://localhost:3000 +``` + +## Navigation + +```bash +cmux browser --surface navigate +cmux browser --surface back +cmux browser --surface forward +cmux browser --surface reload +cmux browser --surface url # alias for get-url +cmux browser --surface get-url +``` + +Add `--snapshot-after` to back/forward/reload to auto-snapshot after navigation. 
+ +## DOM Snapshot + +```bash +cmux browser --surface snapshot +cmux browser --surface snapshot --interactive # only interactive elements +cmux browser --surface snapshot --compact # reduced whitespace +cmux browser --surface snapshot --cursor # include cursor-interactive +cmux browser --surface snapshot --max-depth 5 # limit tree depth +cmux browser --surface snapshot --selector "main" # scope to CSS selector +``` + +Combine flags: `snapshot --interactive --compact --selector ".content"` + +## Interaction + +### Click and Hover + +```bash +cmux browser --surface click +cmux browser --surface dblclick +cmux browser --surface hover +cmux browser --surface focus +cmux browser --surface scroll-into-view +``` + +### Form Input + +```bash +cmux browser --surface fill "text" # clear + type +cmux browser --surface fill # empty = clear input +cmux browser --surface type "text" # type without clearing +cmux browser --surface select "value" # select dropdown +cmux browser --surface check +cmux browser --surface uncheck +``` + +### Keyboard + +```bash +cmux browser --surface press Enter +cmux browser --surface press Tab +cmux browser --surface press Escape +cmux browser --surface keydown Shift +cmux browser --surface keyup Shift +``` + +### Scrolling + +```bash +cmux browser --surface scroll --dy 500 # scroll down +cmux browser --surface scroll --dy -500 # scroll up +cmux browser --surface scroll --dx 200 # scroll right +cmux browser --surface scroll --selector ".list" --dy 300 +``` + +All interaction commands accept `--snapshot-after` to auto-snapshot. 
+ +## Reading State + +```bash +cmux browser --surface get url +cmux browser --surface get title +cmux browser --surface get text +cmux browser --surface get html +cmux browser --surface get value # input value +cmux browser --surface get attr +cmux browser --surface get count # element count +cmux browser --surface get box # bounding box +cmux browser --surface get styles # computed styles +``` + +### Visibility/State Checks + +```bash +cmux browser --surface is visible +cmux browser --surface is enabled +cmux browser --surface is checked +``` + +## Waiting + +```bash +cmux browser --surface wait --selector # element exists +cmux browser --surface wait --text "Dashboard" # text appears +cmux browser --surface wait --url-contains "/settings" # URL matches +cmux browser --surface wait --load-state complete # page loaded +cmux browser --surface wait --load-state interactive # DOM ready +cmux browser --surface wait --function "() => window.ready" # custom JS +cmux browser --surface wait --timeout-ms 10000 # custom timeout +``` + +## Semantic Locators + +Find elements by role, text, label, etc. instead of CSS selectors: + +```bash +cmux browser --surface find role button +cmux browser --surface find text "Sign In" +cmux browser --surface find label "Email" +cmux browser --surface find placeholder "Search..." 
+cmux browser --surface find testid "submit-btn" +cmux browser --surface find alt "Logo" +cmux browser --surface find title "Close" +cmux browser --surface find first +cmux browser --surface find last +cmux browser --surface find nth 3 +``` + +## JavaScript Evaluation + +```bash +cmux browser --surface eval "document.title" +cmux browser --surface eval "window.scrollTo(0, document.body.scrollHeight)" +``` + +## Frames + +```bash +cmux browser --surface frame "iframe.embed" # enter iframe +cmux browser --surface frame main # back to main frame +``` + +## Dialogs + +```bash +cmux browser --surface dialog accept +cmux browser --surface dialog accept "confirmation text" +cmux browser --surface dialog dismiss +``` + +## Tabs (within a browser surface) + +```bash +cmux browser --surface tab list +cmux browser --surface tab new http://localhost:3000/other +cmux browser --surface tab switch 2 +cmux browser --surface tab close +``` + +## State Persistence + +```bash +cmux browser --surface state save auth.json # save cookies/storage +cmux browser --surface state load auth.json # restore +``` + +## Cookies and Storage + +```bash +cmux browser --surface cookies get +cmux browser --surface cookies set '{"name":"token","value":"abc","domain":"localhost"}' +cmux browser --surface cookies clear + +cmux browser --surface storage local get +cmux browser --surface storage local set key value +cmux browser --surface storage session clear +``` + +## Console and Errors + +```bash +cmux browser --surface console list # browser console messages +cmux browser --surface console clear +cmux browser --surface errors list # JavaScript errors +cmux browser --surface errors clear +``` + +## Visual Debugging + +```bash +cmux browser --surface highlight # highlight element +cmux browser --surface addstyle "body { outline: 1px solid red; }" +cmux browser --surface addinitscript "console.log('loaded')" +``` diff --git a/.agents/skills/cli-cmux/reference/layout-commands.md 
b/.agents/skills/cli-cmux/reference/layout-commands.md new file mode 100644 index 00000000..fce9d415 --- /dev/null +++ b/.agents/skills/cli-cmux/reference/layout-commands.md @@ -0,0 +1,190 @@ +# cmux Layout Commands Reference + +Commands for managing windows, workspaces, panes, and surfaces. + +## Discovery + +```bash +cmux identify # Current workspace/pane/surface (with caller info) +cmux list-windows # All windows +cmux current-window # Active window +cmux list-workspaces # All workspaces +cmux current-workspace # Active workspace +cmux list-panes # Panes in current workspace +cmux list-panes --workspace workspace:2 +cmux list-pane-surfaces --pane pane:1 # Surfaces (tabs) in a pane +cmux list-panels # Sidebar panels +``` + +## Windows + +```bash +cmux new-window +cmux focus-window --window window:2 +cmux close-window --window window:2 +cmux rename-window "my window" +cmux next-window +cmux previous-window +cmux last-window +``` + +## Workspaces + +Workspaces are tabs within a window. + +```bash +cmux new-workspace +cmux new-workspace --command "htop" # start with a command +cmux select-workspace --workspace workspace:2 +cmux close-workspace --workspace workspace:2 +cmux rename-workspace "dev server" +cmux move-workspace-to-window --workspace workspace:2 --window window:1 +cmux reorder-workspace --workspace workspace:2 --index 0 +cmux workspace-action --action # custom actions +``` + +## Panes + +Panes are visual split regions within a workspace. 
+ +```bash +# Create pane (splits from current or specified surface) +cmux new-pane --direction right +cmux new-pane --direction down +cmux new-pane --direction left +cmux new-pane --direction up +cmux new-pane --type browser --direction right --url http://localhost:3000 + +# Alternative: split from a specific surface/pane +cmux new-split right --surface surface:1 +cmux new-split down --pane pane:2 + +# Focus +cmux focus-pane --pane pane:2 +cmux last-pane + +# Resize (direction: -L left, -R right, -U up, -D down) +cmux resize-pane --pane pane:2 -R --amount 20 +cmux resize-pane --pane pane:2 -D --amount 10 + +# Swap pane positions +cmux swap-pane --pane pane:1 --target-pane pane:2 + +# Break pane out to its own workspace +cmux break-pane --pane pane:2 + +# Join a pane into another pane's workspace +cmux join-pane --target-pane pane:1 --pane pane:3 + +# Focus a panel (sidebar) +cmux focus-panel --panel panel:1 +``` + +## Surfaces + +Surfaces are the content inside panes — either terminal or browser. +A pane can have multiple surfaces as tabs. 
+ +```bash +# Create new surface (tab) in an existing pane +cmux new-surface --type terminal --pane pane:1 +cmux new-surface --type browser --pane pane:1 --url http://localhost:3000 + +# Close a surface +cmux close-surface --surface surface:3 + +# Move surface to a different pane +cmux move-surface --surface surface:3 --pane pane:1 +cmux move-surface --surface surface:3 --pane pane:1 --index 0 + +# Reorder surface tabs +cmux reorder-surface --surface surface:3 --index 0 +cmux reorder-surface --surface surface:3 --before surface:1 + +# Drag surface to create a new split +cmux drag-surface-to-split --surface surface:3 right + +# Rename the tab +cmux rename-tab --surface surface:3 "Server" + +# Tab actions +cmux tab-action --action --surface surface:3 + +# Health check +cmux surface-health +cmux trigger-flash --surface surface:3 # visual flash for identification + +# Refresh all surfaces +cmux refresh-surfaces +``` + +## Search + +```bash +cmux find-window "server" # search window/workspace names +cmux find-window --content "error" # search screen content +cmux find-window --select "server" # find and focus +``` + +## Sidebar Metadata + +```bash +# Status key-value pairs +cmux set-status "phase" "testing" --icon "🧪" --color "#00ff00" +cmux clear-status "phase" +cmux list-status + +# Progress bar +cmux set-progress 0.5 --label "Running tests..." 
+cmux clear-progress + +# Log messages +cmux log "Build complete" --level info --source "build" +cmux log "Test failed" --level error +cmux list-log --limit 20 +cmux clear-log + +# Full sidebar state +cmux sidebar-state +``` + +## Notifications + +```bash +cmux notify --title "Build Complete" --body "All tests passed" +cmux list-notifications +cmux clear-notifications +``` + +## Hooks + +```bash +cmux set-hook pane-focus-in "cmux log 'focused pane'" +cmux set-hook --list +cmux set-hook --unset pane-focus-in +``` + +## Display + +```bash +cmux display-message "Hello" +cmux display-message --print "Current workspace: #{workspace_name}" +``` + +## JSON Output + +Add `--json` to any command for machine-readable output: + +```bash +cmux list-panes --json +cmux identify --json +``` + +## UUID Mode + +By default cmux shows short refs. To see UUIDs: + +```bash +cmux --id-format uuids list-panes +cmux --id-format both list-panes # show both refs and UUIDs +``` diff --git a/.agents/skills/cli-cmux/reference/terminal-io.md b/.agents/skills/cli-cmux/reference/terminal-io.md new file mode 100644 index 00000000..8df84297 --- /dev/null +++ b/.agents/skills/cli-cmux/reference/terminal-io.md @@ -0,0 +1,145 @@ +# cmux Terminal I/O Reference + +Commands for reading from and sending input to terminal surfaces. + +## Reading Screen Content + +```bash +# Read current viewport of a surface +cmux read-screen --surface + +# Read with scrollback history +cmux read-screen --surface --scrollback + +# Limit scrollback lines +cmux read-screen --surface --scrollback --lines 50 + +# tmux-compatible alias +cmux capture-pane --surface --scrollback --lines 100 +``` + +Without `--surface`, reads from caller's surface (usually your own — not useful). 
+ +## Sending Text + +```bash +# Send literal text (as if typed) +cmux send --surface "npm run dev" + +# Send text with newline (Enter) +cmux send --surface $'npm run dev\n' + +# Send to a panel (sidebar) instead of a surface +cmux send-panel --panel "some text" +``` + +**Important**: `send` types text but does NOT press Enter. Append `\n` or +follow with `send-key Enter`. + +## Sending Keys + +```bash +# Basic keys +cmux send-key --surface Enter +cmux send-key --surface Tab +cmux send-key --surface Escape +cmux send-key --surface Backspace +cmux send-key --surface Space + +# Arrow keys +cmux send-key --surface Up +cmux send-key --surface Down +cmux send-key --surface Left +cmux send-key --surface Right + +# Modifier combinations +cmux send-key --surface Ctrl+C +cmux send-key --surface Ctrl+D +cmux send-key --surface Ctrl+Z +cmux send-key --surface Ctrl+L # clear screen + +# Panel variant +cmux send-key-panel --panel Enter +``` + +## Pipe Pane + +Stream surface output to a shell command: + +```bash +cmux pipe-pane --command "tee /tmp/surface-output.log" --surface +``` + +## Clear History + +```bash +cmux clear-history --surface +``` + +## Clipboard (Buffers) + +```bash +cmux set-buffer "text to copy" +cmux set-buffer --name mybuf "named buffer" +cmux list-buffers +cmux paste-buffer --surface +cmux paste-buffer --name mybuf --surface +``` + +## Wait for Signal + +Coordinate between panes using named signals: + +```bash +# In one pane/script: wait for a signal +cmux wait-for server-ready --timeout 30 + +# In another pane/script: send the signal +cmux wait-for --signal server-ready +``` + +## Respawn + +Restart the shell in a surface: + +```bash +cmux respawn-pane --surface +cmux respawn-pane --surface --command "zsh" +``` + +## Practical Patterns + +### Run a command and read its output + +```bash +cmux send --surface surface:2 "echo hello" +cmux send-key --surface surface:2 Enter +sleep 1 +cmux read-screen --surface surface:2 +``` + +### Wait for a dev server to 
start + +```bash +cmux send --surface surface:2 "npm run dev" +cmux send-key --surface surface:2 Enter + +# Poll read-screen until the ready message appears +# (use a loop in a script, or just read-screen a few times) +cmux read-screen --surface surface:2 +``` + +### Kill a running process + +```bash +cmux send-key --surface surface:2 Ctrl+C +``` + +### Navigate a menu-driven TUI + +```bash +cmux send-key --surface surface:2 Down +cmux send-key --surface surface:2 Down +cmux send-key --surface surface:2 Enter +cmux read-screen --surface surface:2 +``` diff --git a/AGENTS.md b/AGENTS.md index 79997ffd..c7adce61 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -49,7 +49,7 @@ Verification strategy is defined per-project in SPEC.md §Verification Design (t When a slice requires manual UI testing (outer-loop verification): -1. **Dev server**: use `/tool-cmux` to open a terminal pane, run `npm run dev` there -2. **Browser**: use `/tool-cdp-cli` to launch Chrome with DevTools Protocol, open the dev URL, and interact (snapshot, fill, click, eval, console) +1. **Dev server**: use `/cli-cmux` to open a terminal pane and run `npm run dev` there (do NOT use cmux for browser panes) +2. **Browser**: use `/cli-cdp` to launch Chrome with DevTools Protocol, open the dev URL, and interact (snapshot, fill, click, eval, console) This keeps the dev server and browser observable without leaving the agent session. diff --git a/docs/design/BREADBOARD.md b/docs/design/BREADBOARD.md new file mode 100644 index 00000000..a29e5456 --- /dev/null +++ b/docs/design/BREADBOARD.md @@ -0,0 +1,228 @@ +# Breadboard: Brunch Web UI + +> Produced by `flow-shape-breadboard` · 2026-04-01 +> Inputs: SPEC.md §Requirements, §Decisions D17–D20, §Lexicon + +## Places + +| ID | Place | Route | Requirements served | Notes | +| -- | ------------------- | ------------------------- | ------------------- | ------------------------------------------------------------ | +| P1 | Project list | `/` | R1, R15 | Landing page. 
Shows projects with phase badges. | +| P2 | Interview workspace | `/project/:id` | R2–R12, R14 | Main view. Conversation + entity sidebar + phase indicator. | +| P3 | Export preview | `/project/:id/export` | R13 | Rendered markdown preview + download. Guarded by spec readiness. | + +P2 is the **primary place** — users spend 90%+ of time here. It contains three regions: +- **Conversation panel** (center) — active-path turns, structured question cards, streaming response +- **Entity sidebar** (right) — tabbed: decisions, assumptions, requirements, criteria. Each entity shows status + actions. +- **Header bar** — project name, phase indicator (scope → design → requirements → criteria), export button (enabled when ready) + +Turn tree navigation and branch switching live within P2 (collapsible panel or header dropdown), not a separate route. + +## UI Affordances + +### P1 — Project list + +| Affordance | Interaction | Effect | +| ------------------- | ------------- | ---------------------------------- | +| Project card | Click | Navigate to P2 | +| Phase badge | Read-only | Shows completion per phase | +| "New project" button| Click | `POST /api/projects` → navigate P2 | + +### P2 — Interview workspace + +**Conversation panel:** + +| Affordance | Interaction | Effect | +| ----------------------- | -------------- | --------------------------------------------------- | +| Turn card | Read | Shows question + options + grounding + impact + answer | +| Option buttons | Click | `POST /api/projects/:id/chat` with selected option | +| Free text input | Submit | `POST /api/projects/:id/chat` with text | +| Streaming response | Read | SSE stream renders thinking → text → turn-created | +| Phase transition prompt | Confirm/reject | Agent proposes, user confirms to advance phase | + +**Entity sidebar:** + +| Affordance | Interaction | Effect | +| ------------------------- | ----------------- | ------------------------------------------------------- | +| Decision list | Read | Shows 
active-path decisions with dependency edges | +| Decision → "revisit" | Click + confirm | `POST /api/projects/:id/branch` → conversation rewinds | +| Assumption list | Read | Shows assumptions with confidence badges | +| Assumption → verify | Click | `PUT /api/projects/:id/assumptions/:aid` action=verify | +| Assumption → falsify | Click + confirm | `PUT /api/projects/:id/assumptions/:aid` action=falsify → flag propagation | +| Assumption → edit content | Inline edit | `PUT /api/projects/:id/assumptions/:aid` | +| Requirement list | Read | Shows requirements with reviewed_at / stale badges | +| Requirement → edit | Inline edit | `PUT /api/projects/:id/requirements/:rid` → flag criteria | +| Requirement → delete | Click + confirm | `DELETE /api/projects/:id/requirements/:rid` | +| Requirement → review | Click | `PUT /api/projects/:id/requirements/:rid/review` | +| Criterion list | Read | Shows criteria with reviewed_at / stale badges | +| Criterion → edit | Inline edit | `PUT /api/projects/:id/criteria/:cid` | +| Criterion → delete | Click + confirm | `DELETE /api/projects/:id/criteria/:cid` | +| Criterion → review | Click | `PUT /api/projects/:id/criteria/:cid/review` | +| "Stale" badge | Read | Visual flag on soft-invalidated entities | +| Dependency expand | Click on entity | Shows what this entity depends on / what depends on it | + +**Header bar:** + +| Affordance | Interaction | Effect | +| ------------------- | ----------- | ------------------------------------------------ | +| Phase indicator | Read | Shows scope → design → requirements → criteria with completion | +| Branch indicator | Click | Opens branch switcher dropdown | +| Branch → switch | Click | `POST /api/projects/:id/checkout` → refetch all | +| Export button | Click | Navigate to P3 (disabled until spec readiness) | + +### P3 — Export preview + +| Affordance | Interaction | Effect | +| ---------------- | ----------- | ------------------------------------ | +| Markdown preview | Read | Rendered from 
active-path entities | +| Download button | Click | Download .md file | +| Back link | Click | Navigate to P2 | + +## Code Affordances (API Routes) + +### Project management + +| Method | Route | Core operation | Returns | +| ------ | -------------------- | ------------------ | ------------------------------- | +| GET | /api/projects | listProjects | `Project[]` with phase status | +| POST | /api/projects | createProject | `Project` | +| GET | /api/projects/:id | getProject | `Project` + `Turn[]` (active path) + `PhaseStatus` | + +### Interview (SSE streaming) + +| Method | Route | Core operation | Returns | +| ------ | -------------------------- | -------------- | ------------------------- | +| POST | /api/projects/:id/chat | conductTurn | SSE stream (`DomainEvent` → AI SDK events) | + +### Turn tree + +| Method | Route | Core operation | Returns | +| ------ | ------------------------------ | -------------- | --------------------------- | +| POST | /api/projects/:id/branch | branch | `Project` (updated HEAD) | +| POST | /api/projects/:id/checkout | checkout | `Project` (updated HEAD) | + +### Entities (batch read) + +| Method | Route | Core operation | Returns | +| ------ | ------------------------------ | ----------------------------- | ---------------------------- | +| GET | /api/projects/:id/entities | getActive{Decisions,Assumptions} + getRequirements + getCriteria | `EntityBundle` | +| GET | /api/projects/:id/graph | getEntityGraph | DAG nodes + edges | + +### Entity lifecycle + +| Method | Route | Core operation | Returns | +| ------ | ------------------------------------------ | -------------------- | -------------- | +| PUT | /api/projects/:id/assumptions/:aid | update/verify/falsify| `Assumption` | +| POST | /api/projects/:id/requirements | createRequirement | `Requirement` | +| PUT | /api/projects/:id/requirements/:rid | updateRequirement | `Requirement` | +| DELETE | /api/projects/:id/requirements/:rid | deleteRequirement | — | +| PUT | 
/api/projects/:id/requirements/:rid/review | reviewRequirement | `Requirement` | +| POST | /api/projects/:id/criteria | createCriterion | `Criterion` | +| PUT | /api/projects/:id/criteria/:cid | updateCriterion | `Criterion` | +| DELETE | /api/projects/:id/criteria/:cid | deleteCriterion | — | +| PUT | /api/projects/:id/criteria/:cid/review | reviewCriterion | `Criterion` | + +### Export + +| Method | Route | Core operation | Returns | +| ------ | --------------------------- | -------------- | ---------------- | +| GET | /api/projects/:id/export | exportSpec | Markdown string | + +## Data Stores + +| Store | Technology | Holds | Lifecycle | +| ----------------- | ------------------------- | -------------------------------------- | ------------------ | +| SQLite | Drizzle + better-sqlite3 | All entities, turn tree, join tables | Persistent on disk | +| useChat state | @ai-sdk/react | Conversation messages (hydrated from turns) | Per-session, hydrated on mount | +| Entity state | React (fetch on demand) | Sidebar entity lists | Refetched after mutations | +| Phase state | Derived from turns | Phase indicator | Computed from active path | +| URL | React Router | Project ID, current view | Bookmarkable | + +## Wiring + +### Page load (P2) + +``` +Browser navigates to /project/:id + → GET /api/projects/:id → hydrate project + turns → useChat.setMessages() + → GET /api/projects/:id/entities → hydrate entity sidebar +``` + +### Interview turn + +``` +User submits answer (option click or text) + → POST /api/projects/:id/chat (SSE) + → useChat consumes stream (thinking → text-delta → ...) 
+ → DomainEvent 'turn-created' signals turn saved + → DomainEvent 'observer-complete' signals entities extracted + → Client refetches /api/projects/:id/entities (sidebar updates) +``` + +### Decision revisit (branch) + +``` +User clicks "revisit" on a decision in sidebar + → Confirmation dialog ("This will branch the conversation") + → POST /api/projects/:id/branch { turnId: decision.sourceTurnId } + → Server: branch() moves HEAD to fork point + → Client: refetch /api/projects/:id (new active path → conversation rewinds) + → Client: refetch /api/projects/:id/entities (path exclusion → some entities gone) + → Sidebar shows stale badges on requirements traced to abandoned decisions +``` + +### Assumption falsification (flag propagation) + +``` +User clicks "falsify" on an assumption in sidebar + → Confirmation dialog ("This will flag dependent entities") + → PUT /api/projects/:id/assumptions/:aid { action: 'falsify' } + → Server: walks graph edges, nulls reviewed_at on dependents + → Client: refetch /api/projects/:id/entities + → Sidebar shows stale badges on affected decisions, requirements, criteria +``` + +### Phase transition + +``` +Agent sets is_resolution = true on a turn + → DomainEvent 'phase-resolved' { phase: 'scope' } + → Client shows phase summary modal + → User confirms → next phase begins + → Phase indicator updates +``` + +### Export + +``` +User clicks Export (enabled when spec readiness = true) + → Navigate to /project/:id/export + → GET /api/projects/:id/export + → Server: collect active-path entities → render markdown template + → Client: render preview + download button +``` + +## Handoff + +### Candidate cards (vertical behaviors ready to scope) + +1. **Drizzle + core extraction refactor** — Migrate raw DDL to Drizzle schema, extract interview orchestration from Express handler into core service layer. Infrastructure slice. +2. **Multi-project routing** — React Router, project list page (P1), project-scoped API routes. 
Replaces current single-project `/api/projects/current`. +3. **Entity sidebar** — Fetch and render active-path entities alongside the conversation. Read-only initially. +4. **Entity lifecycle API** — CRUD + review + verify/falsify endpoints for the entity sidebar to write to. +5. **Decision revisit (branch + checkout)** — Turn tree branching via sidebar, path exclusion, stale badges. + +### Capsule impacts + +- **New lexicon**: `entity bundle`, `entity sidebar`, `stale badge`, `spec readiness predicate` +- **New boundary**: SSE stream now carries entity lifecycle signals (`observer-complete`, `phase-resolved`), not just conversation content. The DomainEvent contract (D19) is the coordination mechanism between the streaming conversation and the REST entity sidebar. + +### Open uncertainty + +- **Entity editing timing**: The sidebar affords direct editing at any time, but the spec says requirements are "confirmed during the requirements review phase." Tension between structured flow and direct manipulation. Recommendation: allow direct editing always, but the review *phase* is when the agent systematically walks the list. Direct edits outside the phase are the user's prerogative. +- **Turn tree visualization**: R9 says "navigate the turn tree" but doesn't specify the widget. Options: git-log-style branch graph, dropdown branch list, or timeline with fork indicators. Recommend: start with a branch dropdown (showing HEAD of each branch) — defer the visual tree to later. +- **Entity refetch coordination**: After a turn completes, should the client poll for entities, or should the SSE stream include a signal? DomainEvent `observer-complete` already exists in D19 — use it as the refetch trigger. No polling. + +### Next command + +`/ln-plan` — re-slice PLAN.md to incorporate the refactor, routing, and breadboarded affordances. 
diff --git a/docs/design/DESIGN_SCRATCH.md b/docs/design/DESIGN_SCRATCH.md new file mode 100644 index 00000000..0170a28e --- /dev/null +++ b/docs/design/DESIGN_SCRATCH.md @@ -0,0 +1,78 @@ +# WED KICKOFF + +## design and re-slice, follow up + +### RESTART + +we can /grill on that a bit, because a further reflection which affects our technical design is in the following notes I gathered separately, which I think feed in to the drizzle question + +q: how will this app be built and distributed and run? +q: what kinds of execution modes and interfaces does it need to provide? + +a: the core of it is a web application, with client, server, and relay to the claude agent SDK +a: once this core is running, it will soon be of interest for there to be more than just a web UI mode of interacting with it: a CLI interface would be useful, and an API interface of some sort, provided by an MCP server, is another obvious extension; +a: this means that in the build, we need a CLI executable that can be invoked in different ways, e.g.: + - `npx brunch` should launch the core; might eventually also launch a sidecar MCP server + - `npx brunch [command]` would then allow operations on the core + +Also, now that we have a new data model, we will need routing in the app. let's use /breadboarding to map out which routes we need and what we need in them + +--- + + + + + +### RESCOPE, RESEARCH +1. need e2e fast path, to persisting these structures, based on running an interview + - need a test question or a test problem, at least one + - need a phased interview at least the three phases we've described: + - Initial framing + - The design tree or decision tree drill down + - Extrapolation of requirements and then extrapolation of criteria +2. 
need to capture that and represent it in the UI + - things updating when signals come through from the back-end + - look for a more robust pattern that shows thinking and tool use in a chat using the AI SDK that we can drop in because right now we just have the pattern from the walking skeleton which is really not sufficient +3. when we run the scoping, consider whether Drizzle should be brought in; see if there are public skills available that make that sharper + - confirm that bare `better-sqlite3` is a good choice vs `drizzle-kit`? we might as well pin this down now + + +### INTERVENTIONS/SPIKES + +1. confirm how the conversation history is managed in agent SDK, whether I re-submit the previous turns with each turn, or what: because our "turn tree" structure is essentially a branching conversation representation, and if we change the branches we need to submit the right preceding path, as the preceding history to the current query + - it seems the agent SDK keeps its own record of sessions, which means we need to *start a new session* for each root turn, and fork it when the turn tree forks?? [Work with sessions - Claude API Docs](https://platform.claude.com/docs/en/agent-sdk/sessions#fork-to-explore-alternatives) + - problem: where does the source-of-truth live, for the session, and the conversation history therefore?? what is the interviewing agent going to __see__ when we're doing further conversation turns based on past ones? We're extracting and feeding structured information — what does it see?? +2. confirm how a chat UI with proper phased state machine (thinking, tool-uses, response) should be working and look for robust pattern examples of this, THEY MUST EXIST SOMEWHERE FFS + + + +### NEW IDEATIONS + +- we will need to support a "migrate" type workflow where the agent reads from a workspace to grasp what the code is +- the "bare kickoff" (default path) will need a prompt informed by *shape-up framing*. 
this helps inform downstream interviewing +- every turn should have a question with suggested answers and a "why" property; but also the format of how this is presented should encourage the user to provide grounding, even if they choose one of the offered options — so a radio style choice + optional text area and encouragement to explain why, what it relates to, any other accessory observations, would be important +- our verification harness should allow a terminal based stdin stdout path as well, which I can have any agent drive; this requires that brunch can do different projects simultaneously + +====== + +### OUTLINE of SPEC PARTS + +- goal and concept, w/ product and strategic framing +- (design) `decisions`; `assumptions` [driven by and connected to (interview) `turns`] + - might also need: `open_questions`, `evidence` +- `requirements` (extrapolation/projection of `decisions`) +- (acceptance) `criteria` (extrapolation/projection of `requirements`) + +WHAT ELSE TO TRACK +- a log of all changes? (the turns tree is this) +- a log of high-level history, w snapshots...? (this would require) + +======= + +### THOUGHTS ON THE TURNS STATE MACHINE + +the state machine or the flow control, the sequence diagram around the actual question asking and then answer capture needs to be that the primary agent is focusing on asking the right questions. It's driving the interview. It's thinking about what to ask next. When it kicks off the interview, it's generating a question based on whatever framing or minimal statement it has so far and then trying to work from the broadest questions down to the most specific ones from the top of the tree to the bottom of the tree. Working its way down the tree also implies traversing at some point once one path down the tree has been resolved to go backwards and traverse back to a previous point and work down another branch of the tree and so on. So it definitely needs to do this, and that's a pretty rigorous process. 
And it's already challenging to think about, so that's enough for one agent. + +The second agent, every time an answer is given, needs to capture a decision out of that. At the very minimum, it's the response, and I suppose the response has to be saved with a turn. The turn is going to contain the question, options, and response, and the response will be a choice of one of the options or it will be a free text response. Once that's been input by the user, the sidechain agent needs to evaluate that for what is essentially requirement definition: decisions. Honestly, maybe these are really just the answers to the questions. It's a vague distinction between answering the question and what decision comes out of it, except sometimes more than one decision could be extracted there, so I guess that's the difference. + +Let's say some decisions are pulled out, assumptions are pulled out if there are any, and that probably has to complete before the next question can be asked because I think a structured representation of the sidechain agent's extraction of the decisions and potential predicate assumptions is also a useful input for the interviewing agent's next question. So essentially, the interview agent and the sidechain agent (the analyst, or observer agent) each have their own loop, but they also need to close between themselves before the next question can be posted. So there's something like a clutch mechanism going on there that probably requires a little state machine. diff --git a/docs/design/REMODEL.md b/docs/design/REMODEL.md deleted file mode 100644 index 1f34e2a3..00000000 --- a/docs/design/REMODEL.md +++ /dev/null @@ -1,252 +0,0 @@ -# Domain Model - -Target domain model for Brunch. This is the product of an ideation process examining what the tool actually does vs. what the current schema and terminology imply. See REFACTORS.md for the structural/technical refactors. 
- -## What this tool is - -A spec elicitation tool that takes a natural-language project goal and, through an AI-driven interview process, produces a structured specification document. The output is a fire-and-forget artifact — a detailed spec intended to be consumed by an implementation harness (human or agent). The tool does not manage execution, task orchestration, or runtime state. - -## Scope line - -**Building now (high-certainty pathway):** -The process assumes the user has a reasonably well-defined goal. The tool's job is to reduce remaining uncertainty to the point where the spec is actionable. - -**Acknowledged but not built:** -- An ambiguity-first pathway for projects where the goal itself is unclear (research, spikes, invariant discovery). This would produce a refined goal + resolved invariants that feed into the high-certainty pathway. -- Task planning, execution DAGs, waves/epics. These are consumers of the spec, not part of spec elicitation. -- Runtime propagation of belief invalidation. In a fire-and-forget model, the spec captures the dependency structure (which decisions rest on which assumptions) but doesn't manage what happens if a belief is later falsified. - -## Process - -Three phases, all driven by the same interaction primitive (the interview exchange). - -### Phase 1 — Scope Establishment - -User states intent. LLM interviews to establish boundaries, surface hard requirements, and gauge certainty. Outputs: refined goal, scope (inclusions, exclusions, constraints), initial hard requirements. Acceptance criteria begin accumulating in the background. - -### Phase 2 — Design Tree Exploration - -LLM works down the design tree, interviewing the user on every aspect — implicit and explicit — of how things should work. Every question is a fork; the user's answer resolves it. The LLM provides at least two options per question, with a recommendation where possible, plus an open-ended "something else" option. 
Acceptance criteria continue accumulating. - -This phase may include: -- **Feature exploration** (shape-up style) — breadboarding affordances, fat-marker sketching of how a feature works, where it lives, how the user reaches it -- **Module design** — defining application boundaries and interfaces among technical components - -These are the same interaction pattern (interview exchange → decision), applied through different lenses. - -### Phase 3 — Acceptance Criteria Validation - -All criteria gathered (explicit + background) are surfaced and validated with the user. The LLM proposes additional criteria, walks through risks, failure modes, caveats, and suggests hardening of rules and contracts. - -## Entities - -### Interview Exchange - -The universal interaction primitive. Used across all three phases. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| phase | enum | `scope`, `design`, `criteria` | -| lens | enum? | `general`, `feature`, `architecture`, `data_model`, etc. (Phase 2) | -| question | text | What the LLM asked | -| why | text | Why this question matters | -| options | json | At least two alternatives | -| recommendation | text? | Which option the LLM recommends, if any | -| answer | text | What the user chose or typed | -| sort_order | int | Sequence within phase | - -### Project - -Identity and metadata only. Not a junk drawer. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| name | text | | -| raw_input | text | What the user originally typed | -| model | text | Selected LLM model | -| pathway | enum | `high_certainty` (default). Future: `exploratory`, `hybrid` | -| phase | enum | `scope`, `design`, `criteria`, `complete` | -| created_at | datetime | | -| updated_at | datetime | | - -### Goal - -The refined statement of intent. Distinct from raw input. 
- -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| text | text | Current formulation | -| version | int | Refinement counter | -| created_at | datetime | | - -### Scope - -Boundaries around the goal. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| type | enum | `inclusion`, `exclusion`, `constraint` | -| text | text | The scope statement | -| source_exchange | FK → InterviewExchange? | Which exchange surfaced this | - -### Requirement - -What the system must do. User-stated or elicited. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| text | text | The requirement statement | -| rationale | text | Why this matters | -| priority | enum | `must`, `should`, `could` | -| source | enum | `user_stated`, `elicited` | -| source_exchange | FK → InterviewExchange? | Which exchange surfaced this | - -### Decision - -A resolved fork in the design tree. Tree-structured. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| question | text | The design question | -| options | json | Alternatives considered | -| recommendation | text? | LLM's recommendation | -| chosen | text | What the user chose | -| rationale | text | Why | -| lens | enum | `feature`, `architecture`, `data_model`, `integration`, `ux`, etc. | -| source_exchange | FK → InterviewExchange | The exchange that resolved this | -| sort_order | int | | - - -### Assumption - -A falsifiable belief the spec rests on. Not a design choice (that's a Decision). - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| text | text | The belief statement | -| confidence | enum | `high`, `medium`, `low` | -| status | enum | `stated`, `validated`, `assumed` | -| impact_if_wrong | text | What breaks if this is false | -| source_exchange | FK → InterviewExchange? 
| Which exchange surfaced this | - -### Decision ↔ Assumption (join) - -Many-to-many. A decision may depend on multiple assumptions; an assumption may underpin multiple decisions. - -| Field | Type | Description | -|---|---|---| -| decision_id | FK → Decision | | -| assumption_id | FK → Assumption | | - -### Decision → Decision (dependency join) - -DAG structure. A decision may depend on multiple prior decisions. Edges are backward references: the LLM cites relevant upstream decisions when posing each new question. See REFACTORS.md for rationale and prior art (IBIS, QOC, DRL, ADRs). - -| Field | Type | Description | -|---|---|---| -| decision_id | FK → Decision | The decision that depends | -| depends_on_id | FK → Decision | The upstream decision | - - -### Acceptance Criterion - -A testable condition verifying a requirement. Gathered progressively, validated in Phase 3. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| requirement_id | FK → Requirement? | What this verifies (linked in Phase 3) | -| text | text | The testable statement | -| status | enum | `draft`, `proposed`, `validated` | -| verification_type | enum | `automated_test`, `benchmark`, `human_review`, `static_analysis`, `contract_check` | -| source_exchange | FK → InterviewExchange? | Which exchange surfaced this | - -### Risk - -A failure mode surfaced during Phase 3. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| description | text | What could go wrong | -| severity | enum | `high`, `medium`, `low` | -| likelihood | enum | `high`, `medium`, `low` | -| mitigation | text | How to address it | - -### Risk ↔ Decision / Criterion (joins) - -Risks may link to the decisions that introduced them and the criteria that guard against them. - -| Field | Type | Description | -|---|---|---| -| risk_id | FK → Risk | | -| decision_id | FK → Decision? | | -| criterion_id | FK → AcceptanceCriterion? 
| | - -### Spec Output - -The generated document. Stored as rendered output, regenerated as the underlying entities evolve. No confidence score — spec readiness is a function of workflow state (has the process completed?) and assumption risk (are low-confidence assumptions resolved?). Both are computable from the underlying entities, not a separate stored value. - -| Field | Type | Description | -|---|---|---| -| id | uuid | | -| project_id | FK → Project | | -| content | text | Markdown spec | -| version | int | Regeneration counter | -| created_at | datetime | | - -## Relationships summary - -``` -Project -├── Goal (1:many, versioned) -├── Scope (1:many) -├── InterviewExchange (1:many, ordered by phase + sort_order) -├── Requirement (1:many) -│ └── AcceptanceCriterion (1:many) -├── Decision (1:many, DAG via decision_dependency) -│ ├── → Decision (many:many, via decision_dependency) -│ └── ←→ Assumption (many:many) -├── Assumption (1:many) -├── Risk (1:many) -│ ├── → Decision (many:many) -│ └── → AcceptanceCriterion (many:many) -└── SpecOutput (1:many, versioned) -``` - -## What this replaces in the current schema - -| Current | Becomes | Notes | -|---|---|---| -| `project` (junk drawer) | `Project` (identity) + `Goal` + `SpecOutput` | Phase/workflow state on Project, not scattered across boolean flags | -| `assumption` table | `Decision` | Current "assumptions" are design choices with alternatives | -| `entry` table | `Requirement` + `AcceptanceCriterion` | Splits "what" from "how we verify" | -| `goal_iteration` | `Goal` (versioned) + `InterviewExchange` | Goal refinement is interview-driven, not a separate mechanism | -| `clarifying_state` JSON blob | Eliminated | All state lives in proper tables | -| `current_questions` / `current_answers` JSON columns | `InterviewExchange` rows | Structured, not stuffed into project | -| No risk model | `Risk` entity | First-class, linked to decisions and criteria | -| No assumption→decision links | `decision_assumption` join 
| Traces which beliefs underpin which choices | -| No real `Assumption` entity | `Assumption` (falsifiable beliefs) | Distinct from decisions | - -## North star (not built now) - -- **Exploratory pathway**: for ambiguous projects where the goal itself needs discovery. Would produce invariants + refined goal → feed into high-certainty pathway. -- **Task/planning layer**: tasks, waves/epics, execution DAG. Tasks would reference assumptions (spikes validate them; predicated tasks depend on them). Belief falsification would cascade through the decision tree and invalidate dependent tasks. Many-to-many between tasks and assumptions, with two relationship types: `validates` (spike) and `predicated_on` (depends on belief holding). -- **Orchestration harness output**: rather than a fire-and-forget spec document, the tool would output a live data structure that an agent orchestration layer queries at runtime — checking belief validity, task preconditions, and decision dependencies. - -These concerns are only relevant if the tool becomes an orchestration harness rather than a spec generator. The entity model is designed so that adding tasks/planning is additive (new tables referencing existing entities via foreign keys), not a rewrite. 
\ No newline at end of file diff --git a/docs/design/schema.dbdiagram b/docs/design/schema.dbdiagram new file mode 100644 index 00000000..31ceface --- /dev/null +++ b/docs/design/schema.dbdiagram @@ -0,0 +1,405 @@ +{ + "version": "1.0.0", + "darkMode": true, + "gridEnabling": false, + "detailLevel": "All", + "tablePositions": [ + { + "name": "project", + "schemaName": "public", + "x": 538.5830078125, + "y": 73 + }, + { + "name": "turn", + "schemaName": "public", + "x": 12, + "y": 32 + }, + { + "name": "option", + "schemaName": "public", + "x": 538.5830078125, + "y": 311 + }, + { + "name": "decision", + "schemaName": "public", + "x": 1072.5625, + "y": 94 + }, + { + "name": "assumption", + "schemaName": "public", + "x": 1072.5625, + "y": 506 + }, + { + "name": "requirement", + "schemaName": "public", + "x": 1072.5625, + "y": 300 + }, + { + "name": "criterion", + "schemaName": "public", + "x": 1640.06396484375, + "y": 174 + }, + { + "name": "turn_decision", + "schemaName": "public", + "x": 1640.06396484375, + "y": 32 + }, + { + "name": "turn_assumption", + "schemaName": "public", + "x": 1640.06396484375, + "y": 980 + }, + { + "name": "decision_parent_decision", + "schemaName": "public", + "x": 1640.06396484375, + "y": 412 + }, + { + "name": "decision_parent_assumption", + "schemaName": "public", + "x": 1640.06396484375, + "y": 696 + }, + { + "name": "assumption_parent_assumption", + "schemaName": "public", + "x": 1640.06396484375, + "y": 838 + }, + { + "name": "requirement_decision", + "schemaName": "public", + "x": 1640.06396484375, + "y": 554 + } + ], + "tableGroupCollapseStates": [], + "stickyNoteLayouts": [], + "referencePaths": [ + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "turn", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "active_turn_id" + ], + "secondTableName": "project", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + 
"firstTableName": "project", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "project_id" + ], + "secondTableName": "turn", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "turn", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "parent_turn_id" + ], + "secondTableName": "turn", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "turn", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "turn_id" + ], + "secondTableName": "option", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "project", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "project_id" + ], + "secondTableName": "decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "project", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "project_id" + ], + "secondTableName": "assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "project", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "project_id" + ], + "secondTableName": "requirement", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "project", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "project_id" + ], + "secondTableName": "criterion", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": 
"requirement", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "requirement_id" + ], + "secondTableName": "criterion", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "turn", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "turn_id" + ], + "secondTableName": "turn_decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "decision", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "decision_id" + ], + "secondTableName": "turn_decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "turn", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "turn_id" + ], + "secondTableName": "turn_assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "assumption", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "assumption_id" + ], + "secondTableName": "turn_assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "decision", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "decision_id" + ], + "secondTableName": "decision_parent_decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "decision", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "parent_decision_id" + ], + "secondTableName": "decision_parent_decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + 
}, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "decision", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "decision_id" + ], + "secondTableName": "decision_parent_assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "assumption", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "parent_assumption_id" + ], + "secondTableName": "decision_parent_assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "assumption", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "assumption_id" + ], + "secondTableName": "assumption_parent_assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "assumption", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "parent_assumption_id" + ], + "secondTableName": "assumption_parent_assumption", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "requirement", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "requirement_id" + ], + "secondTableName": "requirement_decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + }, + { + "firstFieldNames": [ + "id" + ], + "firstTableName": "decision", + "firstSchemaName": "public", + "firstRelation": "1", + "secondFieldNames": [ + "decision_id" + ], + "secondTableName": "requirement_decision", + "secondSchemaName": "public", + "secondRelation": "*", + "checkPoints": [] + } + ] +} \ No newline at end of file diff --git a/memory/PLAN.md b/memory/PLAN.md index a29eade9..00b46bb8 100644 --- a/memory/PLAN.md +++ 
b/memory/PLAN.md @@ -27,17 +27,48 @@ 2. **SQLite foundation + project persistence** `FE-535` — Replace Dolt with `better-sqlite3`. Basic persistence with project + message tables. Conversation history replay. `done` - Requirements: → SPEC.md §Requirements #14 - - Assumptions: → SPEC.md §Assumptions A5 (validated), A11 (workaround validated), A12 (validated) + - Assumptions: → SPEC.md §Assumptions A5 (validated), A11 (validated), A12 (validated) - Invariants established: → SPEC.md §Invariants I5, I6 - Invariants respected: → SPEC.md §Invariants I1, I2, I3 - Acceptance: create project, send message, refresh page, see history, continue conversation - Branch: `ln/fe-535-sqlite-persistence` -## Phase 2: Turn Model + Extraction +## Phase 2: Architecture - + +### Slices + +3. **Turn tree schema + API** `FE-544` — Migrate from message table to the full schema.dbml model (turn, option, decision, assumption, requirement, criterion + all join tables). Update API: POST /api/chat creates turns, GET /api/projects/current returns turns on the active path. Project gets `active_turn_id`. Tests verify turn tree CRUD and active path resolution. `done` + - Requirements: → SPEC.md §Requirements #14 + - Assumptions: → SPEC.md §Assumptions A6 + - Invariants established: → SPEC.md §Invariants I6 (updated), I9, I10 + - Invariants respected: → SPEC.md §Invariants I1, I2, I3 + - Acceptance: create project, create turns with parent chain, resolve active path, close and reopen with state intact + - Branch: `ln/fe-544-turn-tree-schema` + +3c. **Drizzle ORM + core extraction** — Migrate raw DDL to Drizzle schema (`drizzle/schema.ts`) with migration runner. Extract interview orchestration from `app.ts` into `core.ts` — `conductTurn()` returns `AsyncIterable<DomainEvent>`. Express handler becomes a thin adapter translating DomainEvents to SSE. 
`not-started` + - Requirements: → SPEC.md §Requirements #14 + - Assumptions: → SPEC.md §Assumptions A18, A19 + - Decisions: → SPEC.md §Decisions D18, D19 + - Invariants to establish: Drizzle migration auto-apply, DomainEvent streaming + - Invariants to respect: → SPEC.md §Invariants I1, I2, I3, I5, I6, I9, I10 + - Acceptance: existing 39 tests pass against Drizzle schema; stale DB auto-migrates on startup; `conductTurn()` yields DomainEvents consumed by Express adapter; Drizzle Studio inspectable + +3d. **Multi-project routing** — Install `@tanstack/react-router`. Three client routes: project list (`/`), interview workspace (`/project/:id`), export preview (`/project/:id/export`). Route loaders replace `useEffect` hydration. Server API becomes project-scoped (`/api/projects/:id/...`). Project list page with phase badges. `not-started` + - Requirements: → SPEC.md §Requirements #1, #15 + - Decisions: → SPEC.md §Decisions D9 (updated) + - Invariants to respect: → SPEC.md §Invariants I1, I2, I3, I6, I9, I10 + - Acceptance: navigate between project list and interview workspace; create new project from list; project-scoped API routes work; route loaders fetch data on navigation + - Ref: → docs/design/BREADBOARD.md §Places, §Wiring + +## Phase 3: Interview Engine + + ### Spikes @@ -48,20 +79,13 @@ ### Slices -3. **Turn tree schema + API** — Migrate from message table to the full schema.dbml model (turn, option, decision, assumption, requirement, criterion + all join tables). Update API: POST /api/chat creates turns, GET /api/projects/current returns turns on the active path. Project gets `active_turn_id`. Tests verify turn tree CRUD and active path resolution. 
`not-started` - - Requirements: → SPEC.md §Requirements #14 - - Assumptions: → SPEC.md §Assumptions A6 - - Invariants to establish: turn tree persistence, active path resolution - - Invariants to respect: → SPEC.md §Invariants I1, I2, I3 - - Acceptance: create project, create turns with parent chain, resolve active path, close and reopen with state intact - -3b. **Rich chat UI: tool calls + reasoning rendering** — Extend SSE adapter to emit `tool-call-streaming-start`, `tool-call-delta`, `tool-call`, and `tool-result` events for SDK `tool_use` content blocks. Install AI Elements components (`Tool`, `Reasoning`, `ChainOfThought`, `Message`, `PromptInput`) via `npx ai-elements`, restyle to match brunch design. Replace hand-rolled `App.tsx` message rendering with part-type switching (`text`, `reasoning`, `tool-{name}`, `step-start`). Establish user-testability for the streaming pipeline per verification policy — all part types visible in browser. `not-started` +3b. **Rich chat UI: tool calls + reasoning rendering** `FE-541` — Extend SSE adapter to emit tool-call events for SDK `tool_use` content blocks. Install AI Elements components (`Tool`, `Reasoning`, `ChainOfThought`, `Message`, `PromptInput`) via `npx ai-elements`, restyle to match brunch design. Replace hand-rolled message rendering with part-type switching. `not-started` - Requirements: → SPEC.md §Requirements #4 - Assumptions: → SPEC.md §Assumptions A16, A17 - Invariants to establish: → SPEC.md §Invariants I7, I8 - Invariants to respect: → SPEC.md §Invariants I1, I2, I3 - - Acceptance: `npm run dev`, send a message that triggers tool use, see tool call with state transitions (pending → running → completed/error), see reasoning in collapsible block, all rendered via AI Elements components. SSE adapter tests cover tool_use content blocks. 
- - Branch: `ln/fe-xxx-rich-chat-ui` + - Acceptance: send a message that triggers tool use, see tool call with state transitions, reasoning in collapsible block, all via AI Elements. SSE adapter tests cover tool_use content blocks. + - Branch: `ln/fe-541-rich-chat-ui` 4. **Structured interview: scope phase** — Replace flat chat with structured turns. Implement the scope phase as an agent skill — the agent generates a question with options, grounding ("why this matters"), and impact signal. User selects an option or types a response. Turn persists with phase provenance. UI renders the turn card (question + options + grounding). `not-started` - Requirements: → SPEC.md §Requirements #2, #3 @@ -69,32 +93,34 @@ - Invariants to respect: → SPEC.md §Invariants I1, I2, I3, I5, I6 - Acceptance: start a project, agent asks structured scope questions with options and grounding, user answers, turns persist with parent chain -5. **Observer agent + entity persistence** — After each answered turn, a second agent call extracts decisions and assumptions. Writes to decision/assumption tables with turn linkage (turn_decision, turn_assumption) and dependency edges (decision_parent_decision, decision_parent_assumption, assumption_parent_assumption). `not-started` +5. **Observer agent + entity persistence** — After each answered turn, core invokes a second agent call that extracts decisions and assumptions. Writes to decision/assumption tables with turn linkage and dependency edges. Core yields `observer-complete` DomainEvent; web adapter signals client to refetch entities. 
`not-started` - Requirements: → SPEC.md §Requirements #5 - Assumptions: → SPEC.md §Assumptions A3, A4, A14 (validated by spike) - - Acceptance: answer a scope question, observer extracts decision + assumptions, dependency edges visible in DB, extraction completes within user think time + - Acceptance: answer a scope question, observer extracts decision + assumptions, dependency edges visible in DB, extraction completes within user think time, sidebar refetch triggered -6. **Decision + assumption dashboard** — React sidebar showing decisions and assumptions on the active path. Updates after each observer extraction. Dependency edges visible (what does this decision depend on?). `not-started` +6. **Entity sidebar (read-only)** — React sidebar in interview workspace showing decisions, assumptions, requirements, and criteria on the active path. Tabbed display. Updates after each observer extraction via `observer-complete` event. Dependency edges visible. Stale badges for soft-invalidated entities. `not-started` - Requirements: → SPEC.md §Requirements #6 - Assumptions: — - - Acceptance: entities appear in categorized lists as interview progresses, dependency links navigable + - Invariants to respect: → SPEC.md §Invariants I9, I10 + - Acceptance: entities appear in categorized tabs as interview progresses, dependency links navigable, stale badges render correctly + - Ref: → docs/design/BREADBOARD.md §UI Affordances → P2 Entity sidebar -## Phase 3: Full Interview +## Phase 4: Full Interview ### Slices -7. **Phase transition + resolution** — Interviewing agent judges when scope phase is complete (is_resolution). Summary presented to user. User confirms to advance. UI shows phase completion state. `not-started` +7. **Phase transition + resolution** — Agent judges when scope phase is complete (`is_resolution`). Core yields `phase-resolved` DomainEvent. Client shows summary modal. User confirms to advance. Phase indicator updates. 
`not-started` - Requirements: → SPEC.md §Requirements #7, #8 - Assumptions: → SPEC.md §Assumptions A15 - - Acceptance: agent marks resolution, summary shows, user confirms, UI reflects phase completion + - Acceptance: agent marks resolution, summary shows, user confirms, phase indicator reflects completion 8. **Design drill-down phase** — Second agent skill. Walks the design tree with structured questions. Decisions extracted by observer. Continues until agent judges resolution. `not-started` - Requirements: → SPEC.md §Requirements #2, #3 - Assumptions: → SPEC.md §Assumptions A13 (validated by slice 4) - - Acceptance: design questions with options, decisions extracted and shown in dashboard, agent resolves when understanding is reached + - Acceptance: design questions with options, decisions extracted and shown in sidebar, agent resolves when understanding is reached 9. **Requirements review phase** — Third agent skill. Walks accumulated requirements list. Agent checks for gaps, proposes additions. User confirms each. Requirements get `reviewed_at` stamped. `not-started` - Requirements: → SPEC.md §Requirements #11 @@ -106,50 +132,55 @@ - Assumptions: — - Acceptance: agent proposes criteria per requirement, user confirms, spec readiness predicate evaluable -## Phase 4: Revisit + Export +## Phase 5: Revisit + Export - + ### Slices -11. **Decision revisit: turn tree branching** — Navigate to a previous decision in the dashboard. Fork a new branch from the source turn. Move HEAD. Abandoned branches can be restored (move HEAD back). Active path recomputation. `not-started` +11. **Decision revisit: branch + checkout** — Click "revisit" on a decision in the sidebar → confirmation → `POST /api/projects/:id/branch` → HEAD moves to fork point → conversation rewinds → stale entities leave active path (path exclusion). Branch dropdown shows available branches. Checkout to switch. 
`not-started` - Requirements: → SPEC.md §Requirements #9, #10 - Assumptions: → SPEC.md §Assumptions A6 - - Acceptance: revisit a decision, new branch created, interview resumes from fork point, abandon returns to previous path + - Decisions: → SPEC.md §Decisions D17 (path exclusion) + - Acceptance: revisit a decision, new branch created, interview resumes from fork point, checkout returns to previous path + - Ref: → docs/design/BREADBOARD.md §Wiring → Decision revisit -12. **Soft invalidation** — When HEAD moves to a new branch, requirements traced to superseded decisions are flagged (stale reviewed_at). Criteria inherit flag transitively. Dashboard shows invalidation state. Re-entering requirements/criteria phase re-qualifies flagged entities. `not-started` - - Requirements: → SPEC.md §Requirements #9 - - Assumptions: — - - Acceptance: fork a branch, requirements show "needs review" state, re-review clears flags +12. **Entity lifecycle API** — CRUD + review + verify/falsify endpoints for sidebar writes. `PUT .../assumptions/:id` with action (verify/falsify/update) triggers flag propagation per D17. `PUT .../requirements/:id` cascades to criteria. `PUT .../requirements/:id/review` and `.../criteria/:id/review` stamp `reviewed_at`. `not-started` + - Requirements: → SPEC.md §Requirements #9, #11, #12 + - Decisions: → SPEC.md §Decisions D17 (flag propagation) + - Acceptance: falsify an assumption → dependent entities flagged; edit a requirement → criteria flagged; review stamps reviewed_at + - Ref: → docs/design/BREADBOARD.md §Code Affordances → Entity lifecycle -13. **Spec export** — Render markdown spec from active path entities (decisions, assumptions, requirements, criteria). Export enabled only when spec readiness predicate is true (all phases resolved + reviewed). Download button. `not-started` +13. **Spec export** — Render markdown spec from active path entities (decisions, assumptions, requirements, criteria). 
Export route (`/project/:id/export`) shows preview. Download button. Enabled only when spec readiness predicate is true (all phases resolved + reviewed). `not-started` - Requirements: → SPEC.md §Requirements #13 - Assumptions: — - - Acceptance: complete all phases, click export, markdown downloads with all active-path entities + - Acceptance: complete all phases, navigate to export, markdown preview with all active-path entities, download .md file + - Ref: → docs/design/BREADBOARD.md §Places → P3 -## Phase 5: Distribution +## Phase 6: Distribution ### Slices -14. **npx distribution** — `bin` entry, launcher starts Express (serves built Vite assets + API on one port), opens browser. Single env var: `ANTHROPIC_API_KEY`. `not-started` +14. **npx distribution + CLI** — `bin` entry, launcher starts Express (serves built Vite assets + API on one port), opens browser. `npx brunch` for web UI. `npx brunch [command]` for CLI operations. Single env var: `ANTHROPIC_API_KEY`. `not-started` - Requirements: → SPEC.md §Requirements #1 - Assumptions: → SPEC.md §Assumptions A8 (validated) + - Decisions: → SPEC.md §Decisions D20 - Acceptance: `npx brunch` with key in scope opens working app ## Horizon - + +- CLI interactive interview mode (terminal-based interview using core's DomainEvent stream) +- MCP server adapter (expose core operations as MCP tools) +- Turn tree visualization (git-log-style branch graph in sidebar) +- Entity graph visualization (decision + assumption DAG view) - Exploratory pathway (for projects where the goal itself is unclear) - Multi-provider support via AI SDK server-side (if Claude Agent SDK becomes limiting) -- Entity editing outside interview flow (direct CRUD on dashboard) - Export to GitHub Issues, Linear, YAML task definitions -- Assumption graph visualization (explore dependency chains) -- Decision graph visualization (tree/DAG view) -- Project dashboard with phase completion overview (→ SPEC.md §Requirements #15) ## Dependencies @@ -157,19 +188,21 
@@ ``` Phase 1: 1 (skeleton) ──→ 2 (SQLite) -Phase 2: 2 ──→ 3 (turn schema) ──→ 3b (rich chat UI) ──→ 4 (scope interview) - spike (observer) ──→ 5 (observer agent) - 3 ──→ 5 (observer agent) ──→ 6 (dashboard) - 4 ──→ 5 -Phase 3: 6 ──→ 7 (transitions) ──→ 8 (design) ──→ 9 (requirements) ──→ 10 (criteria) -Phase 4: 6 ──→ 11 (branching) ──→ 12 (invalidation) +Phase 2: 2 ──→ 3 (turn schema) ──→ 3c (Drizzle+core) ──→ 3d (routing) +Phase 3: 3c ──→ 3b (rich chat UI) ──→ 4 (scope interview) ──→ 5 (observer) + spike (observer fidelity) ──→ 5 + 3d + 5 ──→ 6 (entity sidebar) +Phase 4: 6 ──→ 7 (transitions) ──→ 8 (design) ──→ 9 (requirements) ──→ 10 (criteria) +Phase 5: 6 ──→ 11 (branching) + 6 ──→ 12 (entity lifecycle API) 10 ──→ 13 (export) -Phase 5: 13 ──→ 14 (npx) +Phase 6: 13 ──→ 14 (npx + CLI) ``` ### Parallelism opportunities -- Slice 3b (rich chat UI) and observer spike can proceed in parallel after slice 3 lands -- Slice 6 (dashboard) and slice 7 (transitions) can start in parallel once slice 5 lands -- Slice 11 (branching) can start after slice 6, independent of slices 7-10 +- Slice 3b (rich chat UI) and 3d (routing) can proceed in parallel after 3c lands +- Observer spike can proceed any time after slice 3 — independent of 3c/3d +- Slice 7 (transitions) and 11 (branching) can start in parallel once slice 6 lands +- Slice 12 (entity lifecycle API) can proceed in parallel with slice 11 - Slice 14 (npx) can start early with a basic launcher, completing after slice 13 diff --git a/memory/SPEC.md b/memory/SPEC.md index de491443..d61bbe40 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -18,20 +18,21 @@ The core data model: - **Assumption graph** — Assumptions are the falsifiable beliefs that decisions rest on. They have their own dependency structure (assumptions can rest on prior assumptions). - **Requirements & criteria** — Downstream projections. Requirements accumulate during the decision drill-down and are reviewed in a dedicated phase. 
Criteria are proposed against confirmed requirements. -The architecture: - -- **Agent engine**: Claude Agent SDK (`query()`) — tool use, MCP, session resume, subagents, permissions, rich streaming events. Each interview phase is an agent skill. -- **Observer agent**: Separate extraction call after each turn — captures decisions, assumptions, and their dependency edges -- **Server**: Express.js — iterates SDK messages, translates to AI SDK's UI Message Stream SSE protocol. No AI SDK runtime server-side -- **Transport**: AI SDK UI Message Stream protocol (SSE with typed JSON events) -- **Client**: React + Vite + `@ai-sdk/react` `useChat` hook — consumes SSE natively -- **Database**: SQLite via `better-sqlite3` — zero-config, embedded +The architecture (layered: db → core → adapters): + +- **Database**: SQLite via Drizzle ORM + `better-sqlite3` — TypeScript schema is single source of truth for types, DDL, and migrations. Migrations auto-apply at startup. +- **Core**: Interface-agnostic service layer — turn tree operations, interview orchestration, entity lifecycle, observer, phase management, export. `conductTurn()` returns `AsyncIterable<DomainEvent>` for streaming. No transport knowledge. +- **Agent engine**: Claude Agent SDK (`query()`) — tool use, MCP, session resume, subagents, permissions, rich streaming events. Each interview phase is an agent skill. Called by core, not by adapters. +- **Observer agent**: Separate extraction call after each turn — captures decisions, assumptions, and their dependency edges. Invoked by core after turn completion. +- **Web adapter**: Express.js translates `DomainEvent` stream to AI SDK UI Message Stream SSE. React + Vite + `@ai-sdk/react` `useChat` client. 
+- **CLI adapter**: (future) Terminal I/O consuming the same `DomainEvent` stream +- **MCP adapter**: (future) MCP server exposing core operations as tools - **Output**: Flattened markdown spec exported on demand from the active path's entities ## Constraints & Non-goals - **Anthropic-only** — no multi-provider support (OpenAI, Gemini, Ollama) -- **No belief invalidation cascading** — revisiting a decision soft-invalidates downstream (flags for review), but there is no automatic runtime propagation through the graph +- **No automatic deletion cascading** — invalidation flags entities for review but does not delete or modify them. Two mechanisms: path exclusion (lazy, via HEAD movement) and flag propagation (eager, via dependency graph walk). See D17 - **No task planning** — consumers of the spec, not part of this tool - **No exploratory pathway** — assumes user has a reasonably defined goal - **Single-user** — no collaborative editing @@ -60,25 +61,27 @@ The architecture: ## Assumptions -| # | Assumption | Confidence | Dependent decisions | Implicated slices | Validation approach | -| --- | --------------------------------------------------------------------------------------------------------------------------------- | ---------- | ------------------- | -------------------------- | -------------------------------------------------------------- | -| A1 | AI SDK's UI Message Stream SSE protocol is documented and stable enough to emit conformantly without importing AI SDK server-side | **validated** | D8 | Walking skeleton | Validated: skeleton emits conformant SSE, 15 tests pass | -| A2 | Claude Agent SDK `query()` with `includePartialMessages` provides all streaming event types needed for CLI-quality feedback | **validated** | D8 | Walking skeleton | Validated: adapter translates stream_event messages correctly | -| A3 | Separating interviewer from observer produces better interview quality than inline tool calling | medium | D1 | Observer agent | Compare interview 
coherence with and without tool-calling load | -| A4 | Observer extraction completes in 1-3s during user read/think time (10-60s), adding zero perceived latency | medium | D1 | Observer agent | Measure extraction latency with realistic turn payloads | -| A5 | `better-sqlite3` npm prebuilt binary works across macOS/Linux without native compilation issues | **validated** | D7 | SQLite foundation | Validated: installed on macOS without native compilation issues | -| A6 | Turn-tree branching in SQLite is sufficient for decision revisit and undo in a single-user tool | high | D7 | Turn tree | Validate with realistic branch/merge scenarios | -| A7 | Users arriving at the tool have a reasonably defined goal | medium | — | Scope phase | User testing; exploratory pathway deferred if false | -| A8 | A single Express port serving API + static assets is sufficient for npx distribution | **validated** | D10 | npx distribution | Validated: Vite proxy to Express works in dev; single port | -| A9 | TanStack AI is too immature for a deliverable (alpha, v0) | medium | D9 | — | Re-evaluate if AI SDK becomes constraining | -| A10 | The `useChat` hook can consume custom SSE without AI SDK server runtime | **validated** | D9 | Walking skeleton | Validated: useChat consumes custom SSE via DefaultChatTransport | -| A11 | Stateless `query()` with prompt-stuffed history is sufficient for multi-turn interviewing — SDK session persistence is unnecessary and undesirable | **validated** | D8, D12 | SQLite foundation | Validated: formatting history into prompt works. SDK sessions rejected as competing source of truth — opaque, machine-local, incompatible with portable data goals (atomic YAML / git-versionable). Turn tree is sole session model. 
| -| A12 | `useChat` hook accepts initial messages to hydrate conversation state from server-stored history | **validated** | D9 | SQLite foundation | Validated: `useChat` doesn't have `initialMessages` prop but `setMessages` works for hydration | -| A13 | Claude Agent SDK supports defining interview phases as agent skills with distinct system prompts and tool sets | medium | D2 | Interview phases | Test SDK skill/agent configuration API | -| A14 | A second-thread observer agent can reliably extract decisions, assumptions, and dependency edges from a single turn's Q&A | medium | D1 | Observer agent | Probe with realistic interview exchanges; measure extraction fidelity | -| A15 | The LLM can reliably judge when a phase interview has reached sufficient understanding (is_resolution) | medium | D3 | Phase resolution | Probe across varied project types; measure false-positive resolution rate | -| A16 | AI SDK `useChat` hook's `ToolUIPart` state machine (`input-streaming` → `input-available` → `output-available` / `output-error` / `approval-requested` → `approval-responded` / `output-denied`) models all permutations of pending, error, and success for both interim (thinking, tool calls) and final (response) data | high | D14 | Rich chat UI | Validate by extending SSE adapter to emit tool-call events, confirm `useChat` surfaces all states | -| A17 | AI Elements copy-paste components can be restyled without forking — they are ownable source files, not npm-locked dependencies | high | D14 | Rich chat UI | Install via CLI, inspect source, confirm no hidden npm runtime dependency | +| # | Assumption | Confidence | Dependent decisions | Implicated slices | Validation approach | +| --- | 
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | ------------------- | ----------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | +| A1 | AI SDK's UI Message Stream SSE protocol is documented and stable enough to emit conformantly without importing AI SDK server-side | **validated** | D8 | Walking skeleton | Validated: skeleton emits conformant SSE, 15 tests pass | +| A2 | Claude Agent SDK `query()` with `includePartialMessages` provides all streaming event types needed for CLI-quality feedback | **validated** | D8 | Walking skeleton | Validated: adapter translates stream_event messages correctly | +| A3 | Separating interviewer from observer produces better interview quality than inline tool calling | medium | D1 | Observer agent | Compare interview coherence with and without tool-calling load | +| A4 | Observer extraction completes in 1-3s during user read/think time (10-60s), adding zero perceived latency | medium | D1 | Observer agent | Measure extraction latency with realistic turn payloads | +| A5 | `better-sqlite3` npm prebuilt binary works across macOS/Linux without native compilation issues | **validated** | D7 | SQLite foundation | Validated: installed on macOS without native compilation issues | +| A6 | Turn-tree branching in SQLite is sufficient for decision revisit and undo in a single-user tool | high | D7 | Turn tree | Validate with realistic branch/merge scenarios | +| A7 | Users arriving at the tool have a reasonably defined goal | medium | — | Scope phase | User testing; 
exploratory pathway deferred if false | +| A8 | A single Express port serving API + static assets is sufficient for npx distribution | **validated** | D10 | npx distribution | Validated: Vite proxy to Express works in dev; single port | +| A9 | TanStack AI is too immature for a deliverable (alpha, v0) | medium | D9 | — | Re-evaluate if AI SDK becomes constraining | +| A10 | The `useChat` hook can consume custom SSE without AI SDK server runtime | **validated** | D9 | Walking skeleton | Validated: useChat consumes custom SSE via DefaultChatTransport | +| A11 | Stateless `query()` with prompt-stuffed history is sufficient for multi-turn interviewing — SDK session persistence is unnecessary and undesirable | **validated** | D8, D12 | SQLite foundation | Validated: formatting history into prompt works. SDK sessions rejected as competing source of truth — opaque, machine-local, incompatible with portable data goals (atomic YAML / git-versionable). Turn tree is sole session model. | +| A12 | `useChat` hook accepts initial messages to hydrate conversation state from server-stored history | **validated** | D9 | SQLite foundation | Validated: `useChat` doesn't have `initialMessages` prop but `setMessages` works for hydration | +| A13 | Claude Agent SDK supports defining interview phases as agent skills with distinct system prompts and tool sets | medium | D2 | Interview phases | Test SDK skill/agent configuration API | +| A14 | A second-thread observer agent can reliably extract decisions, assumptions, and dependency edges from a single turn's Q&A | medium | D1 | Observer agent | Probe with realistic interview exchanges; measure extraction fidelity | +| A15 | The LLM can reliably judge when a phase interview has reached sufficient understanding (is_resolution) | medium | D3 | Phase resolution | Probe across varied project types; measure false-positive resolution rate | +| A16 | AI SDK `useChat` hook's `ToolUIPart` state machine (`input-streaming` → `input-available` → 
`output-available` / `output-error` / `approval-requested` → `approval-responded` / `output-denied`) models all permutations of pending, error, and success for both interim (thinking, tool calls) and final (response) data | high | D14 | Rich chat UI | Validate by extending SSE adapter to emit tool-call events, confirm `useChat` surfaces all states | +| A17 | AI Elements copy-paste components can be restyled without forking — they are ownable source files, not npm-locked dependencies | high | D14 | Rich chat UI | Install via CLI, inspect source, confirm no hidden npm runtime dependency | +| A18 | Drizzle ORM migration runner reliably auto-applies schema changes from a migrations folder at startup with better-sqlite3 | medium | D18 | Drizzle refactor | Test with schema change on existing DB file | +| A19 | `AsyncIterable<DomainEvent>` from core can be consumed by both SSE streaming (web) and line-by-line terminal output (CLI) without buffering issues | high | D19 | Core extraction | Validate with web adapter and simple terminal consumer | ## Decisions @@ -89,20 +92,27 @@ The architecture: 3. **Phase resolution via LLM judgment** — A turn's `is_resolution` flag is set by the interviewing agent when it judges that shared understanding has been reached for that phase. The active path is resolved for a phase when its latest turn has `is_resolution = true`. Spec export requires all phases resolved. Depends on: A15. Supersedes: —. 4. **Two-agent pattern (interviewer + observer)** — The interviewer focuses solely on conducting the interview with structured questions. After each answered turn, a separate observer agent extracts decisions, assumptions, and dependency edges. The observer can use a cheaper/faster model. Keeps the interviewer prompt clean and extraction independently testable. Depends on: A3, A4, A14. Supersedes: —. 5. 
**Decision dependency graph** — Decisions depend on prior decisions and/or assumptions via `decision_parent_decision` and `decision_parent_assumption` join tables. Assumptions can depend on prior assumptions via `assumption_parent_assumption`. The observer agent captures these edges during extraction. Depends on: A14. Supersedes: —. -6. **Soft invalidation for requirements and criteria** — When a decision is revisited (branch fork), requirements traced to that decision are flagged for re-review via stale `reviewed_at` timestamps. Criteria inherit the flag transitively from their requirements. The agent handles re-qualification holistically, not mechanistically. Depends on: —. Supersedes: —. +6. **Soft invalidation for requirements and criteria** — When a decision is revisited (branch fork), requirements traced to that decision are flagged for re-review via stale `reviewed_at` timestamps. Criteria inherit the flag transitively from their requirements. Mechanism specified in D17. Depends on: —. Supersedes: —. + +17. **Two invalidation mechanisms — path exclusion and flag propagation** — Path exclusion (lazy): `revisitDecision` → `branch()` moves HEAD; entities on the abandoned branch leave the active path. Requirements are stale when their source decision is not on the active path — computed by the active-path query, no eager writes. Flag propagation (eager): `falsifyAssumption` walks dependency graph edges (`assumption_parent_assumption`, `decision_parent_assumption`), marks dependents. `updateRequirement` nulls `reviewed_at` on traced criteria. Cascade model: falsify assumption → walk graph → flag dependents; revisit decision → branch → path exclusion; update requirement → flag criteria. Depends on: D1, D5, D6. Supersedes: D6's unspecified "holistic" re-qualification. 12. **Stateless SDK integration — no session persistence** — Each `query()` call uses `persistSession: false`. 
Conversation context is reconstructed from the turn tree's active path and injected as formatted history + structured entity summaries. SDK sessions (`resume`, `fork`, session IDs) are not used. The turn tree is the sole session model. Rationale: SDK sessions are an opaque, machine-local competing source of truth incompatible with brunch's branching semantics and future portable-data goals (atomic YAML, git-versionable). Depends on: A11. Supersedes: implicit reliance on SDK session state. 13. **Observer captures derived intelligence** — The observer agent's extraction mandate extends beyond decisions and assumptions to include derived observations (e.g. codebase analysis, domain insights) that the interviewer surfaced through tool use during a turn. These are persisted so subsequent stateless `query()` calls can inject them as context. The exact entity model is TBD — candidates include a dedicated `observation` table, enriched `decision.rationale`, or a `notes` field on `turn`. Depends on: A14, D12. Supersedes: —. 14. **AI Elements for rich chat UI components** — Copy-paste component source files (via `npx ai-elements`) from Vercel's AI Elements registry, built on shadcn/ui + Radix. Components directly consume AI SDK's `ToolUIPart` types and `useChat` hook state. Provides `Tool` (7-state lifecycle), `Reasoning` (collapsible), `ChainOfThought` (groups reasoning + tool calls), `Message`, `Conversation`, `PromptInput`. Source files are owned, not npm-locked — full restyle control. No runtime abstraction layer. Depends on: A16, A17. Supersedes: hand-rolled message rendering in App.tsx. +15. **Transitional turn-field inversion** — During the pre-structured-interview phase (slices 1–3), `turn.answer` holds the user's chat message and `turn.question` holds the agent's streamed response. This inverts the canonical interview semantics where the agent asks (`question`) and the user answers (`answer`). 
The inversion is temporary — slice 4 (structured interview) populates turns in their canonical direction. No schema change needed; the fields carry correct types, just with flipped temporal ordering. Client hydrates `useChat` by mapping each turn to two `UIMessage` entries (answer → user, question → assistant). Depends on: D1. Supersedes: flat `message` table with `role` field from slice 2. ### Technical stack -7. **SQLite via better-sqlite3** — Zero-config embedded DB. Turn tree, decisions, assumptions, requirements, criteria all in SQLite tables. Schema defined in `docs/design/schema.dbml`. Depends on: A5, A6. Supersedes: Dolt (docker-based). +7. **SQLite via better-sqlite3** — Zero-config embedded DB. Turn tree, decisions, assumptions, requirements, criteria all in SQLite tables. Schema defined in Drizzle (see D18; `docs/design/schema.dbml` retained as historical reference). Depends on: A5, A6. Supersedes: Dolt (docker-based). 8. **Express.js server emits AI SDK-conformant SSE** — Iterates SDK's `query()` async generator, translates each `SDKMessage` into SSE events matching AI SDK's UI Message Stream protocol via per-request translator factory. No AI SDK runtime imported server-side. Depends on: A1, A2. Supersedes: hand-rolled NDJSON streaming. -9. **React + Vite + @ai-sdk/react client** — `useChat` for conversation streaming. Custom components for decision/entity dashboard. Phase indicator and navigation. Depends on: A9, A10. Supersedes: Preact, both existing frontends. +9. **React + Vite + @ai-sdk/react + @tanstack/react-router client** — `useChat` for conversation streaming. TanStack Router for type-safe routing with route loaders for data fetching on navigation (replaces manual `useEffect` hydration). Three routes for MVP: project list (`/`), interview workspace (`/project/:id`), export preview (`/project/:id/export`). See `docs/design/BREADBOARD.md`. Depends on: A9, A10. Supersedes: Preact, both existing frontends, single-page no-routing layout. 10. 
**npx-launchable single-command distribution** — `bin` entry, launcher starts Express (serves built Vite assets + API on one port), opens browser. Single env var: `ANTHROPIC_API_KEY`. DB auto-created in project directory or `~/.brunch/`. Depends on: A8. Supersedes: multi-step Docker + env var setup. 11. **Drop list** — Dolt/mysql2, OpenCode sidecar, Preact, both existing frontend implementations, NDJSON protocol, JSON Schema definitions (→ Zod), @tanstack/react-table, @dnd-kit/, dompurify, marked, four streaming functions in claude.js, dispatch.js. Depends on: —. Supersedes: —. +16. **Integer autoincrement primary keys** — All entity tables use `INTEGER PRIMARY KEY AUTOINCREMENT` instead of `TEXT` UUIDs. SQLite ROWID alias is simpler, matches schema.dbml, avoids UUID generation. No external systems reference these IDs. Client coerces to strings for `useChat` hydration (`turn-${id}-answer`, `turn-${id}-question`). Depends on: D7. Supersedes: `randomUUID()` TEXT PKs from slice 2. +18. **Drizzle ORM replaces raw DDL** — TypeScript schema definition (`drizzle/schema.ts`) is single source of truth for types, DDL, and migrations. Auto-applies from `drizzle/migrations/` at startup. Drizzle Studio available for DB inspection during development. Depends on: A18, D7. Supersedes: raw DDL strings in db.ts, `docs/design/schema.dbml` as design document, hand-written TypeScript interfaces. +19. **Layered architecture with DomainEvent streaming** — Core interview orchestration extracted from Express handlers into interface-agnostic service layer. Core operations: turn tree (createProject, conductTurn, getActivePath, branch, checkout), entity lifecycle (revisitDecision, falsifyAssumption, verifyAssumption, CRUD for requirements/criteria, reviewRequirement/reviewCriterion), observer (runObserver), phase (getPhaseStatus), export (exportSpec). 
`conductTurn()` returns `AsyncIterable<DomainEvent>` — domain events (`thinking`, `text-delta`, `turn-created`, `observer-complete`) that each adapter translates to its transport format. Web (Express+SSE), CLI, and MCP adapters are thin transport layers. Depends on: A19, D8, D12. Supersedes: interview logic embedded in Express POST handler. +20. **CLI executable with subcommands** — `npx brunch` launches web UI (default). `npx brunch [command]` for CLI operations on the same DB. Future: sidecar MCP server. Depends on: D10, D19. Supersedes: web-only distribution model in D10. ## Invariants @@ -112,16 +122,18 @@ The architecture: Established by ln-build/ln-spike traceability. Referenced by PLAN.md slices (to establish / to respect). --> -| # | Invariant | Established by | Protected by | Proves | -| --- | ---------------------------- | ------------------ | --------------------------------- | ------ | -| I1 | SSE protocol conformance | Slice 1 (skeleton) | sse-adapter.test.ts | D8 | -| I2 | Stream lifecycle correctness | Slice 1 (skeleton) | app.test.ts | D8 | -| I3 | Thinking/text separation | Slice 1 (skeleton) | sse-adapter.test.ts, app.test.ts | D8 | -| I4 | Vite proxy routing | Slice 1 (skeleton) | vite.config.ts (manual) | D10 | -| I5 | DB lifecycle correctness | Slice 2 (SQLite) | db.test.ts | D7 | -| I6 | Message persistence | Slice 2 (SQLite) | db.test.ts, app.test.ts | D7 | -| I7 | Tool call SSE conformance | Slice 3b (rich UI) | sse-adapter.test.ts | D8, D14 | -| I8 | Tool part state rendering | Slice 3b (rich UI) | manual (outer loop) | D14 | +| # | Invariant | Established by | Protected by | Proves | +| --- | ---------------------------- | ------------------- | -------------------------------- | ------- | +| I1 | SSE protocol conformance | Slice 1 (skeleton) | sse-adapter.test.ts | D8 | +| I2 | Stream lifecycle correctness | Slice 1 (skeleton) | app.test.ts | D8 | +| I3 | Thinking/text separation | Slice 1 (skeleton) | sse-adapter.test.ts, app.test.ts | D8 | +| I4 | 
Vite proxy routing | Slice 1 (skeleton) | vite.config.ts (manual) | D10 | +| I5 | DB lifecycle correctness | Slice 2 (SQLite) | db.test.ts | D7 | +| I6 | Turn persistence | Slice 3 (turn tree) | db.test.ts, app.test.ts | D1, D7 | +| I7 | Tool call SSE conformance | Slice 3b (rich UI) | sse-adapter.test.ts | D8, D14 | +| I8 | Tool part state rendering | Slice 3b (rich UI) | manual (outer loop) | D14 | +| I9 | Turn tree parent chain | Slice 3 (turn tree) | db.test.ts | D1 | +| I10 | Active path resolution | Slice 3 (turn tree) | db.test.ts | D1 | ## Lexicon @@ -131,34 +143,40 @@ The architecture: ### Method terms -| Term | Definition | -| --------------- | --------------------------------------------------------------------------------------------- | +| Term | Definition | +| --------------- | ---------------------------------------------------------------------------------------------- | | **assumption** | A falsifiable belief accepted as true; tracked with confidence, linked to decisions and slices | -| **decision** | A recorded choice that resolves a question; ordered, with supersession chain | -| **invariant** | A structural property proven by implementation and protected by tests; must not regress | -| **requirement** | A capability the system must provide | -| **slice** | A thin end-to-end tracer-bullet path through all integration layers | -| **spike** | A time-boxed throwaway investigation to answer one hard question | +| **decision** | A recorded choice that resolves a question; ordered, with supersession chain | +| **invariant** | A structural property proven by implementation and protected by tests; must not regress | +| **requirement** | A capability the system must provide | +| **slice** | A thin end-to-end tracer-bullet path through all integration layers | +| **spike** | A time-boxed throwaway investigation to answer one hard question | ### Domain terms -| Term | Definition | -| ----------------------- | 
--------------------------------------------------------------------------------------------------------------------------- | -| **project** | A spec elicitation session. Has a name, a HEAD pointer (`active_turn_id`), and phase completion state | -| **turn** | One question-answer pair in the interview. Carries phase provenance, options, grounding ("why"), impact signal, and the user's answer. Points to its parent turn — the turn tree is the version history | -| **option** | A structured alternative presented in a turn. At least two per turn. One may be recommended; one is selected by the user | -| **decision** | A resolved fork in the design tree. Extracted by the observer from an answered turn. Depends on prior decisions and/or assumptions. Traced back to its source turn via `turn_decision` | -| **assumption** | A falsifiable belief a decision rests on. Extracted by the observer. Can depend on prior assumptions. Traced back to its source turn via `turn_assumption` | -| **requirement** | What the system must do. Accumulated during the design drill-down, confirmed during the requirements review phase. Traced to source decisions via `requirement_decision`. Has `reviewed_at` for soft-invalidation | -| **criterion** | A testable condition verifying a requirement. Proposed by the agent during the criteria phase, confirmed by the user. Has `reviewed_at` for soft-invalidation | -| **active path** | The branch from HEAD to root in the turn tree. Determines which turns, decisions, and assumptions are currently active | -| **phase** | A stage of the interview: `scope`, `design`, `requirements`, `criteria`. Immutable provenance on each turn. Each phase is backed by an agent skill | -| **phase resolution** | LLM judgment that shared understanding has been reached for a phase. Marked by `turn.is_resolution = true` on the last turn of a phase | -| **interviewer** | The primary agent role: conducts the interview with structured questions, grounding, and impact signals. 
Does not extract entities | -| **observer** | The secondary agent role: extracts decisions, assumptions, and dependency edges from each answered turn. Runs post-answer during user read time | -| **decision graph** | The DAG of decisions and their dependencies (on prior decisions and assumptions). Revisiting a decision forks the turn tree | -| **soft invalidation** | When a decision is revisited, requirements traced to it are flagged for re-review (stale `reviewed_at`). Criteria inherit the flag transitively. The agent re-qualifies holistically | -| **spec readiness** | Compound predicate: all four phases resolved AND requirements reviewed AND criteria confirmed. Only then is export enabled | +| Term | Definition | +| --------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| **project** | A spec elicitation session. Has a name, a HEAD pointer (`active_turn_id`), and phase completion state | +| **turn** | One question-answer pair in the interview. Carries phase provenance, options, grounding ("why"), impact signal, and the user's answer. Points to its parent turn — the turn tree is the version history | +| **option** | A structured alternative presented in a turn. At least two per turn. One may be recommended; one is selected by the user | +| **decision** | A resolved fork in the design tree. Extracted by the observer from an answered turn. Depends on prior decisions and/or assumptions. Traced back to its source turn via `turn_decision` | +| **assumption** | A falsifiable belief a decision rests on. Extracted by the observer. Can depend on prior assumptions. Traced back to its source turn via `turn_assumption` | +| **requirement** | What the system must do. Accumulated during the design drill-down, confirmed during the requirements review phase. 
Traced to source decisions via `requirement_decision`. Has `reviewed_at` for soft-invalidation | +| **criterion** | A testable condition verifying a requirement. Proposed by the agent during the criteria phase, confirmed by the user. Has `reviewed_at` for soft-invalidation | +| **active path** | The branch from HEAD to root in the turn tree. Determines which turns, decisions, and assumptions are currently active | +| **branch** (verb) | Fork the turn tree from a given turn, creating a new path and moving HEAD. Analogous to git branch + checkout | +| **checkout** (verb) | Move HEAD to an existing turn on a different branch without creating new turns. Analogous to git checkout | +| **phase** | A stage of the interview: `scope`, `design`, `requirements`, `criteria`. Immutable provenance on each turn. Each phase is backed by an agent skill | +| **phase resolution** | LLM judgment that shared understanding has been reached for a phase. Marked by `turn.is_resolution = true` on the last turn of a phase | +| **interviewer** | The primary agent role: conducts the interview with structured questions, grounding, and impact signals. Does not extract entities | +| **observer** | The secondary agent role: extracts decisions, assumptions, and dependency edges from each answered turn. Runs post-answer during user read time | +| **core** | The interface-agnostic service layer between the database and transport adapters. Owns interview orchestration, entity lifecycle, observer invocation. Returns `AsyncIterable<DomainEvent>` for streaming | +| **domain event** | A typed event yielded by `conductTurn()` — `thinking`, `text-delta`, `turn-created`, `observer-complete`, etc. Each adapter translates to its transport format (SSE, terminal, MCP) | +| **decision graph** | The DAG of decisions and their dependencies (on prior decisions and assumptions). 
Revisiting a decision forks the turn tree | +| **path exclusion** | Invalidation by moving HEAD so entities on the abandoned branch leave the active path. Lazy — computed by the active-path query, no eager writes. Triggered by `revisitDecision` / `branch` | +| **flag propagation** | Invalidation by walking dependency graph edges and marking entities stale (nulling `reviewed_at`). Eager — triggered by `falsifyAssumption` or `updateRequirement` | +| **soft invalidation** | Umbrella term for both path exclusion and flag propagation. Entities are marked for re-review — by leaving the active path or by being marked stale (e.g. `reviewed_at` nulled) — but are never deleted, and their content is never changed. See D17 | +| **spec readiness** | Compound predicate: all four phases resolved AND requirements reviewed AND criteria confirmed. Only then is export enabled | ## Verification Design @@ -169,15 +187,15 @@ The architecture: ### Verification Commands -| Step | Check | Command | -| ---- | -------------- | ---------------------- | -| 1 | Type checking | `npx tsc --noEmit` | -| 2 | Unit tests | `npx vitest run` | -| 3 | Build | `npx vite build` | +| Step | Check | Command | +| ---- | ------------- | ------------------ | +| 1 | Type checking | `npx tsc --noEmit` | +| 2 | Unit tests | `npx vitest run` | +| 3 | Build | `npx vite build` | ### Verification Policy -End-to-end slices must be **user-testable**, not just programmatically tested. Each slice that touches the user-facing boundary should be manually verifiable via `npm run dev` (or equivalent). Use `/tool-cmux` for dev server panes and `/tool-cdp-cli` for browser interaction during outer-loop verification. +End-to-end slices must be **user-testable**, not just programmatically tested. Each slice that touches the user-facing boundary should be manually verifiable via `npm run dev` (or equivalent). Use `/cli-cmux` for dev server panes and `/cli-cdp` for browser interaction during outer-loop verification. ### Feedback Loops @@ -202,11 +220,11 @@ End-to-end slices must be **user-testable**, not just programmatically tested. 
E -| File | Tests | Protects | -| ------------------------ | ----- | ---------------- | -| sse-adapter.test.ts | 10 | I1, I3 | -| app.test.ts | 8 | I2, I3, I5, I6 | -| db.test.ts | 10 | I5, I6 | +| File | Tests | Protects | +| ------------------- | ----- | --------------- | +| sse-adapter.test.ts | 12 | I1, I3 | +| db.test.ts | 18 | I5, I6, I9, I10 | +| app.test.ts | 9 | I2, I3, I6 | ## Acceptance Criteria (exit conditions) diff --git a/src/client/App.tsx b/src/client/App.tsx index 93a15a06..d4ad75ca 100644 --- a/src/client/App.tsx +++ b/src/client/App.tsx @@ -8,17 +8,29 @@ export function App() { const { messages, sendMessage, setMessages, status, error } = useChat(); const isLoading = status === 'submitted' || status === 'streaming'; - // Fetch conversation history on mount + // Fetch conversation history on mount — hydrate turns into useChat messages useEffect(() => { fetch('/api/projects/current') .then((res) => res.json()) .then((data) => { - if (data.messages?.length > 0) { - const msgs: UIMessage[] = data.messages.map((m: { id: string; role: string; content: string }) => ({ - id: m.id, - role: m.role as 'user' | 'assistant', - parts: [{ type: 'text' as const, text: m.content }], - })); + if (data.turns?.length > 0) { + const msgs: UIMessage[] = []; + for (const turn of data.turns as Array<{ id: number; answer: string | null; question: string | null }>) { + if (turn.answer) { + msgs.push({ + id: `turn-${turn.id}-answer`, + role: 'user', + parts: [{ type: 'text' as const, text: turn.answer }], + }); + } + if (turn.question) { + msgs.push({ + id: `turn-${turn.id}-question`, + role: 'assistant', + parts: [{ type: 'text' as const, text: turn.question }], + }); + } + } setMessages(msgs); } setLoading(false); diff --git a/src/server/app.test.ts b/src/server/app.test.ts index ada1b5a2..b251c836 100644 --- a/src/server/app.test.ts +++ b/src/server/app.test.ts @@ -198,35 +198,56 @@ describe('POST /api/chat', () => { }); }); -describe('POST /api/chat — persistence', () 
=> { - it('persists user and assistant messages to the database', async () => { +describe('POST /api/chat — turn persistence', () => { + it('creates a turn with user answer and advances HEAD', async () => { mockQuery.mockReturnValue(mockTextStream('Hi there')); await request(app) .post('/api/chat') .send({ messages: [{ role: 'user', content: 'hello' }] }); - const { getOrCreateProject, getMessages } = await import('./db.js'); + const { getOrCreateProject, getActivePath } = await import('./db.js'); const project = getOrCreateProject(db); - const messages = getMessages(db, project.id); - expect(messages).toHaveLength(2); - expect(messages[0]).toMatchObject({ role: 'user', content: 'hello' }); - expect(messages[1]).toMatchObject({ role: 'assistant' }); - expect(messages[1].content).toContain('Hi there'); + expect(project.active_turn_id).not.toBeNull(); + const turns = getActivePath(db, project.id); + expect(turns).toHaveLength(1); + expect(turns[0].answer).toBe('hello'); + expect(turns[0].question).toContain('Hi there'); + expect(turns[0].phase).toBe('scope'); + }); + + it('chains turns with parent pointers across exchanges', async () => { + mockQuery.mockReturnValue(mockTextStream('First response')); + await request(app) + .post('/api/chat') + .send({ messages: [{ role: 'user', content: 'first' }] }); + + mockQuery.mockReturnValue(mockTextStream('Second response')); + await request(app) + .post('/api/chat') + .send({ messages: [{ role: 'user', content: 'second' }] }); + + const { getOrCreateProject, getActivePath } = await import('./db.js'); + const project = getOrCreateProject(db); + const turns = getActivePath(db, project.id); + expect(turns).toHaveLength(2); + expect(turns[0].answer).toBe('first'); + expect(turns[1].answer).toBe('second'); + expect(turns[1].parent_turn_id).toBe(turns[0].id); }); }); describe('GET /api/projects/current', () => { - it('returns a project with empty messages when no history exists', async () => { + it('returns a project with empty 
turns when no history exists', async () => { const res = await request(app) .get('/api/projects/current') .expect(200); expect(res.body.project).toMatchObject({ name: 'default' }); - expect(res.body.messages).toEqual([]); + expect(res.body.turns).toEqual([]); }); - it('returns existing messages after a chat exchange', async () => { + it('returns turns on active path after a chat exchange', async () => { mockQuery.mockReturnValue(mockTextStream('Hi')); await request(app) @@ -237,7 +258,8 @@ describe('GET /api/projects/current', () => { .get('/api/projects/current') .expect(200); - expect(res.body.messages).toHaveLength(2); - expect(res.body.messages[0]).toMatchObject({ role: 'user', content: 'hello' }); + expect(res.body.turns).toHaveLength(1); + expect(res.body.turns[0].answer).toBe('hello'); + expect(res.body.turns[0].question).toContain('Hi'); }); }); diff --git a/src/server/app.ts b/src/server/app.ts index a2f40d7b..27c4e729 100644 --- a/src/server/app.ts +++ b/src/server/app.ts @@ -2,7 +2,7 @@ import express from 'express'; import type { Request, Response } from 'express'; import { query } from '@anthropic-ai/claude-agent-sdk'; import { createTranslator, formatSSE, type AIEvent } from './sse-adapter.js'; -import { createDb, getOrCreateProject, saveMessage, getMessages, type Message } from './db.js'; +import { createDb, getOrCreateProject, createTurn, updateTurn, getActivePath, advanceHead, type Turn } from './db.js'; /** Extract user text from a UIMessage (parts[]) or legacy format (content string). */ function extractPrompt(messages: unknown[]): string { @@ -13,13 +13,16 @@ function extractPrompt(messages: unknown[]): string { return parts?.filter((p) => p.type === 'text').map((p) => p.text).join('') ?? ''; } -/** Format conversation history for multi-turn context (A11 workaround). 
*/ -function formatHistory(history: Message[], currentPrompt: string): string { - if (history.length === 0) return currentPrompt; - const historyText = history - .map((m) => `${m.role === 'user' ? 'User' : 'Assistant'}: ${m.content}`) - .join('\n'); - return `Previous conversation:\n${historyText}\n\n---\nUser: ${currentPrompt}`; +/** Format conversation history from active-path turns for multi-turn context. */ +function formatHistory(turns: Turn[], currentPrompt: string): string { + if (turns.length === 0) return currentPrompt; + const lines: string[] = []; + for (const turn of turns) { + if (turn.answer) lines.push(`User: ${turn.answer}`); + if (turn.question) lines.push(`Assistant: ${turn.question}`); + } + if (lines.length === 0) return currentPrompt; + return `Previous conversation:\n${lines.join('\n')}\n\n---\nUser: ${currentPrompt}`; } /** Collect assistant text content from translated SSE events. */ @@ -37,8 +40,8 @@ export function createApp(dbPath?: string) { app.get('/api/projects/current', (_req: Request, res: Response) => { const project = getOrCreateProject(db); - const messages = getMessages(db, project.id); - res.json({ project, messages }); + const turns = getActivePath(db, project.id); + res.json({ project, turns }); }); app.post('/api/chat', async (req: Request, res: Response) => { @@ -47,10 +50,17 @@ export function createApp(dbPath?: string) { console.log('POST /api/chat — prompt:', JSON.stringify(prompt).substring(0, 100)); const project = getOrCreateProject(db); - const history = getMessages(db, project.id); - saveMessage(db, project.id, 'user', prompt); + const activePath = getActivePath(db, project.id); - const fullPrompt = formatHistory(history, prompt); + // Create turn: answer = user's message, question = '' (filled after streaming) + const turn = createTurn(db, project.id, { + parent_turn_id: project.active_turn_id, + phase: 'scope', + question: '', + answer: prompt, + }); + + const fullPrompt = formatHistory(activePath, prompt); 
res.setHeader('Content-Type', 'text/event-stream'); res.setHeader('Cache-Control', 'no-cache'); @@ -85,8 +95,9 @@ export function createApp(dbPath?: string) { } if (assistantText) { - saveMessage(db, project.id, 'assistant', assistantText); + updateTurn(db, turn.id, { question: assistantText }); } + advanceHead(db, project.id, turn.id); res.write(formatSSE('[DONE]')); res.end(); diff --git a/src/server/db.test.ts b/src/server/db.test.ts index 22aaf9a7..55764eb0 100644 --- a/src/server/db.test.ts +++ b/src/server/db.test.ts @@ -3,7 +3,16 @@ import { existsSync, unlinkSync, mkdirSync } from 'fs'; import { join } from 'path'; import { tmpdir } from 'os'; import { randomUUID } from 'crypto'; -import { createDb, getOrCreateProject, saveMessage, getMessages, type DB } from './db.js'; +import { + createDb, + getOrCreateProject, + createTurn, + updateTurn, + createOption, + getActivePath, + advanceHead, + type DB, +} from './db.js'; let db: DB; @@ -16,13 +25,29 @@ afterEach(() => { }); describe('createDb', () => { - it('creates project and message tables', () => { + it('creates all 13 tables from schema.dbml', () => { const tables = db .prepare("SELECT name FROM sqlite_master WHERE type='table' ORDER BY name") .all() as Array<{ name: string }>; const names = tables.map((t) => t.name); - expect(names).toContain('project'); - expect(names).toContain('message'); + const expected = [ + 'project', + 'turn', + 'option', + 'decision', + 'assumption', + 'requirement', + 'criterion', + 'turn_decision', + 'turn_assumption', + 'decision_parent_decision', + 'decision_parent_assumption', + 'assumption_parent_assumption', + 'requirement_decision', + ]; + for (const table of expected) { + expect(names).toContain(table); + } }); it('creates database file on disk when given a path', () => { @@ -48,9 +73,9 @@ describe('createDb', () => { }); describe('getOrCreateProject', () => { - it('creates a default project when none exists', () => { + it('creates a default project with null 
active_turn_id', () => { const project = getOrCreateProject(db); - expect(project).toMatchObject({ name: 'default' }); + expect(project).toMatchObject({ name: 'default', active_turn_id: null }); expect(project.id).toBeDefined(); expect(project.created_at).toBeDefined(); }); @@ -62,43 +87,140 @@ describe('getOrCreateProject', () => { }); }); -describe('saveMessage / getMessages', () => { - it('persists user and assistant messages', () => { +describe('turn CRUD', () => { + it('creates a root turn with no parent', () => { const project = getOrCreateProject(db); - saveMessage(db, project.id, 'user', 'hello'); - saveMessage(db, project.id, 'assistant', 'hi there'); - const messages = getMessages(db, project.id); - expect(messages).toHaveLength(2); - expect(messages[0]).toMatchObject({ role: 'user', content: 'hello' }); - expect(messages[1]).toMatchObject({ role: 'assistant', content: 'hi there' }); + const turn = createTurn(db, project.id, { + phase: 'scope', + question: 'What is the project about?', + answer: 'A chat app', + }); + expect(turn.id).toBeDefined(); + expect(turn.parent_turn_id).toBeNull(); + expect(turn.phase).toBe('scope'); + expect(turn.question).toBe('What is the project about?'); + expect(turn.answer).toBe('A chat app'); + expect(turn.is_resolution).toBe(0); }); - it('returns messages ordered by creation time', () => { + it('creates child turns with parent chain', () => { const project = getOrCreateProject(db); - saveMessage(db, project.id, 'user', 'first'); - saveMessage(db, project.id, 'assistant', 'second'); - saveMessage(db, project.id, 'user', 'third'); - const messages = getMessages(db, project.id); - expect(messages.map((m) => m.content)).toEqual(['first', 'second', 'third']); + const t1 = createTurn(db, project.id, { phase: 'scope', question: 'Q1', answer: 'A1' }); + const t2 = createTurn(db, project.id, { phase: 'scope', question: 'Q2', answer: 'A2', parent_turn_id: t1.id }); + const t3 = createTurn(db, project.id, { phase: 'scope', question: 
'Q3', answer: 'A3', parent_turn_id: t2.id }); + expect(t2.parent_turn_id).toBe(t1.id); + expect(t3.parent_turn_id).toBe(t2.id); }); - it('assigns unique IDs to each message', () => { + it('creates options for a turn', () => { const project = getOrCreateProject(db); - saveMessage(db, project.id, 'user', 'a'); - saveMessage(db, project.id, 'assistant', 'b'); - const messages = getMessages(db, project.id); - expect(messages[0].id).not.toBe(messages[1].id); + const turn = createTurn(db, project.id, { phase: 'scope', question: 'Pick one' }); + const opt1 = createOption(db, turn.id, { position: 0, content: 'Option A', is_recommended: true }); + const opt2 = createOption(db, turn.id, { position: 1, content: 'Option B' }); + expect(opt1.is_recommended).toBe(1); + expect(opt1.content).toBe('Option A'); + expect(opt2.is_recommended).toBe(0); }); - it('returns empty array for project with no messages', () => { + it('enforces unique (turn_id, position) on options', () => { const project = getOrCreateProject(db); - const messages = getMessages(db, project.id); - expect(messages).toEqual([]); + const turn = createTurn(db, project.id, { phase: 'scope', question: 'Pick one' }); + createOption(db, turn.id, { position: 0, content: 'Option A' }); + expect(() => createOption(db, turn.id, { position: 0, content: 'Duplicate' })).toThrow(); + }); + + it('updates turn answer and question', () => { + const project = getOrCreateProject(db); + const turn = createTurn(db, project.id, { phase: 'scope', question: '' }); + updateTurn(db, turn.id, { question: 'Updated Q', answer: 'User said this' }); + const updated = db.prepare('SELECT * FROM turn WHERE id = ?').get(turn.id) as any; + expect(updated.question).toBe('Updated Q'); + expect(updated.answer).toBe('User said this'); + }); + + it('partial update only changes specified fields', () => { + const project = getOrCreateProject(db); + const turn = createTurn(db, project.id, { phase: 'scope', question: 'Original Q', answer: 'Original A' }); + 
updateTurn(db, turn.id, { question: 'New Q' }); + const updated = db.prepare('SELECT * FROM turn WHERE id = ?').get(turn.id) as any; + expect(updated.question).toBe('New Q'); + expect(updated.answer).toBe('Original A'); + }); +}); + +describe('active path resolution', () => { + it('returns empty array when no HEAD is set', () => { + const project = getOrCreateProject(db); + const path = getActivePath(db, project.id); + expect(path).toEqual([]); + }); + + it('resolves linear chain from root to HEAD', () => { + const project = getOrCreateProject(db); + const t1 = createTurn(db, project.id, { phase: 'scope', question: 'Q1', answer: 'A1' }); + const t2 = createTurn(db, project.id, { phase: 'scope', question: 'Q2', answer: 'A2', parent_turn_id: t1.id }); + const t3 = createTurn(db, project.id, { phase: 'scope', question: 'Q3', answer: 'A3', parent_turn_id: t2.id }); + advanceHead(db, project.id, t3.id); + + const path = getActivePath(db, project.id); + expect(path).toHaveLength(3); + expect(path.map((t) => t.id)).toEqual([t1.id, t2.id, t3.id]); + }); + + it('resolves correct branch after fork', () => { + const project = getOrCreateProject(db); + const t1 = createTurn(db, project.id, { phase: 'scope', question: 'Q1', answer: 'A1' }); + const t2a = createTurn(db, project.id, { phase: 'scope', question: 'Q2a', answer: 'A2a', parent_turn_id: t1.id }); + const t2b = createTurn(db, project.id, { phase: 'scope', question: 'Q2b', answer: 'A2b', parent_turn_id: t1.id }); + + // HEAD at branch b + advanceHead(db, project.id, t2b.id); + const pathB = getActivePath(db, project.id); + expect(pathB.map((t) => t.id)).toEqual([t1.id, t2b.id]); + + // Switch HEAD to branch a + advanceHead(db, project.id, t2a.id); + const pathA = getActivePath(db, project.id); + expect(pathA.map((t) => t.id)).toEqual([t1.id, t2a.id]); + }); + + it('handles single-turn tree (root = HEAD)', () => { + const project = getOrCreateProject(db); + const t1 = createTurn(db, project.id, { phase: 'scope', question: 
'Q1', answer: 'A1' }); + advanceHead(db, project.id, t1.id); + const path = getActivePath(db, project.id); + expect(path).toHaveLength(1); + expect(path[0].id).toBe(t1.id); + }); + + it('resolves deep fork correctly', () => { + const project = getOrCreateProject(db); + const t1 = createTurn(db, project.id, { phase: 'scope', question: 'Q1', answer: 'A1' }); + const t2 = createTurn(db, project.id, { phase: 'scope', question: 'Q2', answer: 'A2', parent_turn_id: t1.id }); + const t3 = createTurn(db, project.id, { phase: 'scope', question: 'Q3', answer: 'A3', parent_turn_id: t2.id }); + // Fork from t2 (not from t3) + const t4 = createTurn(db, project.id, { phase: 'design', question: 'Q4', answer: 'A4', parent_turn_id: t2.id }); + const t5 = createTurn(db, project.id, { phase: 'design', question: 'Q5', answer: 'A5', parent_turn_id: t4.id }); + + advanceHead(db, project.id, t5.id); + const path = getActivePath(db, project.id); + expect(path.map((t) => t.id)).toEqual([t1.id, t2.id, t4.id, t5.id]); + // t3 is on the other branch — not in the active path + }); +}); + +describe('advanceHead', () => { + it('updates project active_turn_id', () => { + const project = getOrCreateProject(db); + const turn = createTurn(db, project.id, { phase: 'scope', question: 'Q1' }); + advanceHead(db, project.id, turn.id); + const updated = getOrCreateProject(db); + expect(updated.active_turn_id).toBe(turn.id); }); }); -describe('DB lifecycle', () => { - it('create → persist → close → reopen → state intact', () => { +describe('DB lifecycle — turn tree persistence', () => { + it('create → persist turns → close → reopen → state intact', () => { const dir = join(tmpdir(), `brunch-test-${randomUUID()}`); mkdirSync(dir, { recursive: true }); const dbPath = join(dir, 'lifecycle.db'); @@ -106,21 +228,28 @@ describe('DB lifecycle', () => { // Create and populate const db1 = createDb(dbPath); const project = getOrCreateProject(db1); - saveMessage(db1, project.id, 'user', 'hello'); - saveMessage(db1, 
project.id, 'assistant', 'world'); + const t1 = createTurn(db1, project.id, { phase: 'scope', question: 'Q1', answer: 'A1' }); + const t2 = createTurn(db1, project.id, { phase: 'scope', question: 'Q2', answer: 'A2', parent_turn_id: t1.id }); + createOption(db1, t1.id, { position: 0, content: 'Opt A', is_recommended: true }); + createOption(db1, t1.id, { position: 1, content: 'Opt B' }); + advanceHead(db1, project.id, t2.id); db1.close(); // Reopen and verify const db2 = createDb(dbPath); - const reopenedProject = getOrCreateProject(db2); - expect(reopenedProject.id).toBe(project.id); - const messages = getMessages(db2, reopenedProject.id); - expect(messages).toHaveLength(2); - expect(messages[0]).toMatchObject({ role: 'user', content: 'hello' }); - expect(messages[1]).toMatchObject({ role: 'assistant', content: 'world' }); + const reopened = getOrCreateProject(db2); + expect(reopened.id).toBe(project.id); + expect(reopened.active_turn_id).toBe(t2.id); + const path = getActivePath(db2, reopened.id); + expect(path).toHaveLength(2); + expect(path[0].question).toBe('Q1'); + expect(path[1].question).toBe('Q2'); + // Verify options survived + const options = db2.prepare('SELECT * FROM option WHERE turn_id = ? 
ORDER BY position').all(t1.id) as any[]; + expect(options).toHaveLength(2); + expect(options[0].content).toBe('Opt A'); db2.close(); - // Cleanup unlinkSync(dbPath); }); }); diff --git a/src/server/db.ts b/src/server/db.ts index 85b42d2c..2f32b742 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -1,38 +1,153 @@ import Database from 'better-sqlite3'; -import { randomUUID } from 'crypto'; export type DB = Database.Database; -export type Role = 'user' | 'assistant'; +export type Phase = 'scope' | 'design' | 'requirements' | 'criteria'; +export type Impact = 'high' | 'medium' | 'low'; export interface Project { - id: string; + id: number; name: string; + active_turn_id: number | null; created_at: string; + updated_at: string; } -export interface Message { - id: string; - project_id: string; - role: Role; - content: string; +export interface Turn { + id: number; + project_id: number; + parent_turn_id: number | null; + phase: Phase; + question: string; + why: string | null; + impact: Impact | null; + answer: string | null; + is_resolution: number; created_at: string; } +export interface Option { + id: number; + turn_id: number; + position: number; + content: string; + is_recommended: number; + is_selected: number; +} + +export interface CreateTurnInput { + parent_turn_id?: number | null; + phase: Phase; + question: string; + why?: string | null; + impact?: Impact | null; + answer?: string | null; + is_resolution?: boolean; +} + +export interface CreateOptionInput { + position: number; + content: string; + is_recommended?: boolean; + is_selected?: boolean; +} + export function createDb(path: string = ':memory:'): DB { const db = new Database(path); db.pragma('journal_mode = WAL'); db.exec(` CREATE TABLE IF NOT EXISTS project ( - id TEXT PRIMARY KEY, + id INTEGER PRIMARY KEY AUTOINCREMENT, name TEXT NOT NULL, + active_turn_id INTEGER, + created_at TEXT NOT NULL DEFAULT (datetime('now')), + updated_at TEXT NOT NULL DEFAULT (datetime('now')) + ); + + CREATE TABLE IF 
NOT EXISTS turn ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES project(id), + parent_turn_id INTEGER REFERENCES turn(id), + phase TEXT NOT NULL CHECK (phase IN ('scope', 'design', 'requirements', 'criteria')), + question TEXT NOT NULL DEFAULT '', + why TEXT, + impact TEXT CHECK (impact IS NULL OR impact IN ('high', 'medium', 'low')), + answer TEXT, + is_resolution INTEGER NOT NULL DEFAULT 0, created_at TEXT NOT NULL DEFAULT (datetime('now')) ); - CREATE TABLE IF NOT EXISTS message ( - id TEXT PRIMARY KEY, - project_id TEXT NOT NULL REFERENCES project(id), - role TEXT NOT NULL CHECK (role IN ('user', 'assistant')), + + CREATE TABLE IF NOT EXISTS option ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + turn_id INTEGER NOT NULL REFERENCES turn(id), + position INTEGER NOT NULL, content TEXT NOT NULL, - created_at TEXT NOT NULL DEFAULT (datetime('now')) + is_recommended INTEGER NOT NULL DEFAULT 0, + is_selected INTEGER NOT NULL DEFAULT 0, + UNIQUE(turn_id, position) + ); + + CREATE TABLE IF NOT EXISTS decision ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES project(id), + content TEXT NOT NULL, + rationale TEXT + ); + + CREATE TABLE IF NOT EXISTS assumption ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES project(id), + content TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS requirement ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES project(id), + content TEXT NOT NULL, + reviewed_at TEXT + ); + + CREATE TABLE IF NOT EXISTS criterion ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + project_id INTEGER NOT NULL REFERENCES project(id), + requirement_id INTEGER NOT NULL REFERENCES requirement(id), + content TEXT NOT NULL, + reviewed_at TEXT + ); + + CREATE TABLE IF NOT EXISTS turn_decision ( + turn_id INTEGER NOT NULL REFERENCES turn(id), + decision_id INTEGER NOT NULL REFERENCES decision(id), + PRIMARY KEY (turn_id, decision_id) + ); + + 
CREATE TABLE IF NOT EXISTS turn_assumption ( + turn_id INTEGER NOT NULL REFERENCES turn(id), + assumption_id INTEGER NOT NULL REFERENCES assumption(id), + PRIMARY KEY (turn_id, assumption_id) + ); + + CREATE TABLE IF NOT EXISTS decision_parent_decision ( + decision_id INTEGER NOT NULL REFERENCES decision(id), + parent_decision_id INTEGER NOT NULL REFERENCES decision(id), + PRIMARY KEY (decision_id, parent_decision_id) + ); + + CREATE TABLE IF NOT EXISTS decision_parent_assumption ( + decision_id INTEGER NOT NULL REFERENCES decision(id), + parent_assumption_id INTEGER NOT NULL REFERENCES assumption(id), + PRIMARY KEY (decision_id, parent_assumption_id) + ); + + CREATE TABLE IF NOT EXISTS assumption_parent_assumption ( + assumption_id INTEGER NOT NULL REFERENCES assumption(id), + parent_assumption_id INTEGER NOT NULL REFERENCES assumption(id), + PRIMARY KEY (assumption_id, parent_assumption_id) + ); + + CREATE TABLE IF NOT EXISTS requirement_decision ( + requirement_id INTEGER NOT NULL REFERENCES requirement(id), + decision_id INTEGER NOT NULL REFERENCES decision(id), + PRIMARY KEY (requirement_id, decision_id) ); `); return db; @@ -41,17 +156,71 @@ export function createDb(path: string = ':memory:'): DB { export function getOrCreateProject(db: DB, name = 'default'): Project { const existing = db.prepare('SELECT * FROM project ORDER BY created_at DESC LIMIT 1').get() as Project | undefined; if (existing) return existing; - const id = randomUUID(); - db.prepare('INSERT INTO project (id, name) VALUES (?, ?)').run(id, name); - return db.prepare('SELECT * FROM project WHERE id = ?').get(id) as Project; + const result = db.prepare('INSERT INTO project (name) VALUES (?)').run(name); + return db.prepare('SELECT * FROM project WHERE id = ?').get(result.lastInsertRowid) as Project; +} + +export function createTurn(db: DB, projectId: number, input: CreateTurnInput): Turn { + const result = db.prepare(` + INSERT INTO turn (project_id, parent_turn_id, phase, question, why, 
impact, answer, is_resolution) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) + `).run( + projectId, + input.parent_turn_id ?? null, + input.phase, + input.question, + input.why ?? null, + input.impact ?? null, + input.answer ?? null, + input.is_resolution ? 1 : 0, + ); + return db.prepare('SELECT * FROM turn WHERE id = ?').get(result.lastInsertRowid) as Turn; } -export function saveMessage(db: DB, projectId: string, role: Role, content: string): Message { - const id = randomUUID(); - db.prepare('INSERT INTO message (id, project_id, role, content) VALUES (?, ?, ?, ?)').run(id, projectId, role, content); - return db.prepare('SELECT * FROM message WHERE id = ?').get(id) as Message; +export function updateTurn(db: DB, turnId: number, updates: { question?: string; answer?: string }): void { + const setClauses: string[] = []; + const values: unknown[] = []; + if (updates.question !== undefined) { + setClauses.push('question = ?'); + values.push(updates.question); + } + if (updates.answer !== undefined) { + setClauses.push('answer = ?'); + values.push(updates.answer); + } + if (setClauses.length === 0) return; + values.push(turnId); + db.prepare(`UPDATE turn SET ${setClauses.join(', ')} WHERE id = ?`).run(...values); +} + +export function createOption(db: DB, turnId: number, input: CreateOptionInput): Option { + const result = db.prepare(` + INSERT INTO option (turn_id, position, content, is_recommended, is_selected) + VALUES (?, ?, ?, ?, ?) + `).run( + turnId, + input.position, + input.content, + input.is_recommended ? 1 : 0, + input.is_selected ? 1 : 0, + ); + return db.prepare('SELECT * FROM option WHERE id = ?').get(result.lastInsertRowid) as Option; +} + +export function getActivePath(db: DB, projectId: number): Turn[] { + const project = db.prepare('SELECT active_turn_id FROM project WHERE id = ?').get(projectId) as Pick<Project, 'active_turn_id'> | undefined; + if (!project?.active_turn_id) return []; + + return db.prepare(` + WITH RECURSIVE path AS ( + SELECT * FROM turn WHERE id = ? 
+ UNION ALL + SELECT t.* FROM turn t JOIN path p ON t.id = p.parent_turn_id + ) + SELECT * FROM path ORDER BY id ASC + `).all(project.active_turn_id) as Turn[]; } -export function getMessages(db: DB, projectId: string): Message[] { - return db.prepare('SELECT * FROM message WHERE project_id = ? ORDER BY created_at ASC, rowid ASC').all(projectId) as Message[]; +export function advanceHead(db: DB, projectId: number, turnId: number): void { + db.prepare("UPDATE project SET active_turn_id = ?, updated_at = datetime('now') WHERE id = ?").run(turnId, projectId); }