diff --git a/.agents/skills/ln-build/SKILL.md b/.agents/skills/ln-build/SKILL.md index de0ee133..f8e86928 100644 --- a/.agents/skills/ln-build/SKILL.md +++ b/.agents/skills/ln-build/SKILL.md @@ -40,7 +40,7 @@ Run the project's verification harness. All checks must pass. Commit: `feat: [ta After the slice lands and verification passes, do all of these before presenting routing options: -1. Mark the slice `done` in `memory/PLAN.md` +1. Mark the slice `done` in `memory/PLAN.md`. Check `## Dependencies` — if this slice unblocked multiple downstream slices, note them as newly available (some may be parallelizable) 2. Update assumption confidence in `memory/SPEC.md` §Assumptions — set validated assumptions to `**validated**`, invalidated ones to `**invalidated**` and flag implicated slices in PLAN.md 3. Add new invariants to `memory/SPEC.md` §Invariants — each structural property now protected by tests. Update `memory/PLAN.md` slice with `Invariants established: I#` 4. Add any new decisions to `memory/SPEC.md` §Decisions, new assumptions to §Assumptions @@ -54,7 +54,7 @@ After traceability is complete, present these options to the user (use `tool-ask | # | Label | Target | Why | | --- | ---------------- | ------------ | ------------------------------------------------------------ | -| 1 | Scope next slice | `ln-scope` | More slices remain on the plan | +| 1 | Scope next slice | `ln-scope` | More slices remain — if multiple were unblocked, name them | | 2 | Review the code | `ln-review` | Assess quality after an implementation burst | | 3 | Revise spec | `ln-spec` | Build revealed the spec needs structural revision | | 4 | Revise plan | `ln-plan` | Revisit the plan or re-prioritize | diff --git a/.agents/skills/ln-plan/SKILL.md b/.agents/skills/ln-plan/SKILL.md index 4877edce..57ed30c3 100644 --- a/.agents/skills/ln-plan/SKILL.md +++ b/.agents/skills/ln-plan/SKILL.md @@ -20,10 +20,16 @@ If context is thin, run a brief interview (not a full `ln-grill`) to fill gaps. 
## Plan +**Mode detection.** If the user is inserting or reordering specific slices — not replanning from scratch — this is a **patch**. Read PLAN.md, make the targeted edits, then jump to the post-edit checklist (step 5). + 1. If `memory/PLAN.md` exists, read it first. Retire completed slices (mark `done`). Assess what remains and what's changed. 2. Explore the codebase. Identify architectural constraints the slices must respect (routes, schema, auth, third-party boundaries). 3. Draft or revise phases and slices. Each slice must be independently demoable and independently grabbable where possible. Group into temporal phases. For each, name dependent requirements and assumptions from `memory/SPEC.md`. 4. Confirm with user — adjust granularity, reorder, split or merge. +5. **Post-edit checklist** — after any addition, removal, or reordering: + - Update the `## Dependencies` ASCII graph to reflect new/changed edges + - Update `### Parallelism opportunities` if new concurrent paths opened + - Verify every new slice names its requirements, assumptions, invariants to establish, and invariants to respect from SPEC.md ## Output diff --git a/.agents/skills/ln-scope/SKILL.md b/.agents/skills/ln-scope/SKILL.md index 4ecc9ac8..953eb8ec 100644 --- a/.agents/skills/ln-scope/SKILL.md +++ b/.agents/skills/ln-scope/SKILL.md @@ -14,6 +14,8 @@ The behavior to deliver: $ARGUMENTS If `memory/SPEC.md` exists, use its lexicon and respect its invariants. +**Parallelism check.** If `memory/PLAN.md` exists, check `## Dependencies` and `### Parallelism opportunities`. If the current state (completed slices) unblocks multiple slices, surface them: "Slices X and Y are both unblocked — which to scope?" If the user names one, note the other(s) as available for concurrent work (e.g. a separate agent thread or session). 
+ ## Scope Card ### Target Behavior diff --git a/.agents/skills/ln-spec/SKILL.md b/.agents/skills/ln-spec/SKILL.md index 33b1f381..cfa9f321 100644 --- a/.agents/skills/ln-spec/SKILL.md +++ b/.agents/skills/ln-spec/SKILL.md @@ -16,6 +16,8 @@ The feature or problem: $ARGUMENTS ## Procedure +**Mode detection.** If the user provides a specific finding, research result, or decision to record — not a new feature area — this is a **patch**, not a full pass. Skip to step 5. + 1. **Capture the problem** from the user's perspective — what they want and *why*. The *why* shapes the solution space. 2. **Explore the codebase** to verify assertions, understand current state, and find existing patterns. If `memory/SPEC.md` exists, read it first — this is an update, not a blank-slate write. 3. **Interview** (if understanding is thin), to close remaining gaps. Walk each branch of the design tree. For each question, provide your recommended answer. If the codebase can answer a question, explore it instead of asking. Use `/ln-grill` if it hasn't already been run. @@ -30,6 +32,16 @@ Write or update `./memory/SPEC.md` following the template at `@resources/spec-te If `memory/PLAN.md` exists, verify that changed assumptions and decisions still align with affected slices. +### Cross-reference integrity + +Every amendment must close its reference chain. 
After editing, verify: + +- **New assumption** → has: dependent decision(s), implicated slice(s) in PLAN.md, validation approach +- **New decision** → has: dependent assumption(s), supersession note +- **New invariant** → has: establishing slice in PLAN.md, protecting test (or `manual (outer loop)`), proved decision +- **New constraint** → has: rationale for exclusion +- **New feedback loop item** → names the invariant(s) it protects + ## Routing After filing the spec, present these options to the user (use `tool-ask-question`): diff --git a/AGENTS.md b/AGENTS.md index 04c482f3..79997ffd 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -16,6 +16,13 @@ Slices and spikes in `memory/PLAN.md` are the unit of work. When starting one: One branch per slice/spike. Stacked branches mirror slice dependencies in PLAN.md. Graphite manages the stack; Linear tracks the issue. +### naming conventions + +- **Branch**: `ln/{issue-id}-{keywords}` (e.g. `ln/fe-534-walking-skeleton`) +- **PR title**: `{issue-id | upper}: {Linear issue title in sentence case}` (e.g. `FE-534: Walking skeleton SDK to SSE to React`) + +PR descriptions are written only when tying off a branch — not during active development. + ## planning Two canonical documents in `memory/`: @@ -37,3 +44,12 @@ The `/ln-*` skills at `.agents/skills/` follow this flow: ### verification Verification strategy is defined per-project in SPEC.md §Verification Design (three-tier feedback loops). The global verification harness in `~/.claude/CLAUDE.md` provides the execution stack. + +### manual testing + +When a slice requires manual UI testing (outer-loop verification): + +1. **Dev server**: use `/tool-cmux` to open a terminal pane, run `npm run dev` there +2. **Browser**: use `/tool-cdp-cli` to launch Chrome with DevTools Protocol, open the dev URL, and interact (snapshot, fill, click, eval, console) + +This keeps the dev server and browser observable without leaving the agent session. 
diff --git a/docs/design/schema.dbml b/docs/design/schema.dbml new file mode 100644 index 00000000..abc1cc15 --- /dev/null +++ b/docs/design/schema.dbml @@ -0,0 +1,176 @@ +// Brunch v2 — Entity Relation Model +// Produced by ln-grill session 2026-03-31 +// +// Core idea: the turn tree is the version history. Decisions and assumptions +// are extracted from turns by an observer agent. Requirements and criteria +// are downstream projections reviewed in later phases. The active path from +// HEAD determines which entities are current. No snapshots needed. + +// ── Core ─────────────────────────────────────────────────── + +Table project { + id integer [pk] + name text [not null] + active_turn_id integer [ref: > turn.id, note: 'HEAD pointer — current tip of the active branch'] + created_at timestamp [default: `CURRENT_TIMESTAMP`] + updated_at timestamp [default: `CURRENT_TIMESTAMP`] + + Note: 'Identity and metadata. No phase cursor — phase resolution lives on turns.' +} + +// ── Conversation tree ────────────────────────────────────── + +Table turn { + id integer [pk] + project_id integer [not null, ref: > project.id] + parent_turn_id integer [ref: > turn.id, note: 'null = root turn'] + phase phase_enum [not null, note: 'Immutable provenance'] + question text [not null, note: 'What the interviewing agent asked'] + why text [note: 'Why this question matters — strategic grounding for the user'] + impact impact_enum [note: 'Signal label for the user'] + answer text [note: 'What the user chose or typed. null = unanswered'] + is_resolution boolean [not null, default: false, note: 'LLM judgment: this turn completes the phase'] + created_at timestamp [default: `CURRENT_TIMESTAMP`] + + Note: 'The commit. Branches on decision revisit. Walk parent_turn_id to root for the active path.' 
+} + +Table option { + id integer [pk] + turn_id integer [not null, ref: > turn.id] + position integer [not null] + content text [not null] + is_recommended boolean [not null, default: false] + is_selected boolean [not null, default: false] + + indexes { + (turn_id, position) [unique] + } + + Note: 'Structured alternatives presented in a turn. At least two per turn.' +} + +Enum impact_enum { + high + medium + low +} + +Enum phase_enum { + scope + design + requirements + criteria +} + +// ── Knowledge entities (extracted by observer agent) ─────── + +Table decision { + id integer [pk] + project_id integer [not null, ref: > project.id] + content text [not null, note: 'What was decided'] + rationale text [note: 'Why this was chosen'] + + Note: 'A resolved fork in the design tree. Traced to the turn that produced it.' +} + +Table assumption { + id integer [pk] + project_id integer [not null, ref: > project.id] + content text [not null, note: 'The falsifiable belief'] + + Note: 'A load-bearing belief a decision rests on. Dependency of decisions, not independent.' +} + +Table requirement { + id integer [pk] + project_id integer [not null, ref: > project.id] + content text [not null, note: 'What the system must do'] + reviewed_at timestamp [note: 'Last confirmed against the current decision graph'] + + Note: 'Accumulated during drill-down, confirmed during requirements review phase.' +} + +Table criterion { + id integer [pk] + project_id integer [not null, ref: > project.id] + requirement_id integer [not null, ref: > requirement.id] + content text [not null, note: 'Testable condition'] + reviewed_at timestamp [note: 'Last confirmed against the current requirement set'] + + Note: 'Proposed by agent, confirmed by user during criteria phase.' 
+} + +// ── Observer linkage (turn → extracted entities) ─────────── + +Table turn_decision { + turn_id integer [not null, ref: > turn.id] + decision_id integer [not null, ref: > decision.id] + + indexes { + (turn_id, decision_id) [pk] + } + + Note: 'This turn produced this decision.' +} + +Table turn_assumption { + turn_id integer [not null, ref: > turn.id] + assumption_id integer [not null, ref: > assumption.id] + + indexes { + (turn_id, assumption_id) [pk] + } + + Note: 'This turn surfaced this assumption.' +} + +// ── Decision dependency graph ────────────────────────────── + +Table decision_parent_decision { + decision_id integer [not null, ref: > decision.id] + parent_decision_id integer [not null, ref: > decision.id] + + indexes { + (decision_id, parent_decision_id) [pk] + } + + Note: 'DAG edge: this decision depends on a prior decision.' +} + +Table decision_parent_assumption { + decision_id integer [not null, ref: > decision.id] + parent_assumption_id integer [not null, ref: > assumption.id] + + indexes { + (decision_id, parent_assumption_id) [pk] + } + + Note: 'This decision is predicated on this assumption.' +} + +// ── Assumption dependency graph ───────────────────────────── + +Table assumption_parent_assumption { + assumption_id integer [not null, ref: > assumption.id] + parent_assumption_id integer [not null, ref: > assumption.id] + + indexes { + (assumption_id, parent_assumption_id) [pk] + } + + Note: 'DAG edge: this assumption rests on a prior assumption.' +} + +// ── Requirement provenance ───────────────────────────────── + +Table requirement_decision { + requirement_id integer [not null, ref: > requirement.id] + decision_id integer [not null, ref: > decision.id] + + indexes { + (requirement_id, decision_id) [pk] + } + + Note: 'Many-to-many: which decisions gave rise to this requirement.' +} diff --git a/memory/PLAN.md b/memory/PLAN.md index 7a37c8d5..a29eade9 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -19,109 +19,137 @@ ### Slices 1. 
**Walking skeleton: SDK → SSE → React** `FE-534` — Prove the integration seam: the highest-uncertainty slice, retires the most risk. `done` - - Requirements: → SPEC.md §Requirements #1, #3 + - Requirements: → SPEC.md §Requirements #1, #4 - Assumptions: → SPEC.md §Assumptions A1, A2, A8, A10 - Invariants established: → SPEC.md §Invariants I1, I2, I3, I4 - Acceptance: `npm run dev` opens browser, type a message, see streamed response with visible thinking and text. `useChat` manages all state - - Blocks: all subsequent slices - Branch: `ln/fe-534-walking-skeleton` -2. **SQLite foundation + project persistence** `FE-535` — Replace Dolt with `better-sqlite3`. Schema: `project`, `interview_exchange`, `spec_output`. Auto-create DB on startup. Session CRUD. Resume via Claude Agent SDK `resume`. `not-started` - - Requirements: → SPEC.md §Requirements #9 - - Assumptions: → SPEC.md §Assumptions A5 - - Invariants to establish: DB lifecycle (create → persist → close → reopen → intact) - - Invariants to respect: → SPEC.md §Invariants I1, I2, I3 - - Acceptance: create project, close browser, reopen, resume conversation +2. **SQLite foundation + project persistence** `FE-535` — Replace Dolt with `better-sqlite3`. Basic persistence with project + message tables. Conversation history replay. `done` + - Requirements: → SPEC.md §Requirements #14 + - Assumptions: → SPEC.md §Assumptions A5 (validated), A11 (workaround validated), A12 (validated) + - Invariants established: → SPEC.md §Invariants I5, I6 + - Invariants respected: → SPEC.md §Invariants I1, I2, I3 + - Acceptance: create project, send message, refresh page, see history, continue conversation - Branch: `ln/fe-535-sqlite-persistence` -## Phase 2: Interview Core +## Phase 2: Turn Model + Extraction - + -### Slices +### Spikes -3. **Interview Phase 1: scope establishment** `FE-536` — System prompt drives scope elicitation. LLM presents structured questions with options. Exchanges stored in `interview_exchange`. 
`not-started` - - Requirements: → SPEC.md §Requirements #2, #5 - - Assumptions: → SPEC.md §Assumptions A7 - - Acceptance: user describes goal, LLM asks structured scope questions, exchanges persisted - - Branch: `ln/fe-536-interview-phase-1` +1. **Observer extraction fidelity** — Can the LLM reliably extract decisions, assumptions, and dependency edges from a single turn's Q&A? Test with realistic fixture turns across different question types (scope, design, constraints). Measure extraction consistency across runs. `not-started` + - Assumptions: → SPEC.md §Assumptions A14, A3 + - Time box: 2 hours + - Success: ≥80% of expected entities captured with correct dependency edges across 5+ fixture turns -4. **Entity extraction pipeline** `FE-537` — After each exchange, separate `queryStructured` call extracts entities. Materialize into entity tables. Emit `data-entities` SSE event. `not-started` - - Requirements: → SPEC.md §Requirements #4 - - Assumptions: → SPEC.md §Assumptions A3, A4 - - Acceptance: entity dashboard shows extracted items within 1-3s of answering - - Branch: `ln/fe-537-entity-extraction` +### Slices -5. **Entity dashboard UI** `FE-538` — React sidebar showing accumulated entities by type. Updates live via `data-entities` events. Read-only. `not-started` - - Requirements: → SPEC.md §Requirements #4 +3. **Turn tree schema + API** — Migrate from message table to the full schema.dbml model (turn, option, decision, assumption, requirement, criterion + all join tables). Update API: POST /api/chat creates turns, GET /api/projects/current returns turns on the active path. Project gets `active_turn_id`. Tests verify turn tree CRUD and active path resolution. 
`not-started` + - Requirements: → SPEC.md §Requirements #14 + - Assumptions: → SPEC.md §Assumptions A6 + - Invariants to establish: turn tree persistence, active path resolution + - Invariants to respect: → SPEC.md §Invariants I1, I2, I3 + - Acceptance: create project, create turns with parent chain, resolve active path, close and reopen with state intact + +3b. **Rich chat UI: tool calls + reasoning rendering** — Extend SSE adapter to emit `tool-call-streaming-start`, `tool-call-delta`, `tool-call`, and `tool-result` events for SDK `tool_use` content blocks. Install AI Elements components (`Tool`, `Reasoning`, `ChainOfThought`, `Message`, `PromptInput`) via `npx ai-elements`, restyle to match brunch design. Replace hand-rolled `App.tsx` message rendering with part-type switching (`text`, `reasoning`, `tool-{name}`, `step-start`). Establish user-testability for the streaming pipeline per verification policy — all part types visible in browser. `not-started` + - Requirements: → SPEC.md §Requirements #4 + - Assumptions: → SPEC.md §Assumptions A16, A17 + - Invariants to establish: → SPEC.md §Invariants I7, I8 + - Invariants to respect: → SPEC.md §Invariants I1, I2, I3 + - Acceptance: `npm run dev`, send a message that triggers tool use, see tool call with state transitions (pending → running → completed/error), see reasoning in collapsible block, all rendered via AI Elements components. SSE adapter tests cover tool_use content blocks. + - Branch: `ln/fe-xxx-rich-chat-ui` + +4. **Structured interview: scope phase** — Replace flat chat with structured turns. Implement the scope phase as an agent skill — the agent generates a question with options, grounding ("why this matters"), and impact signal. User selects an option or types a response. Turn persists with phase provenance. UI renders the turn card (question + options + grounding). 
`not-started` + - Requirements: → SPEC.md §Requirements #2, #3 + - Assumptions: → SPEC.md §Assumptions A7, A13 + - Invariants to respect: → SPEC.md §Invariants I1, I2, I3, I5, I6 + - Acceptance: start a project, agent asks structured scope questions with options and grounding, user answers, turns persist with parent chain + +5. **Observer agent + entity persistence** — After each answered turn, a second agent call extracts decisions and assumptions. Writes to decision/assumption tables with turn linkage (turn_decision, turn_assumption) and dependency edges (decision_parent_decision, decision_parent_assumption, assumption_parent_assumption). `not-started` + - Requirements: → SPEC.md §Requirements #5 + - Assumptions: → SPEC.md §Assumptions A3, A4, A14 (validated by spike) + - Acceptance: answer a scope question, observer extracts decision + assumptions, dependency edges visible in DB, extraction completes within user think time + +6. **Decision + assumption dashboard** — React sidebar showing decisions and assumptions on the active path. Updates after each observer extraction. Dependency edges visible (what does this decision depend on?). `not-started` + - Requirements: → SPEC.md §Requirements #6 - Assumptions: — - - Acceptance: entities appear in categorized lists as interview progresses - - Branch: `ln/fe-538-entity-dashboard` + - Acceptance: entities appear in categorized lists as interview progresses, dependency links navigable -## Phase 3: Full Interview Flow +## Phase 3: Full Interview - + ### Slices -6. **Phase transition: scope → design** `FE-539` — LLM proposes transition with summary. User confirms. Phase stored on project. Dashboard shows indicator. `not-started` - - Requirements: → SPEC.md §Requirements #7 - - Assumptions: — - - Acceptance: LLM summarizes scope, user confirms, design phase begins - - Branch: `ln/fe-539-phase-transition` +7. **Phase transition + resolution** — Interviewing agent judges when scope phase is complete (is_resolution). 
Summary presented to user. User confirms to advance. UI shows phase completion state. `not-started` + - Requirements: → SPEC.md §Requirements #7, #8 + - Assumptions: → SPEC.md §Assumptions A15 + - Acceptance: agent marks resolution, summary shows, user confirms, UI reflects phase completion -7. **Interview Phase 2: design tree exploration** `FE-540` — LLM works down design tree with structured questions. Decisions extracted and materialized. `not-started` - - Requirements: → SPEC.md §Requirements #2, #5 - - Assumptions: — - - Acceptance: design questions with options, decisions in dashboard - - Branch: `ln/fe-540-interview-phase-2` +8. **Design drill-down phase** — Second agent skill. Walks the design tree with structured questions. Decisions extracted by observer. Continues until agent judges resolution. `not-started` + - Requirements: → SPEC.md §Requirements #2, #3 + - Assumptions: → SPEC.md §Assumptions A13 (validated by slice 4) + - Acceptance: design questions with options, decisions extracted and shown in dashboard, agent resolves when understanding is reached -8. **Freeform side-channel** `FE-541` — "Ask about this" escape hatch. Separate `useChat` scoped to current question. Doesn't pollute main transcript. `not-started` - - Requirements: → SPEC.md §Requirements #6 +9. **Requirements review phase** — Third agent skill. Walks accumulated requirements list. Agent checks for gaps, proposes additions. User confirms each. Requirements get `reviewed_at` stamped. `not-started` + - Requirements: → SPEC.md §Requirements #11 - Assumptions: — - - Acceptance: digress, get answer, return to main flow unchanged - - Branch: `ln/fe-541-side-channel` + - Acceptance: agent presents requirements, suggests gaps, user confirms, reviewed_at updated -9. **Interview Phase 3: acceptance criteria validation** `FE-542` — LLM surfaces criteria, proposes additions, walks risks. `acceptance_criterion` and `risk` tables populated. 
`not-started` - - Requirements: → SPEC.md §Requirements #2 - - Assumptions: — - - Acceptance: criteria and risks appear in dashboard after validation - - Branch: `ln/fe-542-interview-phase-3` +10. **Criteria phase** — Fourth agent skill. For each confirmed requirement, agent proposes testable criteria. User selects/edits/confirms. Criteria get `reviewed_at` stamped. `not-started` + - Requirements: → SPEC.md §Requirements #12 + - Assumptions: — + - Acceptance: agent proposes criteria per requirement, user confirms, spec readiness predicate evaluable -## Phase 4: Distribution +## Phase 4: Revisit + Export - + ### Slices -10. **Spec export** `FE-543` — Flatten entity state to markdown. LLM generates from entities + exchanges. Download button. `not-started` - - Requirements: → SPEC.md §Requirements #8, #10 +11. **Decision revisit: turn tree branching** — Navigate to a previous decision in the dashboard. Fork a new branch from the source turn. Move HEAD. Abandoned branches can be restored (move HEAD back). Active path recomputation. `not-started` + - Requirements: → SPEC.md §Requirements #9, #10 + - Assumptions: → SPEC.md §Assumptions A6 + - Acceptance: revisit a decision, new branch created, interview resumes from fork point, abandon returns to previous path + +12. **Soft invalidation** — When HEAD moves to a new branch, requirements traced to superseded decisions are flagged (stale reviewed_at). Criteria inherit flag transitively. Dashboard shows invalidation state. Re-entering requirements/criteria phase re-qualifies flagged entities. `not-started` + - Requirements: → SPEC.md §Requirements #9 - Assumptions: — - - Acceptance: click export, markdown downloads. Re-export after changes updates spec - - Branch: `ln/fe-543-spec-export` + - Acceptance: fork a branch, requirements show "needs review" state, re-review clears flags -11. **Snapshot versioning** `FE-544` — `project_snapshot` table. Auto-snapshot at phase transitions. Restore from previous. 
`not-started` - - Requirements: → SPEC.md §Requirements #10 - - Assumptions: → SPEC.md §Assumptions A6 - - Acceptance: snapshot, change, restore, state reverts - - Branch: `ln/fe-544-snapshot-versioning` +13. **Spec export** — Render markdown spec from active path entities (decisions, assumptions, requirements, criteria). Export enabled only when spec readiness predicate is true (all phases resolved + reviewed). Download button. `not-started` + - Requirements: → SPEC.md §Requirements #13 + - Assumptions: — + - Acceptance: complete all phases, click export, markdown downloads with all active-path entities + +## Phase 5: Distribution + + + +### Slices -12. **npx distribution** `FE-545` — `bin` entry, launcher script, single port, opens browser. Single env var: `ANTHROPIC_API_KEY`. `not-started` +14. **npx distribution** — `bin` entry, launcher starts Express (serves built Vite assets + API on one port), opens browser. Single env var: `ANTHROPIC_API_KEY`. `not-started` - Requirements: → SPEC.md §Requirements #1 - - Assumptions: → SPEC.md §Assumptions A8 + - Assumptions: → SPEC.md §Assumptions A8 (validated) - Acceptance: `npx brunch` with key in scope opens working app - - Branch: `ln/fe-545-npx-distribution` -## Phase 5: Horizon +## Horizon - + -- Pre-prompting phase (Phase 0) — category-narrowing quiz -- Decision DAG tracking (join tables, graph visualization) -- Assumption↔decision links and belief invalidation +- Exploratory pathway (for projects where the goal itself is unclear) - Multi-provider support via AI SDK server-side (if Claude Agent SDK becomes limiting) - Entity editing outside interview flow (direct CRUD on dashboard) - Export to GitHub Issues, Linear, YAML task definitions +- Assumption graph visualization (explore dependency chains) +- Decision graph visualization (tree/DAG view) +- Project dashboard with phase completion overview (→ SPEC.md §Requirements #15) ## Dependencies @@ -129,16 +157,19 @@ ``` Phase 1: 1 (skeleton) ──→ 2 (SQLite) -Phase 2: 2 
──→ 3 (scope) ──→ 4 (extraction) ──→ 5 (dashboard) -Phase 3: 5 ──→ 6 (transition) ──→ 7 (design) ──→ 9 (criteria) - 3 ──→ 8 (side-channel) [independent after 3] -Phase 4: 9 ──→ 10 (export) - 2+5 ──→ 11 (snapshots) [independent after 2+5] - 10 ──→ 12 (npx) [or parallelizable earlier] +Phase 2: 2 ──→ 3 (turn schema) ──→ 3b (rich chat UI) ──→ 4 (scope interview) + spike (observer) ──→ 5 (observer agent) + 3 ──→ 5 (observer agent) ──→ 6 (dashboard) + 4 ──→ 5 +Phase 3: 6 ──→ 7 (transitions) ──→ 8 (design) ──→ 9 (requirements) ──→ 10 (criteria) +Phase 4: 6 ──→ 11 (branching) ──→ 12 (invalidation) + 10 ──→ 13 (export) +Phase 5: 13 ──→ 14 (npx) ``` ### Parallelism opportunities -- Slices 8 (side-channel) and 6-7 (phase transition + design) can run in parallel after slice 5 -- Slice 11 (snapshots) can run in parallel with slices 6-10 -- Slice 12 (npx) can start as soon as slice 1 is done (basic launcher), with full completion after slice 10 +- Slice 3b (rich chat UI) and observer spike can proceed in parallel after slice 3 lands +- Slice 7 (transitions) and slice 11 (branching) are both unblocked once slice 6 (dashboard) lands — per the graph above, slice 7 waits on slice 6 +- Slice 11 (branching) can start after slice 6, independent of slices 7-10 +- Slice 14 (npx) can start early with a basic launcher, completing after slice 13 diff --git a/memory/SPEC.md b/memory/SPEC.md index c0238f9e..de491443 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -9,83 +9,100 @@ ## Concept & Goal +Brunch is an AI-guided spec elicitation tool that turns natural-language project goals into structured specifications through a multi-phase interview. The interview is driven by an agent that relentlessly asks structured questions — each with options, a recommendation, and strategic grounding ("why this matters") — until shared understanding is reached. A second observer agent extracts decisions and assumptions from each turn, building a dependency graph. The output is a fire-and-forget specification document. 
+The core data model: -Brunch is an AI-guided spec elicitation tool that turns natural-language project goals into structured specifications through a multi-phase interview. The current prototype works but is overbuilt: Docker (Dolt), optional OpenCode sidecar, two parallel frontends, hand-rolled NDJSON streaming that drops ~80% of available agent events, and domain terminology that doesn't match what the entities actually represent. - -The goal is a clean v2 that runs with `npx brunch` and one env var (`ANTHROPIC_API_KEY`). The interview is driven by the Claude Agent SDK with the full event surface (thinking, tool progress, subagent events, permissions) streamed to a React frontend via the Vercel AI SDK's documented SSE protocol. Output is a fire-and-forget SPEC.md. +- **Turn tree** — The conversation is a tree of turns (question + options + answer), not a flat log. Turns branch when a decision is revisited. The active path from HEAD determines the current state. The turn tree *is* the version history — no snapshots needed. +- **Decision graph** — Decisions are the atoms of the spec. Each is a resolved fork with options, a chosen path, and a rationale. Decisions depend on prior decisions and assumptions, forming a DAG. Revisiting a decision forks the turn tree and soft-invalidates downstream entities. +- **Assumption graph** — Assumptions are the falsifiable beliefs that decisions rest on. They have their own dependency structure (assumptions can rest on prior assumptions). +- **Requirements & criteria** — Downstream projections. Requirements accumulate during the decision drill-down and are reviewed in a dedicated phase. Criteria are proposed against confirmed requirements. The architecture: -- **Agent engine**: Claude Agent SDK (`query()`) — tool use, MCP, session resume, subagents, permissions, rich streaming events +- **Agent engine**: Claude Agent SDK (`query()`) — tool use, MCP, session resume, subagents, permissions, rich streaming events. 
Each interview phase is an agent skill. +- **Observer agent**: Separate extraction call after each turn — captures decisions, assumptions, and their dependency edges - **Server**: Express.js — iterates SDK messages, translates to AI SDK's UI Message Stream SSE protocol. No AI SDK runtime server-side - **Transport**: AI SDK UI Message Stream protocol (SSE with typed JSON events) - **Client**: React + Vite + `@ai-sdk/react` `useChat` hook — consumes SSE natively - **Database**: SQLite via `better-sqlite3` — zero-config, embedded -- **Output**: Flattened markdown SPEC.md exported on demand +- **Output**: Flattened markdown spec exported on demand from the active path's entities ## Constraints & Non-goals - - - **Anthropic-only** — no multi-provider support (OpenAI, Gemini, Ollama) -- **No decision DAG** — join tables and graph structure deferred; relationships captured in spec text -- **No belief invalidation / cascading** — fire-and-forget; no runtime propagation +- **No belief invalidation cascading** — revisiting a decision soft-invalidates downstream (flags for review), but there is no automatic runtime propagation through the graph - **No task planning** — consumers of the spec, not part of this tool - **No exploratory pathway** — assumes user has a reasonably defined goal - **Single-user** — no collaborative editing - **No custom model selection UI** — single model, configurable via env var at most -- **No Dolt** — replaced by SQLite snapshots +- **No Dolt** — replaced by SQLite with turn-tree versioning - **No AG-UI / CopilotKit** — AI SDK SSE protocol is sufficient +- **No assistant-ui** — its runtime abstraction layer (`AssistantRuntimeProvider`) adds unnecessary indirection over `useChat`; brunch emits custom SSE from Express, not from AI SDK server-side, so the adapter chain (useChat → useChatRuntime → AssistantRuntimeProvider) is overhead without benefit ## Requirements - - 1. 
Run `npx brunch` with just `ANTHROPIC_API_KEY` and have the tool open in the browser — setup is instant -2. Describe what you're building and have the AI walk through a structured interview — thorough spec without missing important decisions -3. See the AI's thinking process, tool usage, and progress in real-time — CLI-quality visibility -4. See accumulated entities (decisions, assumptions, requirements, acceptance criteria) in a dashboard as the interview progresses -5. AI presents structured questions with ≥2 options and a recommendation — each design fork explicit and recorded -6. Ask clarifying questions or push back without derailing the main flow — explore before committing -7. Summary and confirmation gate at each phase transition — review what's been captured before moving on -8. Export the spec as markdown at any time — hand to a coding agent or share -9. Close the browser and resume later — not forced to complete in one sitting -10. Revisit and change previous decisions, then re-export — spec evolves as understanding deepens +2. Start a new project and have the agent begin a structured interview — framing questions establish context before the design drill-down +3. Each turn presents a question with ≥2 options, a recommendation, and a "why this matters" grounding block — the user sees the strategic significance of each fork +4. See the AI's thinking process, tool usage, and progress in real-time — CLI-quality visibility of the agent's streaming output +5. The observer agent extracts decisions and assumptions from each answered turn, building the dependency graph in the background +6. See accumulated decisions, assumptions, requirements, and criteria in a dashboard as the interview progresses +7. The interviewing agent determines when shared understanding is reached and marks the phase resolved — interview length is emergent, not predetermined +8. Phase transitions show a summary and require user confirmation before moving on +9. 
Revisit any previous decision by navigating the turn tree — this forks a new branch and soft-invalidates dependent entities for re-review +10. Abandon a revisit branch to return to the previous path — like git checkout +11. The requirements review phase walks the accumulated requirements list, checks for gaps, and confirms completeness +12. The criteria phase proposes testable acceptance criteria against confirmed requirements +13. Export the spec as markdown when all phases are resolved — spec readiness is the compound predicate of phase resolution + requirements reviewed + criteria confirmed +14. Close the browser and resume later — the turn tree, decisions, and assumptions persist in SQLite +15. The project dashboard shows all projects with their phase completion status ## Assumptions - - - | # | Assumption | Confidence | Dependent decisions | Implicated slices | Validation approach | | --- | --------------------------------------------------------------------------------------------------------------------------------- | ---------- | ------------------- | -------------------------- | -------------------------------------------------------------- | -| A1 | AI SDK's UI Message Stream SSE protocol is documented and stable enough to emit conformantly without importing AI SDK server-side | **validated** | D6 | Walking skeleton | Validated: skeleton emits conformant SSE, 15 tests pass | -| A2 | Claude Agent SDK `query()` with `includePartialMessages` provides all streaming event types needed for CLI-quality feedback | **validated** | D6 | Walking skeleton | Validated: adapter translates stream_event messages correctly | -| A3 | Separating interviewer from entity extraction produces better interview quality than inline tool calling | medium | D1 | Entity extraction pipeline | Compare interview coherence with and without tool-calling load | -| A4 | Entity extraction completes in 1-3s during user read/think time (10-60s), adding zero perceived latency | medium | D1 | 
Entity extraction pipeline | Measure extraction latency with realistic exchange payloads | -| A5 | `better-sqlite3` npm prebuilt binary works across macOS/Linux without native compilation issues | high | D5 | SQLite foundation | Widely tested; validate on CI matrix | -| A6 | Snapshot-based versioning in SQLite is sufficient for undo/redo in a single-user tool | high | D5 | Snapshot versioning | Validate with realistic entity counts | -| A7 | Users arriving at the tool have a reasonably defined goal | medium | — | Interview Phase 1 | User testing; exploratory pathway deferred if false | -| A8 | A single Express port serving API + static assets is sufficient for npx distribution | **validated** | D8 | npx distribution | Validated: Vite proxy to Express works in dev; single port | -| A9 | TanStack AI is too immature for a deliverable (alpha, v0) | medium | D7 | — | Re-evaluate if AI SDK becomes constraining | -| A10 | The `useChat` hook can consume custom SSE without AI SDK server runtime | **validated** | D7 | Walking skeleton | Validated: useChat consumes custom SSE via DefaultChatTransport | - +| A1 | AI SDK's UI Message Stream SSE protocol is documented and stable enough to emit conformantly without importing AI SDK server-side | **validated** | D8 | Walking skeleton | Validated: skeleton emits conformant SSE, 15 tests pass | +| A2 | Claude Agent SDK `query()` with `includePartialMessages` provides all streaming event types needed for CLI-quality feedback | **validated** | D8 | Walking skeleton | Validated: adapter translates stream_event messages correctly | +| A3 | Separating interviewer from observer produces better interview quality than inline tool calling | medium | D4 | Observer agent | Compare interview coherence with and without tool-calling load | +| A4 | Observer extraction completes in 1-3s during user read/think time (10-60s), adding zero perceived latency | medium | D4 | Observer agent | Measure extraction latency with realistic turn payloads | +| A5
| `better-sqlite3` npm prebuilt binary works across macOS/Linux without native compilation issues | **validated** | D7 | SQLite foundation | Validated: installed on macOS without native compilation issues | +| A6 | Turn-tree branching in SQLite is sufficient for decision revisit and undo in a single-user tool | high | D1, D7 | Turn tree | Validate with realistic branch/merge scenarios | +| A7 | Users arriving at the tool have a reasonably defined goal | medium | — | Scope phase | User testing; exploratory pathway deferred if false | +| A8 | A single Express port serving API + static assets is sufficient for npx distribution | **validated** | D10 | npx distribution | Validated: Vite proxy to Express works in dev; single port | +| A9 | TanStack AI is too immature for a deliverable (alpha, v0) | medium | D9 | — | Re-evaluate if AI SDK becomes constraining | +| A10 | The `useChat` hook can consume custom SSE without AI SDK server runtime | **validated** | D9 | Walking skeleton | Validated: useChat consumes custom SSE via DefaultChatTransport | +| A11 | Stateless `query()` with prompt-stuffed history is sufficient for multi-turn interviewing — SDK session persistence is unnecessary and undesirable | **validated** | D8, D12 | SQLite foundation | Validated: formatting history into prompt works. SDK sessions rejected as competing source of truth — opaque, machine-local, incompatible with portable data goals (atomic YAML / git-versionable). Turn tree is sole session model.
| +| A12 | `useChat` hook accepts initial messages to hydrate conversation state from server-stored history | **validated** | D9 | SQLite foundation | Validated: `useChat` doesn't have `initialMessages` prop but `setMessages` works for hydration | +| A13 | Claude Agent SDK supports defining interview phases as agent skills with distinct system prompts and tool sets | medium | D2 | Interview phases | Test SDK skill/agent configuration API | +| A14 | A second-thread observer agent can reliably extract decisions, assumptions, and dependency edges from a single turn's Q&A | medium | D4, D5, D13 | Observer agent | Probe with realistic interview exchanges; measure extraction fidelity | +| A15 | The LLM can reliably judge when a phase interview has reached sufficient understanding (is_resolution) | medium | D3 | Phase resolution | Probe across varied project types; measure false-positive resolution rate | +| A16 | AI SDK `useChat` hook's `ToolUIPart` state machine (`input-streaming` → `input-available` → `output-available` / `output-error` / `approval-requested` → `approval-responded` / `output-denied`) models all permutations of pending, error, and success for both interim (thinking, tool calls) and final (response) data | high | D14 | Rich chat UI | Validate by extending SSE adapter to emit tool-call events, confirm `useChat` surfaces all states | +| A17 | AI Elements copy-paste components can be restyled without forking — they are ownable source files, not npm-locked dependencies | high | D14 | Rich chat UI | Install via CLI, inspect source, confirm no hidden npm runtime dependency | ## Decisions +### Domain model +1. **Turn tree as version history** — The conversation is a tree, not a flat log. Each turn points to its parent. Revisiting a decision forks a new branch. `project.active_turn_id` is the HEAD pointer. The active path determines which entities are current — no snapshot tables needed. Depends on: A6. Supersedes: the snapshot versioning model of the former D5. +2.
**Interview phases as agent skills** — Each phase (scope, design, requirements, criteria) is a separate agent skill with its own system prompt and tool configuration. The server orchestrates which skill to invoke based on phase completion state. Phases can be composed, reordered, or replaced independently. Depends on: A13. Supersedes: —. +3. **Phase resolution via LLM judgment** — A turn's `is_resolution` flag is set by the interviewing agent when it judges that shared understanding has been reached for that phase. The active path is resolved for a phase when its latest turn has `is_resolution = true`. Spec export requires all phases resolved. Depends on: A15. Supersedes: —. +4. **Two-agent pattern (interviewer + observer)** — The interviewer focuses solely on conducting the interview with structured questions. After each answered turn, a separate observer agent extracts decisions, assumptions, and dependency edges. The observer can use a cheaper/faster model. Keeps the interviewer prompt clean and extraction independently testable. Depends on: A3, A4, A14. Supersedes: —. +5. **Decision dependency graph** — Decisions depend on prior decisions and/or assumptions via `decision_parent_decision` and `decision_parent_assumption` join tables. Assumptions can depend on prior assumptions via `assumption_parent_assumption`. The observer agent captures these edges during extraction. Depends on: A14. Supersedes: —. +6. **Soft invalidation for requirements and criteria** — When a decision is revisited (branch fork), requirements traced to that decision are flagged for re-review via stale `reviewed_at` timestamps. Criteria inherit the flag transitively from their requirements. The agent handles re-qualification holistically, not mechanistically. Depends on: —. Supersedes: —. -1. **Two-LLM-call pattern (interviewer + extractor)** — The interviewer focuses solely on conducting a high-quality interview; it does not call entity CRUD tools. 
After each exchange, a separate structured-output call extracts entities from the exchange + current state. Runs during user read/think time. Extraction can use a cheaper/faster model (e.g. Haiku). Keeps the interviewer prompt clean and extraction independently testable. Depends on: A3, A4. Supersedes: —. -2. **Three interview phases with confirm gates** — (0) optional pre-prompting, (1) scope establishment, (2) design tree exploration, (3) acceptance criteria validation. Phase transitions are LLM-proposed, user-confirmed. The summary-and-confirm pattern serves as both UX checkpoint and entity consolidation moment. Interview length is emergent, not predetermined. Depends on: A7. Supersedes: —. -3. **Guided chat with structured escape hatch** — Main flow is LLM-driven with structured questions (≥2 options + recommendation + open-ended). Freeform digressions happen via a separate LLM call scoped to current question context, so tangents don't pollute the interview transcript or entity extraction. Depends on: —. Supersedes: —. -4. **Entity model: materialized for UI, derived from exchanges** — Entities materialize into SQLite for the dashboard, but the interview exchange is the source of truth. Tables: `project`, `interview_exchange`, `goal`, `scope`, `decision`, `assumption`, `requirement`, `acceptance_criterion`, `risk`, `spec_output`. Join tables deferred to v2 — relationships captured in spec text, not enforced in schema. Depends on: A3. Supersedes: —. -5. **SQLite via better-sqlite3 replaces Dolt** — Zero-config embedded DB. Snapshot versioning via `project_snapshot` table (serialized entity state, created at phase transitions and on-demand). Diff is client-side JSON comparison. Undo = restore from snapshot. Dolt's differentiator (cell-level merge across concurrent writers) is a multi-user problem this single-user tool doesn't have. Depends on: A5, A6. Supersedes: Dolt (docker-based). -6. **Express.js server emits AI SDK-conformant SSE** — Plain JS. 
Iterates SDK's `query()` async generator, translates each `SDKMessage` into SSE events matching AI SDK's UI Message Stream protocol. Event mapping: `SDKPartialAssistantMessage` → `text-delta`/`reasoning-delta`/`tool-input-`*; `SDKToolProgressMessage` → `data-tool-progress`; `SDKResultMessage` → `finish`; domain events use `data-*` custom part pattern. No AI SDK runtime imported server-side — value is purely the documented protocol and the React hook. Depends on: A1, A2. Supersedes: hand-rolled NDJSON streaming. -7. **React + Vite + @ai-sdk/react client** — `useChat` for conversation (streaming, status, stop, message state). Custom components for entity dashboard (updated via `data-`* events). Phase indicator. Freeform side-panel as separate `useChat` instance. AG-UI was rejected (no Claude Agent SDK integration; CopilotKit component model fights the custom interview UI). TanStack AI was too young (alpha, v0). Depends on: A9, A10. Supersedes: Preact, both existing frontends. -8. **npx-launchable single-command distribution** — `bin` entry, launcher starts Express (serves built Vite assets + API on one port), opens browser. Single env var: `ANTHROPIC_API_KEY`. DB auto-created in project directory or `~/.brunch/`. Depends on: A8. Supersedes: multi-step Docker + env var setup. -9. **Drop list** — Dolt/mysql2, OpenCode sidecar, Preact, both existing frontend implementations, NDJSON protocol, JSON Schema definitions (→ Zod), @tanstack/react-table, @dnd-kit/, dompurify, marked, four streaming functions in claude.js, dispatch.js. Depends on: —. Supersedes: —. -10. **Reference list** — Claude Agent SDK integration pattern (`query()` + `includePartialMessages`), Express server structure, Vite config (adapted for React), test structure (Vitest + Supertest), REMODEL.md domain model. Depends on: —. Supersedes: —. +12. **Stateless SDK integration — no session persistence** — Each `query()` call uses `persistSession: false`. 
Conversation context is reconstructed from the turn tree's active path and injected as formatted history + structured entity summaries. SDK sessions (`resume`, `fork`, session IDs) are not used. The turn tree is the sole session model. Rationale: SDK sessions are an opaque, machine-local competing source of truth incompatible with brunch's branching semantics and future portable-data goals (atomic YAML, git-versionable). Depends on: A11. Supersedes: implicit reliance on SDK session state. +13. **Observer captures derived intelligence** — The observer agent's extraction mandate extends beyond decisions and assumptions to include derived observations (e.g. codebase analysis, domain insights) that the interviewer surfaced through tool use during a turn. These are persisted so subsequent stateless `query()` calls can inject them as context. The exact entity model is TBD — candidates include a dedicated `observation` table, enriched `decision.rationale`, or a `notes` field on `turn`. Depends on: A14, D12. Supersedes: —. + +14. **AI Elements for rich chat UI components** — Copy-paste component source files (via `npx ai-elements`) from Vercel's AI Elements registry, built on shadcn/ui + Radix. Components directly consume AI SDK's `ToolUIPart` types and `useChat` hook state. Provides `Tool` (7-state lifecycle), `Reasoning` (collapsible), `ChainOfThought` (groups reasoning + tool calls), `Message`, `Conversation`, `PromptInput`. Source files are owned, not npm-locked — full restyle control. No runtime abstraction layer. Depends on: A16, A17. Supersedes: hand-rolled message rendering in App.tsx. + +### Technical stack + +7. **SQLite via better-sqlite3** — Zero-config embedded DB. Turn tree, decisions, assumptions, requirements, criteria all in SQLite tables. Schema defined in `docs/design/schema.dbml`. Depends on: A5, A6. Supersedes: Dolt (docker-based). +8. 
**Express.js server emits AI SDK-conformant SSE** — Iterates the Claude Agent SDK's `query()` async generator, translates each `SDKMessage` into SSE events matching AI SDK's UI Message Stream protocol via a per-request translator factory. No AI SDK runtime imported server-side. Depends on: A1, A2. Supersedes: hand-rolled NDJSON streaming. +9. **React + Vite + @ai-sdk/react client** — `useChat` for conversation streaming. Custom components for decision/entity dashboard. Phase indicator and navigation. Depends on: A9, A10. Supersedes: Preact, both existing frontends. +10. **npx-launchable single-command distribution** — `bin` entry, launcher starts Express (serves built Vite assets + API on one port), opens browser. Single env var: `ANTHROPIC_API_KEY`. DB auto-created in project directory or `~/.brunch/`. Depends on: A8. Supersedes: multi-step Docker + env var setup. +11. **Drop list** — Dolt/mysql2, OpenCode sidecar, Preact, both existing frontend implementations, NDJSON protocol, JSON Schema definitions (→ Zod), @tanstack/react-table, @dnd-kit/, dompurify, marked, four streaming functions in claude.js, dispatch.js. Depends on: —. Supersedes: —.
## Invariants @@ -97,39 +114,51 @@ The architecture: | # | Invariant | Established by | Protected by | Proves | | --- | ---------------------------- | ------------------ | --------------------------------- | ------ | -| I1 | SSE protocol conformance | Slice 1 (skeleton) | sse-adapter.test.ts | D6 | -| I2 | Stream lifecycle correctness | Slice 1 (skeleton) | app.test.ts | D6 | -| I3 | Thinking/text separation | Slice 1 (skeleton) | sse-adapter.test.ts, app.test.ts | D6 | -| I4 | Vite proxy routing | Slice 1 (skeleton) | vite.config.ts (manual) | D8 | +| I1 | SSE protocol conformance | Slice 1 (skeleton) | sse-adapter.test.ts | D8 | +| I2 | Stream lifecycle correctness | Slice 1 (skeleton) | app.test.ts | D8 | +| I3 | Thinking/text separation | Slice 1 (skeleton) | sse-adapter.test.ts, app.test.ts | D8 | +| I4 | Vite proxy routing | Slice 1 (skeleton) | vite.config.ts (manual) | D10 | +| I5 | DB lifecycle correctness | Slice 2 (SQLite) | db.test.ts | D7 | +| I6 | Message persistence | Slice 2 (SQLite) | db.test.ts, app.test.ts | D7 | +| I7 | Tool call SSE conformance | Slice 3b (rich UI) | sse-adapter.test.ts | D8, D14 | +| I8 | Tool part state rendering | Slice 3b (rich UI) | manual (outer loop) | D14 | ## Lexicon + +### Method terms +| Term | Definition | +| --------------- | --------------------------------------------------------------------------------------------- | +| **assumption** | A falsifiable belief accepted as true; tracked with confidence, linked to decisions and slices | +| **decision** | A recorded choice that resolves a question; ordered, with supersession chain | +| **invariant** | A structural property proven by implementation and protected by tests; must not regress | +| **requirement** | A capability the system must provide | +| **slice** | A thin end-to-end tracer-bullet path through all integration layers | +| **spike** | A time-boxed throwaway investigation to answer one hard question | + +### Domain terms | Term | Definition | | 
----------------------- | --------------------------------------------------------------------------------------------------------------------------- | -| **assumption** | A falsifiable belief accepted as true; tracked with confidence, linked to decisions and slices | -| **decision** | A recorded choice that resolves a question; ordered, with supersession chain | -| **requirement** | A capability the system must provide | -| **slice** | A thin end-to-end tracer-bullet path through all integration layers | -| **spike** | A time-boxed throwaway investigation to answer one hard question | -| **invariant** | A structural property proven by implementation and protected by tests; must not regress | -| **phase** (plan) | A temporal grouping of slices and spikes in PLAN.md | -| **exchange** | The universal interaction primitive: one question-answer pair in the interview. Stored in `interview_exchange` | -| **entity** | A structured data item extracted from exchanges: decision, assumption, requirement, acceptance criterion, risk, goal, scope | -| **extraction** | The process of deriving entities from an exchange via a separate LLM call | -| **interviewer** | The primary LLM role: conducts the interview, presents structured questions. Does not call entity CRUD tools | -| **extractor** | The secondary LLM role: derives entities from exchanges. 
Runs post-exchange during user think time | -| **interview phase** | A stage of the interview flow: scope establishment → design tree exploration → acceptance criteria validation | -| **phase transition** | An LLM-proposed, user-confirmed checkpoint between interview phases, with summary review | -| **structured question** | A question with ≥2 options, a recommendation, and an open-ended "something else" escape | -| **side-channel** | A freeform digression scoped to the current question, isolated from the main interview transcript | -| **dashboard** | The UI sidebar showing accumulated entities by type, updated live via SSE events | -| **snapshot** | A serialized dump of all entity state for a project, stored in `project_snapshot` for undo/redo | -| **spec output** | The flattened markdown SPEC.md generated from entity state + exchanges | -| **pathway** | The interview approach (currently: structured; future: exploratory). Stored on the project | - +| **project** | A spec elicitation session. Has a name, a HEAD pointer (`active_turn_id`), and phase completion state | +| **turn** | One question-answer pair in the interview. Carries phase provenance, options, grounding ("why"), impact signal, and the user's answer. Points to its parent turn — the turn tree is the version history | +| **option** | A structured alternative presented in a turn. At least two per turn. One may be recommended; one is selected by the user | +| **decision** | A resolved fork in the design tree. Extracted by the observer from an answered turn. Depends on prior decisions and/or assumptions. Traced back to its source turn via `turn_decision` | +| **assumption** | A falsifiable belief a decision rests on. Extracted by the observer. Can depend on prior assumptions. Traced back to its source turn via `turn_assumption` | +| **requirement** | What the system must do. Accumulated during the design drill-down, confirmed during the requirements review phase. 
Traced to source decisions via `requirement_decision`. Has `reviewed_at` for soft-invalidation | +| **criterion** | A testable condition verifying a requirement. Proposed by the agent during the criteria phase, confirmed by the user. Has `reviewed_at` for soft-invalidation | +| **active path** | The branch from HEAD to root in the turn tree. Determines which turns, decisions, and assumptions are currently active | +| **phase** | A stage of the interview: `scope`, `design`, `requirements`, `criteria`. Immutable provenance on each turn. Each phase is backed by an agent skill | +| **phase resolution** | LLM judgment that shared understanding has been reached for a phase. Marked by `turn.is_resolution = true` on the last turn of a phase | +| **interviewer** | The primary agent role: conducts the interview with structured questions, grounding, and impact signals. Does not extract entities | +| **observer** | The secondary agent role: extracts decisions, assumptions, and dependency edges from each answered turn. Runs post-answer during user read time | +| **decision graph** | The DAG of decisions and their dependencies (on prior decisions and assumptions). Revisiting a decision forks the turn tree | +| **soft invalidation** | When a decision is revisited, requirements traced to it are flagged for re-review (stale `reviewed_at`). Criteria inherit the flag transitively. The agent re-qualifies holistically | +| **spec readiness** | Compound predicate: all four phases resolved AND requirements reviewed AND criteria confirmed. Only then is export enabled | ## Verification Design @@ -148,40 +177,48 @@ The architecture: ### Verification Policy -End-to-end slices must be **user-testable**, not just programmatically tested. Each slice that touches the user-facing boundary should be manually verifiable via `npm run dev` (or equivalent). Create demo routes, seed data, or test fixtures as needed to make manual verification possible at every slice. 
+End-to-end slices must be **user-testable**, not just programmatically tested. Each slice that touches the user-facing boundary should be manually verifiable via `npm run dev` (or equivalent). Use `/tool-cmux` for dev server panes and `/tool-cdp-cli` for browser interaction during outer-loop verification. ### Feedback Loops - **Inner loop** (ms–seconds): type checks, fast unit tests, linting — agent-autonomous, always-on - SSE adapter: given an `SDKMessage`, assert correct SSE event string output → protects I1, I3 - - Entity extraction: given an exchange + entity state, assert correct entity operations (snapshot fixtures) - - Snapshot versioning: create → snapshot → modify → snapshot → restore → assert state match + - Turn persistence: given a turn with options, assert correct storage and retrieval → protects I5, I6 + - Observer extraction: given a turn's Q&A, assert correct decision/assumption output (snapshot fixtures) + - Active path: given a branched turn tree, assert correct entity resolution from HEAD + - Tool call SSE: given an SDK `tool_use` content block, assert correct `tool-input-start`, `tool-input-delta`, `tool-input-available` events → protects I7 - **Middle loop** (seconds–minutes): integration tests, regression gates - Interview flow: POST user message via Supertest, assert SSE stream contains expected event types in order → protects I2 - - DB lifecycle: create project → persist exchanges → close → reopen → assert state intact + - DB lifecycle: create project → persist turns → close → reopen → assert state intact → protects I5 + - Decision revisit: create branch → verify active path resolves correctly → verify soft invalidation flags - **Outer loop** (minutes–hours): e2e, human observer - - Full interview walkthrough in browser: structured questions render, entities appear in dashboard, phase transitions work, export produces valid markdown - - Resume test: close browser mid-interview, reopen, verify conversation and entity state intact + - Rich chat
rendering: tool calls show all 7 states (input-streaming, input-available, approval-requested, approval-responded, output-available, output-error, output-denied), reasoning collapses, message parts render by type → protects I8 + - Full interview walkthrough in browser: structured questions render with options/grounding/impact, decisions appear in dashboard, phase transitions work + - Resume test: close browser mid-interview, reopen, verify turn tree and entity state intact + - Decision revisit: navigate to a previous decision, fork, verify dashboard updates and invalidation + - Export test: complete all phases, export spec, verify markdown contains all active-path entities ### Current Coverage -| File | Tests | Protects | -| ------------------------ | ----- | ------------ | -| sse-adapter.test.ts | 10 | I1, I3 | -| app.test.ts | 5 | I2, I3 | - -## Acceptance Criteria (exit conditions) + +| File | Tests | Protects | +| ------------------------ | ----- | ---------------- | +| sse-adapter.test.ts | 10 | I1, I3 | +| app.test.ts | 8 | I2, I3, I5, I6 | +| db.test.ts | 10 | I5, I6 | +## Acceptance Criteria (exit conditions) 1. `npx brunch` with `ANTHROPIC_API_KEY` in scope opens a working app in the browser -2. Typing a message produces a streamed response with visible thinking and text -3. AI conducts a structured interview with options and recommendations -4. Entities appear in the dashboard within seconds of answering -5. Phase transitions show summary, require user confirmation -6. Freeform digressions don't pollute the main interview transcript -7. Closing and reopening the browser resumes the interview -8. Clicking export produces a valid markdown spec -9. Snapshot restore reverts entity state to a previous point -10. All inner and middle loop tests pass - +2. Starting a new project launches an interview with structured turns (question + options + grounding + impact) +3. 
The observer extracts decisions and assumptions from each answered turn, visible in the dashboard +4. The decision dependency graph is navigable — user can see what each decision depends on +5. Phase transitions show a summary, require user confirmation, and mark `is_resolution` +6. Revisiting a decision forks the turn tree and soft-invalidates downstream requirements +7. Abandoning a branch restores the previous active path +8. Requirements review phase walks the list, agent suggests gaps, user confirms +9. Criteria phase proposes testable conditions for each requirement +10. Export produces valid markdown spec when all phases are resolved and entities reviewed +11. Closing and reopening the browser resumes the interview from the active turn +12. All inner and middle loop tests pass diff --git a/package-lock.json b/package-lock.json index 983398d2..21c0a61b 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,11 +9,13 @@ "@anthropic-ai/claude-agent-sdk": "^0.2.77", "@modelcontextprotocol/sdk": "^1.27.1", "@vitejs/plugin-react": "^5.2.0", + "better-sqlite3": "^12.8.0", "express": "^5.2.1", "react": "^19.2.4", "react-dom": "^19.2.4" }, "devDependencies": { + "@types/better-sqlite3": "^7.6.13", "@types/cors": "^2.8.19", "@types/express": "^5.0.6", "@types/react": "^19.2.14", @@ -1823,6 +1825,16 @@ "@babel/types": "^7.28.2" } }, + "node_modules/@types/better-sqlite3": { + "version": "7.6.13", + "resolved": "https://registry.npmjs.org/@types/better-sqlite3/-/better-sqlite3-7.6.13.tgz", + "integrity": "sha512-NMv9ASNARoKksWtsq/SHakpYAYnhBrQgGD8zkLYk/jaK8jUGn08CfEdTRgYhMypUQAfzSP8W6gNLe0q19/t4VA==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, "node_modules/@types/body-parser": { "version": "1.19.6", "resolved": "https://registry.npmjs.org/@types/body-parser/-/body-parser-1.19.6.tgz", @@ -2346,6 +2358,26 @@ "dev": true, "license": "MIT" }, + "node_modules/base64-js": { + "version": "1.5.1", + "resolved": 
"https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", + "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/baseline-browser-mapping": { "version": "2.10.12", "resolved": "https://registry.npmjs.org/baseline-browser-mapping/-/baseline-browser-mapping-2.10.12.tgz", @@ -2358,6 +2390,40 @@ "node": ">=6.0.0" } }, + "node_modules/better-sqlite3": { + "version": "12.8.0", + "resolved": "https://registry.npmjs.org/better-sqlite3/-/better-sqlite3-12.8.0.tgz", + "integrity": "sha512-RxD2Vd96sQDjQr20kdP+F+dK/1OUNiVOl200vKBZY8u0vTwysfolF6Hq+3ZK2+h8My9YvZhHsF+RSGZW2VYrPQ==", + "hasInstallScript": true, + "license": "MIT", + "dependencies": { + "bindings": "^1.5.0", + "prebuild-install": "^7.1.1" + }, + "engines": { + "node": "20.x || 22.x || 23.x || 24.x || 25.x" + } + }, + "node_modules/bindings": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/bindings/-/bindings-1.5.0.tgz", + "integrity": "sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==", + "license": "MIT", + "dependencies": { + "file-uri-to-path": "1.0.0" + } + }, + "node_modules/bl": { + "version": "4.1.0", + "resolved": "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz", + "integrity": "sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==", + "license": "MIT", + "dependencies": { + "buffer": "^5.5.0", + "inherits": "^2.0.4", + "readable-stream": "^3.4.0" + } + }, "node_modules/body-parser": { "version": "2.2.2", "resolved": "https://registry.npmjs.org/body-parser/-/body-parser-2.2.2.tgz", @@ -2426,6 +2492,30 @@ "node": "^6 || ^7 || ^8 || ^9 || ^10 || ^11 || ^12 || >=13.7" } }, + 
"node_modules/buffer": { + "version": "5.7.1", + "resolved": "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz", + "integrity": "sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "base64-js": "^1.3.1", + "ieee754": "^1.1.13" + } + }, "node_modules/bytes": { "version": "3.1.2", "resolved": "https://registry.npmjs.org/bytes/-/bytes-3.1.2.tgz", @@ -2521,6 +2611,12 @@ "url": "https://github.com/chalk/chalk?sponsor=1" } }, + "node_modules/chownr": { + "version": "1.1.4", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-1.1.4.tgz", + "integrity": "sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==", + "license": "ISC" + }, "node_modules/cliui": { "version": "8.0.1", "resolved": "https://registry.npmjs.org/cliui/-/cliui-8.0.1.tgz", @@ -2735,6 +2831,30 @@ } } }, + "node_modules/decompress-response": { + "version": "6.0.0", + "resolved": "https://registry.npmjs.org/decompress-response/-/decompress-response-6.0.0.tgz", + "integrity": "sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==", + "license": "MIT", + "dependencies": { + "mimic-response": "^3.1.0" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, + "node_modules/deep-extend": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/deep-extend/-/deep-extend-0.6.0.tgz", + "integrity": "sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==", + "license": "MIT", + "engines": { + "node": ">=4.0.0" + } + }, "node_modules/deep-is": { "version": "0.1.4", "resolved": 
"https://registry.npmjs.org/deep-is/-/deep-is-0.1.4.tgz", @@ -2770,6 +2890,15 @@ "node": ">=6" } }, + "node_modules/detect-libc": { + "version": "2.1.2", + "resolved": "https://registry.npmjs.org/detect-libc/-/detect-libc-2.1.2.tgz", + "integrity": "sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==", + "license": "Apache-2.0", + "engines": { + "node": ">=8" + } + }, "node_modules/dezalgo": { "version": "1.0.4", "resolved": "https://registry.npmjs.org/dezalgo/-/dezalgo-1.0.4.tgz", @@ -2823,6 +2952,15 @@ "node": ">= 0.8" } }, + "node_modules/end-of-stream": { + "version": "1.4.5", + "resolved": "https://registry.npmjs.org/end-of-stream/-/end-of-stream-1.4.5.tgz", + "integrity": "sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==", + "license": "MIT", + "dependencies": { + "once": "^1.4.0" + } + }, "node_modules/es-define-property": { "version": "1.0.1", "resolved": "https://registry.npmjs.org/es-define-property/-/es-define-property-1.0.1.tgz", @@ -3129,6 +3267,15 @@ "node": ">=18.0.0" } }, + "node_modules/expand-template": { + "version": "2.0.3", + "resolved": "https://registry.npmjs.org/expand-template/-/expand-template-2.0.3.tgz", + "integrity": "sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==", + "license": "(MIT OR WTFPL)", + "engines": { + "node": ">=6" + } + }, "node_modules/expect-type": { "version": "1.3.0", "resolved": "https://registry.npmjs.org/expect-type/-/expect-type-1.3.0.tgz", @@ -3273,6 +3420,12 @@ "node": ">=16.0.0" } }, + "node_modules/file-uri-to-path": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/file-uri-to-path/-/file-uri-to-path-1.0.0.tgz", + "integrity": "sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==", + "license": "MIT" + }, "node_modules/finalhandler": { "version": "2.1.1", "resolved": 
"https://registry.npmjs.org/finalhandler/-/finalhandler-2.1.1.tgz", @@ -3408,6 +3561,12 @@ "node": ">= 0.8" } }, + "node_modules/fs-constants": { + "version": "1.0.0", + "resolved": "https://registry.npmjs.org/fs-constants/-/fs-constants-1.0.0.tgz", + "integrity": "sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==", + "license": "MIT" + }, "node_modules/fsevents": { "version": "2.3.3", "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.3.tgz", @@ -3500,6 +3659,12 @@ "url": "https://github.com/privatenumber/get-tsconfig?sponsor=1" } }, + "node_modules/github-from-package": { + "version": "0.0.0", + "resolved": "https://registry.npmjs.org/github-from-package/-/github-from-package-0.0.0.tgz", + "integrity": "sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==", + "license": "MIT" + }, "node_modules/glob-parent": { "version": "6.0.2", "resolved": "https://registry.npmjs.org/glob-parent/-/glob-parent-6.0.2.tgz", @@ -3633,6 +3798,26 @@ "url": "https://opencollective.com/express" } }, + "node_modules/ieee754": { + "version": "1.2.1", + "resolved": "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz", + "integrity": "sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "BSD-3-Clause" + }, "node_modules/ignore": { "version": "5.3.2", "resolved": "https://registry.npmjs.org/ignore/-/ignore-5.3.2.tgz", @@ -3676,6 +3861,12 @@ "integrity": "sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==", "license": "ISC" }, + "node_modules/ini": { + "version": "1.3.8", + "resolved": "https://registry.npmjs.org/ini/-/ini-1.3.8.tgz", + "integrity": 
"sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==", + "license": "ISC" + }, "node_modules/ip-address": { "version": "10.1.0", "resolved": "https://registry.npmjs.org/ip-address/-/ip-address-10.1.0.tgz", @@ -3968,6 +4159,18 @@ "url": "https://opencollective.com/express" } }, + "node_modules/mimic-response": { + "version": "3.1.0", + "resolved": "https://registry.npmjs.org/mimic-response/-/mimic-response-3.1.0.tgz", + "integrity": "sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==", + "license": "MIT", + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/sindresorhus" + } + }, "node_modules/minimatch": { "version": "3.1.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-3.1.5.tgz", @@ -3981,6 +4184,21 @@ "node": "*" } }, + "node_modules/minimist": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/minimist/-/minimist-1.2.8.tgz", + "integrity": "sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==", + "license": "MIT", + "funding": { + "url": "https://github.com/sponsors/ljharb" + } + }, + "node_modules/mkdirp-classic": { + "version": "0.5.3", + "resolved": "https://registry.npmjs.org/mkdirp-classic/-/mkdirp-classic-0.5.3.tgz", + "integrity": "sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==", + "license": "MIT" + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -4005,6 +4223,12 @@ "node": "^10 || ^12 || ^13.7 || ^14 || >=15.0.1" } }, + "node_modules/napi-build-utils": { + "version": "2.0.0", + "resolved": "https://registry.npmjs.org/napi-build-utils/-/napi-build-utils-2.0.0.tgz", + "integrity": "sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==", + "license": "MIT" + }, "node_modules/natural-compare": { "version": 
"1.4.0", "resolved": "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz", @@ -4021,6 +4245,30 @@ "node": ">= 0.6" } }, + "node_modules/node-abi": { + "version": "3.89.0", + "resolved": "https://registry.npmjs.org/node-abi/-/node-abi-3.89.0.tgz", + "integrity": "sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==", + "license": "MIT", + "dependencies": { + "semver": "^7.3.5" + }, + "engines": { + "node": ">=10" + } + }, + "node_modules/node-abi/node_modules/semver": { + "version": "7.7.4", + "resolved": "https://registry.npmjs.org/semver/-/semver-7.7.4.tgz", + "integrity": "sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==", + "license": "ISC", + "bin": { + "semver": "bin/semver.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/node-releases": { "version": "2.0.36", "resolved": "https://registry.npmjs.org/node-releases/-/node-releases-2.0.36.tgz", @@ -4243,6 +4491,33 @@ "node": "^10 || ^12 || >=14" } }, + "node_modules/prebuild-install": { + "version": "7.1.3", + "resolved": "https://registry.npmjs.org/prebuild-install/-/prebuild-install-7.1.3.tgz", + "integrity": "sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==", + "deprecated": "No longer maintained. 
Please contact the author of the relevant native addon; alternatives are available.", + "license": "MIT", + "dependencies": { + "detect-libc": "^2.0.0", + "expand-template": "^2.0.3", + "github-from-package": "0.0.0", + "minimist": "^1.2.3", + "mkdirp-classic": "^0.5.3", + "napi-build-utils": "^2.0.0", + "node-abi": "^3.3.0", + "pump": "^3.0.0", + "rc": "^1.2.7", + "simple-get": "^4.0.0", + "tar-fs": "^2.0.0", + "tunnel-agent": "^0.6.0" + }, + "bin": { + "prebuild-install": "bin.js" + }, + "engines": { + "node": ">=10" + } + }, "node_modules/prelude-ls": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/prelude-ls/-/prelude-ls-1.2.1.tgz", @@ -4266,6 +4541,16 @@ "node": ">= 0.10" } }, + "node_modules/pump": { + "version": "3.0.4", + "resolved": "https://registry.npmjs.org/pump/-/pump-3.0.4.tgz", + "integrity": "sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==", + "license": "MIT", + "dependencies": { + "end-of-stream": "^1.1.0", + "once": "^1.3.1" + } + }, "node_modules/punycode": { "version": "2.3.1", "resolved": "https://registry.npmjs.org/punycode/-/punycode-2.3.1.tgz", @@ -4315,6 +4600,30 @@ "node": ">= 0.10" } }, + "node_modules/rc": { + "version": "1.2.8", + "resolved": "https://registry.npmjs.org/rc/-/rc-1.2.8.tgz", + "integrity": "sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==", + "license": "(BSD-2-Clause OR MIT OR Apache-2.0)", + "dependencies": { + "deep-extend": "^0.6.0", + "ini": "~1.3.0", + "minimist": "^1.2.0", + "strip-json-comments": "~2.0.1" + }, + "bin": { + "rc": "cli.js" + } + }, + "node_modules/rc/node_modules/strip-json-comments": { + "version": "2.0.1", + "resolved": "https://registry.npmjs.org/strip-json-comments/-/strip-json-comments-2.0.1.tgz", + "integrity": "sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==", + "license": "MIT", + "engines": { + "node": ">=0.10.0" + } + }, 
"node_modules/react": { "version": "19.2.4", "resolved": "https://registry.npmjs.org/react/-/react-19.2.4.tgz", @@ -4345,6 +4654,20 @@ "node": ">=0.10.0" } }, + "node_modules/readable-stream": { + "version": "3.6.2", + "resolved": "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.2.tgz", + "integrity": "sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==", + "license": "MIT", + "dependencies": { + "inherits": "^2.0.3", + "string_decoder": "^1.1.1", + "util-deprecate": "^1.0.1" + }, + "engines": { + "node": ">= 6" + } + }, "node_modules/require-directory": { "version": "2.1.1", "resolved": "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz", @@ -4454,6 +4777,26 @@ "tslib": "^2.1.0" } }, + "node_modules/safe-buffer": { + "version": "5.2.1", + "resolved": "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz", + "integrity": "sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, "node_modules/safer-buffer": { "version": "2.1.2", "resolved": "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz", @@ -4639,6 +4982,51 @@ "dev": true, "license": "ISC" }, + "node_modules/simple-concat": { + "version": "1.0.1", + "resolved": "https://registry.npmjs.org/simple-concat/-/simple-concat-1.0.1.tgz", + "integrity": "sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT" + }, + 
"node_modules/simple-get": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/simple-get/-/simple-get-4.0.1.tgz", + "integrity": "sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==", + "funding": [ + { + "type": "github", + "url": "https://github.com/sponsors/feross" + }, + { + "type": "patreon", + "url": "https://www.patreon.com/feross" + }, + { + "type": "consulting", + "url": "https://feross.org/support" + } + ], + "license": "MIT", + "dependencies": { + "decompress-response": "^6.0.0", + "once": "^1.3.1", + "simple-concat": "^1.0.0" + } + }, "node_modules/source-map-js": { "version": "1.2.1", "resolved": "https://registry.npmjs.org/source-map-js/-/source-map-js-1.2.1.tgz", @@ -4671,6 +5059,15 @@ "dev": true, "license": "MIT" }, + "node_modules/string_decoder": { + "version": "1.3.0", + "resolved": "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz", + "integrity": "sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==", + "license": "MIT", + "dependencies": { + "safe-buffer": "~5.2.0" + } + }, "node_modules/string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", @@ -4774,6 +5171,34 @@ "react": "^16.11.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, + "node_modules/tar-fs": { + "version": "2.1.4", + "resolved": "https://registry.npmjs.org/tar-fs/-/tar-fs-2.1.4.tgz", + "integrity": "sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==", + "license": "MIT", + "dependencies": { + "chownr": "^1.1.1", + "mkdirp-classic": "^0.5.2", + "pump": "^3.0.0", + "tar-stream": "^2.1.4" + } + }, + "node_modules/tar-stream": { + "version": "2.2.0", + "resolved": "https://registry.npmjs.org/tar-stream/-/tar-stream-2.2.0.tgz", + "integrity": "sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==", + "license": "MIT", + 
"dependencies": { + "bl": "^4.0.3", + "end-of-stream": "^1.4.1", + "fs-constants": "^1.0.0", + "inherits": "^2.0.3", + "readable-stream": "^3.1.1" + }, + "engines": { + "node": ">=6" + } + }, "node_modules/throttleit": { "version": "2.1.0", "resolved": "https://registry.npmjs.org/throttleit/-/throttleit-2.1.0.tgz", @@ -4875,6 +5300,18 @@ "fsevents": "~2.3.3" } }, + "node_modules/tunnel-agent": { + "version": "0.6.0", + "resolved": "https://registry.npmjs.org/tunnel-agent/-/tunnel-agent-0.6.0.tgz", + "integrity": "sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==", + "license": "Apache-2.0", + "dependencies": { + "safe-buffer": "^5.0.1" + }, + "engines": { + "node": "*" + } + }, "node_modules/type-check": { "version": "0.4.0", "resolved": "https://registry.npmjs.org/type-check/-/type-check-0.4.0.tgz", @@ -4981,6 +5418,12 @@ "react": "^16.8.0 || ^17.0.0 || ^18.0.0 || ^19.0.0" } }, + "node_modules/util-deprecate": { + "version": "1.0.2", + "resolved": "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz", + "integrity": "sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==", + "license": "MIT" + }, "node_modules/vary": { "version": "1.1.2", "resolved": "https://registry.npmjs.org/vary/-/vary-1.1.2.tgz", diff --git a/package.json b/package.json index e994d913..bb8286fd 100644 --- a/package.json +++ b/package.json @@ -12,11 +12,13 @@ "@anthropic-ai/claude-agent-sdk": "^0.2.77", "@modelcontextprotocol/sdk": "^1.27.1", "@vitejs/plugin-react": "^5.2.0", + "better-sqlite3": "^12.8.0", "express": "^5.2.1", "react": "^19.2.4", "react-dom": "^19.2.4" }, "devDependencies": { + "@types/better-sqlite3": "^7.6.13", "@types/cors": "^2.8.19", "@types/express": "^5.0.6", "@types/react": "^19.2.14", diff --git a/src/client/App.tsx b/src/client/App.tsx index ae23a595..93a15a06 100644 --- a/src/client/App.tsx +++ b/src/client/App.tsx @@ -1,21 +1,47 @@ -import { useState } from 
'react'; +import { useState, useEffect } from 'react'; import { useChat } from '@ai-sdk/react'; +import type { UIMessage } from '@ai-sdk/react'; export function App() { const [input, setInput] = useState(''); - const { messages, sendMessage, status, error } = useChat(); + const [loading, setLoading] = useState(true); + const { messages, sendMessage, setMessages, status, error } = useChat(); const isLoading = status === 'submitted' || status === 'streaming'; - console.log('Chat status:', status, 'messages:', messages.length, 'error:', error?.message); + // Fetch conversation history on mount + useEffect(() => { + fetch('/api/projects/current') + .then((res) => res.json()) + .then((data) => { + if (data.messages?.length > 0) { + const msgs: UIMessage[] = data.messages.map((m: { id: string; role: string; content: string }) => ({ + id: m.id, + role: m.role as 'user' | 'assistant', + parts: [{ type: 'text' as const, text: m.content }], + })); + setMessages(msgs); + } + setLoading(false); + }) + .catch(() => setLoading(false)); + }, []); const handleSubmit = (e: React.FormEvent) => { e.preventDefault(); if (!input.trim() || isLoading) return; - console.log('Sending message:', input); sendMessage({ text: input }); setInput(''); }; + if (loading) { + return ( +
Loading...
+