diff --git a/HANDOFF.md b/HANDOFF.md new file mode 100644 index 00000000..981adb2c --- /dev/null +++ b/HANDOFF.md @@ -0,0 +1,129 @@ +# Handoff + +> Generated by `ln-handoff` at 2026-04-09T19:39Z. Read this file to resume work. + +## Goal + +Finish hardening the Phase 5 design-mode / shared phase-close seam, keep project memory current, and resume the next high-signal planning or review step without re-discovering the just-landed refactor state. + +## Session State + +- **Last completed skill**: `ln-sync` — refreshed `memory/SPEC.md` and `memory/PLAN.md`, pruned temporary refactor assumptions, removed the resolved `memory/REFACTOR.md`, and cleaned stale spike artifacts. +- **Current skill**: `ln-handoff` +- **Flow position**: `grill → spec → plan → [design] → [oracles] → scope → [spike] → build → review → [refactor] → [sync]` +- **Handoff trigger**: user requested wrap-up after the completed phase-close refactor, repo cleanup, and sync pass. + +## In-flight work + +> CRITICAL: These artifacts exist only in the prior conversation, not on disk. +> Reproduce them here with full fidelity. + +### Artifact 1/2 — Sync report + +## Sync Report + +### Graduations +- No new assumption-to-decision promotions were needed during sync. +- The recent phase-close refactor truths were already captured in persisted decisions/invariants (`D75`, `D76`, `I85`, `I86`) before sync began. + +### Pruned +- `A42` removed from `memory/SPEC.md` as **embedded** — it was a temporary cutover assumption for deleting transcript-driven closure-basis recovery, and its truth now lives in `D75` + `I85`. +- `A43` removed from `memory/SPEC.md` as **embedded** — it was a temporary transport-cutover assumption for the discriminated phase-close command union, and its truth now lives in `D73` + `I83`. +- `memory/REFACTOR.md` removed — all six commit steps were complete, so keeping the file would preserve an already-resolved transition artifact. 
+ +### Stale items +- `memory/PLAN.md` slice 8 observed-state paragraph still referenced `memory/REFACTOR.md` by path after the refactor completed. This was updated to describe the completed phase-close refactor directly instead of pointing at a file slated for deletion. + +### Drift +- No material code ↔ memory drift remained after cleanup. +- `slice 8` is still correctly `in-progress`; downstream slices remain `9`, `10`, `11a`, `12`, `13` as documented. + +### Actions taken +- Updated `memory/SPEC.md` prune comment to record the removal of `A42` and `A43`. +- Removed `A42` and `A43` from the assumptions table. +- Updated `memory/PLAN.md` slice 8 observed-state prose to remove the `memory/REFACTOR.md` path reference and describe the completed refactor as durable project state. +- Deleted `memory/REFACTOR.md`. + +### Artifact 2/2 — Repo hygiene verdict for `spike/` + +- `spike/filesystem-tools.ts` was **kept**. + - Reason: `memory/SPEC.md` assumption `A29` still cites it explicitly as the evidence artifact for the validated core-tools spike. +- `spike/observer-fidelity.ts` was **deleted**. + - Reason: unreferenced in repo; relies on the old `@anthropic-ai/claude-agent-sdk` spike path and the pre-canonical ontology; looked like a stale investigative relic rather than a current evidence artifact. +- `spike/raw-sdk-tool-use.ts` was **deleted**. + - Reason: unreferenced in repo; documents a superseded raw Anthropic SDK exploration from before the current AI SDK direction. + +## Review findings + +> ALL findings from ln-review, not just the one being acted on. + +| # | Finding | Status | Implications | +| --- | ------- | ------ | ------------ | +| 1 | No `ln-review` was run in this session after the phase-close refactor sequence | `deferred` | The next thread should strongly consider `ln-review` before widening into slice 9 or 11a | + +## Diagnostic evidence + +> Concrete proof points that informed diagnoses or shifted direction. 
+> Without these, a new thread inherits conclusions but not reasoning. + +- `rg -n "spike/filesystem-tools|spike/" .` found exactly one live canonical reference to `spike/filesystem-tools.ts`, in `memory/SPEC.md` assumption `A29`: this justified keeping that file. +- `rg` found no live repo references to `spike/observer-fidelity.ts` or `spike/raw-sdk-tool-use.ts`: this justified deleting them as relics. +- `find spike -maxdepth 3 -print` originally showed three files: `filesystem-tools.ts`, `observer-fidelity.ts`, `raw-sdk-tool-use.ts`; after cleanup it showed only `spike/filesystem-tools.ts`. +- `rg -n "memory/REFACTOR\.md" .` after cleanup found only one remaining reference in `docs/design/ln-skills-review-after-alignment.md`; no canonical `memory/*` doc still depended on the file. +- Last full verification before this cleanup was green on commit `d2189fd` via `npm run verify` (184 tests passed, build green). The subsequent cleanup touched only memory docs and deleted unreferenced spike artifacts. 
+ +## Decisions and assumptions + +| Item | Type | Status | Source | +| ---- | ---- | ------ | ------ | +| D75 | `decision` | `persisted` | `memory/SPEC.md` §Decisions | +| D76 | `decision` | `persisted` | `memory/SPEC.md` §Decisions | +| A15 | `assumption` | `persisted` | `memory/SPEC.md` §Assumptions | +| A40 | `assumption` | `persisted` | `memory/SPEC.md` §Assumptions | +| A29 evidence file should remain `spike/filesystem-tools.ts` | `decision` | `volatile` | conversation + sync reasoning | + +## Repo state + +- **Branch**: `ln/fe-540-design-mode-commitment-exploration` +- **Recent commits**: + - `d2189fd feat: deepen the shared phase-close module` + - `eb117ce feat: ship explicit phase-close command union` + - `4f1c3eb feat: cut workflow projection over to durable closure basis` + - `f49e884 feat: persist phase-outcome closure basis` + - `fb2854e feat: project shared force-close availability` +- **Dirty files**: + - `M memory/PLAN.md` + - `M memory/SPEC.md` + - `D memory/REFACTOR.md` + - `D spike/observer-fidelity.ts` + - `D spike/raw-sdk-tool-use.ts` + - `?? HANDOFF.md` +- **Test status**: last known full gate `npm run verify` passed on commit `d2189fd` before the docs/spike cleanup; no code-bearing runtime paths changed afterward. + +## Artifact status + +| Artifact | Exists | Current vs conversation | +| ---------- | ------ | ----------------------- | +| memory/SPEC.md | yes | current | +| memory/PLAN.md | yes | current | +| memory/REFACTOR.md | no | n/a | + +## Next steps + +1. Run `ln-review` on the phase-close / design-mode seam now that the full refactor sequence is complete. +2. If review is clean, use `ln-scope` to pick the next planned slice — most likely `9` (requirements-review mode), unless priority shifts toward `11a` (dashboard workflow state). +3. Commit the current doc/spike cleanup if the user wants this hygiene pass preserved as its own checkpoint. 
+ +## Open questions + +- Should the historical note in `docs/design/ln-skills-review-after-alignment.md` that mentions `memory/REFACTOR.md` be updated, or is it acceptable as archival design commentary? +- Is the next best move `ln-review` first, or should the team jump directly to scoping slice `9`? +- Does the team want to keep `spike/filesystem-tools.ts` indefinitely as evidence for `A29`, or should that evidence eventually migrate into a more durable docs location? + +## Resume prompt + +Paste this into a new session: + +> Read `HANDOFF.md` in the workspace root for this work area. It contains the full state of in-progress work. +> The immediate next step is: run `ln-review` on the phase-close / design-mode seam. +> Start by reviewing the sync report and repo hygiene verdict in the In-flight section, then inspect the dirty files before deciding whether to commit the cleanup or continue into review. diff --git a/docs/design/DESIGN_SCRATCH.md b/docs/design/DESIGN_SCRATCH.md index 5560c389..674b41ea 100644 --- a/docs/design/DESIGN_SCRATCH.md +++ b/docs/design/DESIGN_SCRATCH.md @@ -1,21 +1,34 @@ ### slice and scope mis-matches VS saipo report -- Phase closure/hand-off "force" -- Assistant exploring existing codebase as context -- Some way of getting output from the tool to throw at a coding assistant (e.g. files or whatever) - - ^^ that gets us to the point where it can be experimented with as a way of working. -- Then make it look like the designs (ralph loop?) +- [ ] Exploring existing codebase as kickoff/framing input +- [x] Phase closure/hand-off "force" +- [ ] Output in a format that is ready for a coding assistant (e.g. files or whatever) + - (need to get to the point where it can be experimented with as a way of working.) +- [ ] Make it look like the designs (ralph loop?) - maybe this is something you could kick off overnight one day, pointing Claude to some Figma screens? 
+- [ ] Stats/dashboard views for results + - when generated, when last edited; title; version? + - "completeness" + - verification coverage: how many of the requirements are covered by acceptance criteria with verifications +- [ ] "Scope" of such a spec must ultimately be more flexible -- we naively assume single project scope, and total/ultimate completion scope + +## some early design sketches of the main flow (terminology and data model out of date; but UI style and layout relevant) +- the very beginning of a new project. The first image is the standard starting UI; below it is an alternate ideation, for the use-case where the kickoff is done by analyzing and harvesting from an existing codebase + + ![](assets/kickoff-screen.png) +- the first question of the first phase. a compact nav on left (alternate: across the top) indicates the phases; the wide sidebar on the right shows the (approximately) phase-based collection containers for the data we expect (the labels and terminology are wrong here). NOTE also that the question model is still wrong in these earlier designs, where the options provided are exclusive, and there is no free-text option. There is also the chance to "skip question" which I am dubious about; I would prefer the user explicitly state why they are skipping (e.g. "i'm not sure enough about this yet"), as part of the free-text -- This is pretty close to your existing layout (albeit your left-hand is a chat + inline questions rather than just questions) -- This plus adjacent demonstrate a left-hand sidebar where the phase/focus navigation could be -- This could be used as the ‘initial empty state’ for the chat window -- We’ll want to think about shifting from a ‘chat-first’ UI mode to a ‘browse spec, with chat on the side to discuss amendments’ mode once a user has created the initial spec – this kind of thing.
The idea here being that user could browse requirements/assumptions etc in the main view, and clicking on them would automatically add them as context to the chat -- Then probably the whole idea of linkages between requirements, assumptions etc? + ![](assets/first-question.png) +- what the main interview roughly looks like, as it develops. the right sidebar is being filled in, so we are further along at this point. -Welcome any thoughts -- we naively assume single project scope, and total/ultimate completion scope + ![](assets/main-interview.png) +- a "review" style step, such as we've been imagining for the requirements and the (acceptance) criteria. each of these might want to be expandable to show in more detail what the requirement is really about + + ![](assets/reqs-minimal.png) +- a sketch for what the final spec overview might look like + + ![](assets/spec-overview.png) ### "kickoff" (scope) phase needs two path variants diff --git a/docs/design/assets/bottom-bar-example.png b/docs/design/assets/bottom-bar-example.png new file mode 100644 index 00000000..11985af2 Binary files /dev/null and b/docs/design/assets/bottom-bar-example.png differ diff --git a/docs/design/assets/first-question.png b/docs/design/assets/first-question.png new file mode 100644 index 00000000..d5204592 Binary files /dev/null and b/docs/design/assets/first-question.png differ diff --git a/docs/design/assets/kickoff-screen.png b/docs/design/assets/kickoff-screen.png new file mode 100644 index 00000000..dbae6f1c Binary files /dev/null and b/docs/design/assets/kickoff-screen.png differ diff --git a/docs/design/assets/knowledge-graph.png b/docs/design/assets/knowledge-graph.png new file mode 100644 index 00000000..35f7c684 Binary files /dev/null and b/docs/design/assets/knowledge-graph.png differ diff --git a/docs/design/assets/main-interview.png b/docs/design/assets/main-interview.png new file mode 100644 index 00000000..c0214a2f Binary files /dev/null and
b/docs/design/assets/main-interview.png differ diff --git a/docs/design/assets/reqs-detailed.png b/docs/design/assets/reqs-detailed.png new file mode 100644 index 00000000..0e31b9fe Binary files /dev/null and b/docs/design/assets/reqs-detailed.png differ diff --git a/docs/design/assets/reqs-minimal.png b/docs/design/assets/reqs-minimal.png new file mode 100644 index 00000000..bd8c38f4 Binary files /dev/null and b/docs/design/assets/reqs-minimal.png differ diff --git a/docs/design/assets/reqs-overview.png b/docs/design/assets/reqs-overview.png new file mode 100644 index 00000000..fe976c9a Binary files /dev/null and b/docs/design/assets/reqs-overview.png differ diff --git a/docs/design/assets/requirements-approval.png b/docs/design/assets/requirements-approval.png new file mode 100644 index 00000000..c6f7dcd1 Binary files /dev/null and b/docs/design/assets/requirements-approval.png differ diff --git a/docs/design/assets/secondary-chat.png b/docs/design/assets/secondary-chat.png new file mode 100644 index 00000000..5b8b33c2 Binary files /dev/null and b/docs/design/assets/secondary-chat.png differ diff --git a/docs/design/assets/spec-overview.png b/docs/design/assets/spec-overview.png new file mode 100644 index 00000000..2fbb10b7 Binary files /dev/null and b/docs/design/assets/spec-overview.png differ diff --git a/docs/design/image-notes.md b/docs/design/image-notes.md new file mode 100644 index 00000000..72901fbf --- /dev/null +++ b/docs/design/image-notes.md @@ -0,0 +1,10 @@ +## other examples + +- an example of a bottom bar with a button: this bottom bar is a good ui pattern for the phase status and "force closure" workflow + + ![](assets/bottom-bar-example.png) +- ![](assets/knowledge-graph.png) +- ![](assets/secondary-chat.png) + ![](assets/reqs-detailed.png) + ![](assets/requirements-approval.png) + ![](assets/reqs-overview.png) diff --git a/drizzle/0006_phase_outcome_closure_basis.sql b/drizzle/0006_phase_outcome_closure_basis.sql new file mode 100644 index 
00000000..de29fa29 --- /dev/null +++ b/drizzle/0006_phase_outcome_closure_basis.sql @@ -0,0 +1 @@ +ALTER TABLE `phase_outcome` ADD `closure_basis` text; diff --git a/drizzle/meta/_journal.json b/drizzle/meta/_journal.json index 72ab43c4..398d276c 100644 --- a/drizzle/meta/_journal.json +++ b/drizzle/meta/_journal.json @@ -43,6 +43,13 @@ "when": 1775710000000, "tag": "0005_generic_commitment_graph", "breakpoints": true + }, + { + "idx": 6, + "version": "7", + "when": 1775715000000, + "tag": "0006_phase_outcome_closure_basis", + "breakpoints": true } ] } \ No newline at end of file diff --git a/memory/PLAN.md b/memory/PLAN.md index 18208f7f..8c4932d4 100644 --- a/memory/PLAN.md +++ b/memory/PLAN.md @@ -41,7 +41,7 @@ 4c. **UI foundation: shadcn/ui + Tailwind 4 + AI Elements** `FE-558` `done` 5. **Observer agent + entity persistence** `FE-537` `done` — I20, I21, I22 6. **Entity sidebar (read-only)** `FE-538` `done` — I23 -6b. **AI SDK-native chat pivot** `FE-559` `done` — I21↑, I22↑, I23↑; core tools spike proven (A29) +6b. **AI SDK-native chat pivot** `FE-559` `done` — I21↑, I22↑, I23↑; core tools spike proven 6b1. **Workspace seam characterization oracle** `done` — I24, I25 - Purpose: add a client integration harness around the interview workspace before the state-ownership refactor - Coverage: initial hydration from persisted turns, same-project refresh stability, observer-result sidebar reactivity, option-selection follow-through @@ -149,7 +149,7 @@ 7b. **Canonical knowledge model foundation + cutover seam** — Introduce canonical `goal` / `term` / `context` kinds, unify durable knowledge storage and cross-kind edges behind the generic seam, and preserve slice 7 coherence through the smallest necessary compatibility projection rather than migration-hardening legacy scratch data. 
`done` - Requirements: → SPEC.md §Requirements #5, #6, #7, #11, #12, #13 - - Assumptions: → SPEC.md §Assumptions A14, A40, A41 + - Assumptions: → SPEC.md §Assumptions A14, A40 - Decisions: → SPEC.md §Decisions D5, D17, D49, D51, D52, D53, D54, D59, D61, D62, D63, D67, D68, D69 - Candidate invariant goals: canonical knowledge writes/readiness coexist with scope closure during cutover; no new Phase 5/6 slice depends on durable `framing` - Invariants to respect: → SPEC.md §Invariants I20, I21, I23, I68, I72 @@ -161,14 +161,20 @@ - `7b.1` Canonical scope kinds through the generic seam. `done` - `7b.2` Generic edge/storage cutover + scope-readiness compatibility projection beyond legacy decision/assumption tables. `done` -8. **Design mode (commitment / exploration)** — Implement the second workflow mode on the new turn and canonical knowledge model after 7b lands, while generalizing the current scope-only proposal/confirmation seam into a shared phase-closing model with deterministic closeability, coarse readiness bands, and explicit closure basis. The interviewer walks design forks; the observer captures decisions, assumptions, new constraints, and emerging requirements against the unified knowledge seam. `not-started` +8. **Design mode (commitment / exploration)** — Implement the second workflow mode on the new turn and canonical knowledge model after 7b lands, while generalizing the current scope-only proposal/confirmation seam into a shared phase-closing model with deterministic closeability, coarse readiness bands, and explicit closure basis. The interviewer walks design forks; the observer captures decisions, assumptions, new constraints, and emerging requirements against the unified knowledge seam. 
`in-progress` - Requirements: → SPEC.md §Requirements #2, #3, #5, #6, #7, #8 - Assumptions: → SPEC.md §Assumptions A14, A15, A28, A40 - - Decisions: → SPEC.md §Decisions D2, D5, D6, D61, D62, D65, D66, D67, D68, D70 + - Decisions: → SPEC.md §Decisions D2, D5, D6, D61, D62, D65, D66, D67, D68, D70, D71, D72, D73, D74, D75 - Candidate invariant goals: mode transition preserves interview continuity; design-mode turns produce coherent decision/assumption graph growth on the canonical knowledge seam; phase-closing state separates status, closeability, readiness, and closure basis instead of hidden interviewer authority - Invariants to respect: → SPEC.md §Invariants I18, I19, I21, I22, I72, I73 + - Invariants established: → SPEC.md §Invariants I79, I80, I81, I82, I83, I84, I85, I86 - Acceptance: after scope closes and slice 7b lands, the interview enters design mode; design turns yield coherent commitments and assumptions on the canonical knowledge layer; the UI projects design status/closeability/readiness; and once the minimum bar is met the user can either accept an interviewer-recommended close or force-close design with persisted closure basis/readiness snapshot + - **Observed current state (2026-04-09, tracer bullets 8.1–8.3 + completed phase-close refactor):** confirmed scope closure now projects through a shared workflow state carrying `status`, `closeability`, `readiness`, `closureBasis`, and pending-proposal visibility instead of the old scope-only `open/proposed/confirmed` seam. The next prepared turn after confirmed scope closure now enters `design` automatically, the observer runs against that design turn phase, and the workspace header renders shared workflow summaries for closed scope plus the newly active design phase rather than hardcoding scope-only status copy. 
Design now also uses the same typed `data-phase-summary` closure seam as scope: the design interviewer can recommend closure, the workflow projects a pending design summary through the shared phase state, confirmation persists design closure, and the next prepared turn enters `requirements`. That same typed confirmation seam now also carries a user-forced design close with visible `closureBasis: user_forced`, so forced-close debt survives refresh/resume and still hands the next prepared turn into `requirements`. The completed phase-close refactor also hardened the seam end to end: close intent moved into explicit shared phase-close commands, force-close availability now projects from one shared workflow-policy seam consumed by both UI and server validation, confirmed `phase_outcome` rows persist durable `closure_basis`, read-side workflow projection trusts that durable phase-outcome field instead of reconstructing provenance from confirmation-turn payloads, `data-confirmation` is now an explicit discriminated command union consumed consistently by the workspace controller and `/chat` request handling, and the remaining user-visible close-command labels, rejection copy, and forced-close summary text now also project from the shared phase-close module instead of being rebuilt inline across layers. - **Verification approach**: inner — mode-transition/controller/workflow-state projection tests. Outer — manual design walkthrough covering interviewer-recommended close, user-forced close, and visible carried-debt caveats. + - Tracer bullets: + - `8.1` Design-mode entry + shared workflow-state projection. `done` + - `8.2` Design-phase closure proposal + requirements handoff. `done` + - `8.3` User-forced design close + carried-debt projection. `done` 9. 
**Requirements-review mode** — Synthesize the requirement set from the full canonical knowledge layer, then let the user approve, edit, merge, reject, and add requirements through review turns using the shared phase-closing seam rather than a requirements-only completion bit. This slice assumes the redesigned ontology/graph from 7a + 7b, not the current transitional `framing` seam. `not-started` - Requirements: → SPEC.md §Requirements #6, #7, #8, #11, #13 diff --git a/memory/SPEC.md b/memory/SPEC.md index 5c9508b4..4aa0404c 100644 --- a/memory/SPEC.md +++ b/memory/SPEC.md @@ -22,7 +22,7 @@ The architecture (layered: db → core → adapters): - **Database**: SQLite via Drizzle ORM + `better-sqlite3` — TypeScript schema is single source of truth for types, DDL, and migrations. Auto-applies at startup. - **Core**: Interface-agnostic service layer — turn tree operations, project-state loading, typed prompt/context building, knowledge-item lifecycle, observer invocation, phase management, readiness management, and export. No transport knowledge. -- **Agent engine**: AI SDK + Anthropic provider (`ai`, `@ai-sdk/anthropic`) — `ToolLoopAgent` powers the interviewer and `generateObject` powers the observer. Shared `BrunchUIMessage` / data-part contracts span request validation, persistence, server streaming, and client hydration. Future multi-step hardening builds on the AI SDK loop surface rather than a handwritten raw-event translator. (D30, D31) +- **Agent engine**: AI SDK + Anthropic provider (`ai`, `@ai-sdk/anthropic`) — `ToolLoopAgent` powers the interviewer and `generateObject` powers the observer. Shared `BrunchUIMessage` / data-part contracts span request validation, persistence, server streaming, and client hydration. Future multi-step hardening builds on the AI SDK loop surface rather than a handwritten raw-event translator. 
(D30) - **Observer agent**: Separate extraction call after each turn — captures typed knowledge items plus dependency / derivation edges using a phase-aware extraction policy. Invoked by core after turn completion. - **Web adapter**: Express.js returns AI SDK UI Message Stream SSE directly via `createUIMessageStream`. React + Vite + `@ai-sdk/react` `useChat` client consume the same typed message contract. - **CLI adapter**: (future) Terminal I/O consuming the same `DomainEvent` stream @@ -79,7 +79,14 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. + assumption would preserve a process-local question after the structural choice landed. + Pruned 2026-04-09: removed A42, A43 — temporary phase-close refactor cutover assumptions. + Both were validated and are now embedded in D73, D75 and invariants I85, I86 rather than + remaining live planning questions. + Pruned 2026-04-09: removed A29, A41 — validated implementation-era cutover assumptions now + embedded in shipped architecture rather than live planning questions. A29 is embodied in the + shared core-tools boundary and horizon note; A41 is embodied in the completed 7b cutover and + no longer constrains future work. 
--> | # | Assumption | Confidence | Dependent decisions | Implicated slices | Validation approach | | --- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------------- | --------------------------------- | ----------------------------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | @@ -88,23 +95,24 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. | A6 | Turn-tree branching in SQLite is sufficient for decision revisit and undo in a single-user tool | high | D7 | Turn tree, Branching | Validate with realistic branch/merge scenarios | | A7 | Users arriving at the tool have a reasonably defined goal | medium | — | Scope phase | User testing; characterization kickoff mode mitigates if false | | A14 | A second-thread observer agent can reliably extract typed knowledge items and graph edges from a turn plus accumulated context | **validated** | D4, D5, D13, D52, D53, D54, D55, D56, D61, D62 | Observer agent, Knowledge layer | Validated structurally for the current widened ontology — canonical scope-mode `goal` / `term` / `context` / `constraint`, design-mode mixed scope-kind + generic `decision` / `assumption` deltas, requirements-mode `requirement` extraction, and criteria-mode `criterion` extraction — through the observer seam: `observer.test.ts`, `context.test.ts`, `parts.test.ts`, `app.test.ts`, and `InterviewWorkspace.test.tsx` prove widened observer contracts, generic persistence, in-band sync, and workspace refresh. 
Live-model discriminability for the sharper canonical scope kinds remains an outer-loop concern tracked separately in A40. | -| A15 | The LLM can produce a useful coarse readiness estimate and closure recommendation, but phase closure authority must not depend solely on that judgment | medium | D3, D65, D66, D70 | Phase resolution, readiness projection | Probe across varied project types; compare model readiness bands and closure recommendations against user closes / forced closes, measuring disagreement, false-positive close recommendations, and override frequency | +| A15 | The LLM can produce a useful coarse readiness estimate and closure recommendation, but phase closure authority must not depend solely on that judgment | medium | D3, D65, D66, D70 | Phase resolution, readiness projection | Partially validated structurally: slices 8.2 and 8.3 now prove the shared phase-closing seam supports both interviewer-recommended design closure and user-forced design closure, with persisted closure basis surviving reload and handoff into requirements (`db.test.ts`, `core.test.ts`, `app.test.ts`, `InterviewWorkspace.test.tsx`). Remaining validation still depends on outer-loop comparison of model recommendations vs user overrides across varied project types. | | A16 | AI SDK `useChat` hook's `ToolUIPart` state machine models all permutations of pending, error, and success for tool calls | high | D14, D58 | Rich chat UI, pending-question projection | Partially validated: typed `tool-ask_question` parts render with correct state labels, streamed ask-question output now projects into a dedicated pending-question turn-card state without fabricating persisted turns in `workspace-data.test.ts`, `workspace-controller.test.tsx`, and `InterviewWorkspace.test.tsx`, and manual browser verification confirmed the pending-question card now appears without refresh. 
| | A20 | Observer results can be delivered as typed data parts on the existing chat stream without holding the connection open unacceptably long | high | D22 | Observer agent, Entity sidebar | Measure observer latency with `generateObject`; if >5s, fall back to out-of-band SSE | | A21 | `useChat` `onData` callback reliably bridges to `queryClient.invalidateQueries` without stale-closure issues | **validated** | D22 | Entity sidebar | Validated: `InterviewWorkspace.test.tsx` covers `data-observer-result` → query invalidation → sidebar refresh, plus manual outer-loop verification remains for live browser/runtime behavior. | -| A28 | AI SDK `ToolLoopAgent` with `stopWhen: stepCountIs(N)` is sufficient for brunch's multi-step interviewing, review, and phase-transition needs — no custom agent loop required | high | D31 | Agent loop, Phase transitions | Validate with mode-transition and review slices: agent must ask, synthesize, and propose closure without a handwritten loop. | -| A29 | Models can reliably compose generic filesystem tools (read, write, edit, bash, grep, find, ls) to explore and characterize an existing project | **validated** | D32 | Characterization kickoff | Validated (spike): `ToolLoopAgent` with 7 core tools explored brunch in 22 tool calls across 23 steps. See `spike/filesystem-tools.ts`. | +| A28 | AI SDK `ToolLoopAgent` with `stopWhen: stepCountIs(N)` is sufficient for brunch's multi-step interviewing, review, and phase-transition needs — no custom agent loop required | high | D30 | Agent loop, Phase transitions | Partially validated structurally: slices 8.1–8.3 now prove confirmed scope closure can hand off into a design-phase interviewer/observer turn, that design can recommend closure and hand off into requirements, and that a user-forced design close can bypass a recommendation without a handwritten loop (`interview.test.ts`, `db.test.ts`, `core.test.ts`, `app.test.ts`). 
Remaining proof for later review/closure slices still depends on the downstream phase-transition work. | | A33 | Structured turn responses can replace today's single-select flow while keeping persisted response parts, transcript hydration, and downstream context projection aligned for the first thin slice | **validated** | D23, D24, D25, D45, D46, D47, D48, D57, D60 | 6d flexible turn-response model; Phase 4 response-seam refactor | Validated: `parts.test.ts`, `app.test.ts`, `context.test.ts`, `observer.test.ts`, `turn-response.test.ts`, `workspace-data.test.ts`, and `InterviewWorkspace.test.tsx` now prove zero/one/many selected options plus optional free-text persist, rehydrate, and reach interviewer history, observer context, and workspace turn-card state coherently through the shared turn-response seam rather than selected-option flags. | | A40 | The observer and review workspace can discriminate `goal`, `term`, and `context` well enough for a first canonical-scope implementation if low-confidence cases stay reviewable instead of collapsing back into `framing` | medium | D5, D67, D68, D69 | 7b canonical knowledge model foundation; 8 design mode; 9 requirements-review mode; 12 knowledge workspace | Validate with fixture probes and the first canonical-scope review flow: measure confusion between `goal` / `term` / `context`, then confirm that workspace normalization/editing can correct low-confidence captures without losing provenance or blocking downstream review. 
| -| A41 | Existing SQLite interview data is disposable enough that the canonical-knowledge cutover can prefer destructive schema/table replacement over migration-hardening, as long as the new end-to-end logic is coherent and complete | **validated** | D61, D67, D68, D69 | 7b canonical knowledge model foundation | Validated structurally by slices 7b.1 and 7b.2: the clean-DB path now passes through shared registry → observer-result schema → generic persistence for all active knowledge kinds, including decision/assumption commitments and dependency edges, with compatibility projection tests instead of migration-specific bridge logic. | ## Decisions -30. **Vercel AI SDK replaces both Claude Agent SDK and raw Anthropic SDK** — `@ai-sdk/anthropic` provider with AI SDK primitives: `ToolLoopAgent` powers the interviewer (typed tools via `tool()` with Zod schemas, multi-step loop via `stopWhen`), `generateObject` powers the observer (structured extraction with Zod schema, no JSON parsing), `createUIMessageStream` + `pipeUIMessageStreamToResponse` handle server-side streaming, `validateUIMessages` validates incoming chat payloads. No hand-written stream translator, no DomainEvent layer on the web path. The `@anthropic-ai/sdk` package remains as a transitive dependency only. Depends on: —. Supersedes: Claude Agent SDK, raw Anthropic SDK approach, D27 (generator composition), D28 (outputFormat), D29 (ResultMessage metrics), custom agent loop plan (old D31). + -31. **`ToolLoopAgent` as the agent loop** — AI SDK's built-in `ToolLoopAgent` provides the tool execution loop: model calls tool → SDK validates input via Zod → executes handler → re-submits result → repeats until `stopWhen` condition or `end_turn`. No custom `agentLoop()` function needed. `activeTools` and `prepareCall` enable per-step tool gating for future phase-specific behavior. Depends on: D30. Supersedes: planned custom agent loop modeled after pi-mono. +30. 
**Vercel AI SDK replaces both Claude Agent SDK and raw Anthropic SDK** — `@ai-sdk/anthropic` provider with AI SDK primitives: `ToolLoopAgent` powers the interviewer (typed tools via `tool()` with Zod schemas, multi-step loop via `stopWhen`), `generateObject` powers the observer (structured extraction with Zod schema, no JSON parsing), `createUIMessageStream` + `pipeUIMessageStreamToResponse` handle server-side streaming, `validateUIMessages` validates incoming chat payloads. No hand-written stream translator, no DomainEvent layer on the web path. The `@anthropic-ai/sdk` package remains as a transitive dependency only. Depends on: —. Supersedes: Claude Agent SDK, raw Anthropic SDK approach, D27 (generator composition), D28 (outputFormat), D29 (ResultMessage metrics), custom agent loop plan (old D31). -32. **Core filesystem tools following pi-mono pattern** — 7 generic tools (read, write, edit, bash, grep, find, ls) in `src/server/tools/`, each a factory function returning an AI SDK `tool()` bound to a working directory. Tools are thin wrappers around Node.js fs APIs and shell commands (rg, fd), with truncation limits (500 lines / 64KB) following pi-mono's defaults. Composed via `createCoreTools(cwd)`. First use case: project characterization kickoff mode. Depends on: D30, A29. Supersedes: —. +32. **Core filesystem tools following pi-mono pattern** — 7 generic tools (read, write, edit, bash, grep, find, ls) in `src/server/tools/`, each a factory function returning an AI SDK `tool()` bound to a working directory. Tools are thin wrappers around Node.js fs APIs and shell commands (rg, fd), with truncation limits (500 lines / 64KB) following pi-mono's defaults. Composed via `createCoreTools(cwd)`. First use case: project characterization kickoff mode. Depends on: D30. Supersedes: —. 33. 
**Component-level workspace oracle before state refactors** — The interview workspace has a client integration harness (`InterviewWorkspace.test.tsx`) that uses the real React Query cache and component tree while mocking `useChat` transport boundaries. It locks four seam behaviors before state-ownership refactors: initial hydration from persisted turns, same-project refresh preserving local chat state, `data-observer-result` invalidating entities into the sidebar, and option selection flowing through route refresh and chat submission. Depends on: D19, D22. Supersedes: manual-only workspace seam verification. @@ -172,7 +180,7 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. 65. **Explicit phase outcomes persist closure provenance, while workflow state projects status, closeability, and readiness from the active path** — A phase outcome should persist as a durable `phase_outcome` row with proposal-turn provenance, closure-turn linkage, summary text, closure basis, and a captured readiness-band snapshot at the moment of close rather than hiding workflow state behind `turn.is_resolution` or transient UI state. The current workflow state for each phase should project (a) lifecycle status (`unstarted` / `in_progress` / `closed` / `invalidated`), (b) deterministic phase-specific closeability, and (c) current readiness band from the active path instead of collapsing them into one opaque completion bit. Depends on: D3, D17, D62. Supersedes: D65-old workflow-state projection that exposed only proposal / confirmation state. -66. **Phase closing uses one transcript-friendly seam whether the interviewer recommends it or the user forces it** — The interviewer may either ask another structured question or emit a closure recommendation with a typed `data-phase-summary` artifact. 
Once the phase-specific closeability rule is satisfied, the user may either accept that recommendation or force-close the phase anyway; both paths should persist through the same chat-friendly seam with explicit closure basis so downstream UI and export can distinguish normal close from carried debt. Depends on: D24, D31, D62, D65. Supersedes: D66-old confirmation-only closure flow in which only the interviewer could initiate transition. +66. **Phase closing uses one transcript-friendly seam whether the interviewer recommends it or the user forces it** — The interviewer may either ask another structured question or emit a closure recommendation with a typed `data-phase-summary` artifact. Once the phase-specific closeability rule is satisfied, the user may either accept that recommendation or force-close the phase anyway; both paths should persist through the same chat-friendly seam with explicit closure basis so downstream UI and export can distinguish normal close from carried debt. Depends on: D24, D30, D62, D65. Supersedes: D66-old confirmation-only closure flow in which only the interviewer could initiate transition. 67. **Scope closure stays anchored to a projected scope bundle, not a `framing` bucket** — Slice 7’s phase-outcome architecture remains valid if scope readiness reads from a projection over canonical `goal` / `term` / `context` / `constraint` items plus any unmigrated legacy `framing` rows during migration, rather than binding closure semantics to one transitional storage kind. Depends on: D3, D5, D61, D62, D65. Supersedes: coupling scope closure semantics to the durability of the current `framing` label. @@ -182,6 +190,16 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. 70. 
**First-pass closeability rules are deterministic, phase-specific, and intentionally minimal** — Closeability should start as an existential minimum bar the frontend can explain: a phase must have at least one phase-relevant captured signal before the user is allowed to close it. Readiness remains a separate descriptive band that may still be low when the user closes, which avoids both hidden model authority and false-precision percentage gates. Depends on: D65, D66. Supersedes: interviewer-only implicit closure thresholds. +71. **The active interviewer phase is the first workflow phase on the active path without a closed outcome** — Once a phase is confirmed closed on the active path, subsequent turns should automatically enter the next workflow mode rather than inheriting the previous turn's phase. The shared workflow projection remains the source of truth for both server-side turn preparation and client-side phase display, so scope closure hands off directly into design mode before any design-specific closure mechanics exist. Depends on: D65, D66, D70. Supersedes: defaulting new turns to `scope` and hardcoding scope-only workflow display. + +72. **User-forced phase closes reuse the typed confirmation seam instead of a separate mutation path** — Interviewer-recommended closes and user-forced closes should both travel through the same `data-confirmation` seam, now as explicit command variants rather than optional-field payloads. A recommendation acceptance carries `{ kind: 'confirm-proposed-phase-closure', proposalTurnId, phase }`; a user-forced close carries `{ kind: 'force-close-active-phase', phase }`. This preserves one chat-native close entrypoint without introducing a separate mutation path or UI-only state channel. Depends on: D66, D71. Supersedes: adding a dedicated force-close mutation path separate from proposal confirmation. + +73. 
**`data-confirmation` now carries an explicit discriminated phase-close command union end to end** — The chat transport for phase closing should make intent explicit at the schema boundary: `confirm-proposed-phase-closure` and `force-close-active-phase` are the payload variants, not an interpretation layered over optional fields. Shared builders, request validation, app routing, and persistence should all consume that same discriminated command shape so invalid mixed states are unrepresentable. Depends on: D66, D72. Supersedes: ad hoc optional-field branching over `data-confirmation` payloads. + +74. **Force-close availability projects from workflow truth through one shared policy seam** — The rule for whether a phase may be force-closed should be derived once from workflow state rather than re-encoded separately in UI predicates and server guards. In the current slice that shared projection remains intentionally narrow — design only, active phase only, closeable, and no pending proposal — but both affordance rendering and request validation should read the same policy and preserve the current rejection semantics. Depends on: D65, D66, D73. Supersedes: duplicated force-close availability checks across route and server layers. + +75. **Workflow projection reads closure provenance only from durable phase outcomes** — Confirmed `phase_outcome` rows should store durable `closure_basis` directly at write time for both interviewer-recommended and user-forced closes, and workflow projection should trust that durable field as the sole provenance source. If a confirmed outcome lacks `closure_basis`, projection should surface `closureBasis: null` rather than reconstructing provenance from confirmation-turn payloads. Depends on: D65, D72, D73, D74. Supersedes: transcript-driven closure-basis recovery during workflow projection. + 26. 
**`md-pen` for programmatic markdown rendering** — Structured data (entity tables, dependency graphs, checklists) rendered to markdown via `md-pen` rather than hand-rolled string concatenation. Pure string-return functions (`table()`, `taskList()`, `mermaid()`, `heading()`, `alert()`, `details()`) compose by nesting — no AST, no intermediate representation. Escaping is context-aware per function (table cells, URLs, code fences), eliminating a class of bugs when rendering user-supplied text from interviews. Primary use cases: (1) observer context builders presenting growing entity graphs to agents (`table()` for decisions/assumptions with metadata, `taskList()` for reviewed/unreviewed items), (2) spec export rendering active-path entities into downloadable markdown (slice 13), (3) any future agent-facing or user-facing projection of structured data. Zero dependencies, ESM-only, TypeScript-first. Depends on: —. Supersedes: hand-rolled string assembly in context builders. ### Domain model @@ -243,7 +261,7 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. 
| I19 | Context builder equivalence | Slice 4a (parts persistence) | context.test.ts (9 tests) | D25 | | I20 | Entity persistence with turn linkage | Slice 5 (observer) | db.test.ts (7 tests), observer.test.ts | D4, D5 | | I21 | Observer-result in-band sync | Slice 5 (observer) | observer.test.ts, app.test.ts | D22 | -| I22 | AI SDK-native interviewer path | Slice 6b (AI SDK pivot) | app.test.ts, interview.test.ts | D30, D31 | +| I22 | AI SDK-native interviewer path | Slice 6b (AI SDK pivot) | app.test.ts, interview.test.ts | D30 | | I23 | Entity sidebar reactive update | Slice 6 (sidebar) | app.test.ts, manual (outer loop) | D22 | | I24 | Workspace hydration boundary stability | Slice 6b1 (workspace oracle) | InterviewWorkspace.test.tsx | D19, D22 | | I25 | Workspace event bridge correctness | Slice 6b1 (workspace oracle) | InterviewWorkspace.test.tsx | D9, D22 | @@ -300,6 +318,14 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. | I71 | Workspace turn-card selected-option rendering rehydrates from persisted `data-turn-response` option IDs, so saved replies stay visibly selected even when durable option flags are false | Post-refactor cleanup slice (workspace durable turn-response seam) | workspace-data.test.ts, InterviewWorkspace.test.tsx | D39, D45, D48, D60 | | I72 | Explicit scope phase outcomes persist proposal/confirmation state in a dedicated readiness seam, project current workflow status from the active path, and supersede when the proposal turn leaves that path | Slice 7 (explicit phase outcomes + scope closure) | db.test.ts, app.test.ts | D3, D17, D65 | | I73 | Workspace view-state can project a typed `data-phase-summary` closure proposal into a confirmable card and submit typed `data-confirmation` parts through the chat seam without reopening the normal prompt | Slice 7 (explicit phase outcomes + scope closure) | workspace-data.test.ts, InterviewWorkspace.test.tsx | D24, D66 | +| I79 | Workflow projection now exposes 
shared `status`, `closeability`, `readiness`, and `closureBasis` fields across a closed scope phase and the newly active design phase instead of the old scope-only `open/proposed/confirmed` seam | Slice 8.1 (design-mode entry + shared workflow projection) | db.test.ts, app.test.ts | D65, D70 | +| I80 | After confirmed scope closure, the next prepared turn and workspace both enter design mode by reading the shared workflow projection rather than defaulting new turns and UI state back to scope | Slice 8.1 (design-mode entry + shared workflow projection) | core.test.ts, app.test.ts, InterviewWorkspace.test.tsx | D66, D71 | +| I81 | Scope and design now share one typed phase-summary closure seam, so a design closure proposal can persist, project `proposalPending`, confirm through chat, and advance the next active turn into requirements | Slice 8.2 (design-phase closure proposal + requirements handoff) | interview.test.ts, core.test.ts, app.test.ts | D66, D71 | +| I82 | Typed `data-confirmation` parts now carry either interviewer-recommended proposal confirmation or a user-forced design close, and workflow projection recovers the forced-close basis from persisted confirmation turns so requirements handoff survives reload through the same chat seam | Slice 8.3 (user-forced design close + carried-debt projection) | parts.test.ts, db.test.ts, core.test.ts, app.test.ts, InterviewWorkspace.test.tsx | D66, D72 | +| I83 | `data-confirmation` now carries an explicit discriminated phase-close command union, and workspace/controller/request seams consume that command shape end to end for both interviewer-recommended and user-forced closes | Refactor commit 5 (explicit close-command transport) | phase-close.test.ts, parts.test.ts, app.test.ts, InterviewWorkspace.test.tsx | D73 | +| I84 | Force-close availability now projects once from shared workflow policy, and both workspace affordance rendering and server-side validation consume that projection while preserving the current 
rejection semantics | Refactor commit 2 (shared force-close action projection) | phase-close.test.ts, app.test.ts, InterviewWorkspace.test.tsx | D74 | +| I85 | Workflow projection reads closure provenance only from durable `phase_outcome.closure_basis`, so confirmed outcomes with missing durable provenance project `closureBasis: null` instead of re-reading confirmation-turn payloads | Refactor commit 4 (workflow projection closure-basis cutover) | db.test.ts, app.test.ts | D75 | +| I86 | Phase-close command labels, force-close rejection messages, and user-forced close summaries derive from shared `phase-close` helpers, so UI and server layers no longer reconstruct those semantics inline | Refactor commit 6 (phase-close module deepening) | phase-close.test.ts, app.test.ts, InterviewWorkspace.test.tsx | D73, D74 | ## Lexicon @@ -365,7 +391,7 @@ Detailed schema and mode-model rationale: `docs/design/INTERVIEW_MODE_MODEL.md`. | **in-band sync** | Observer knowledge updates delivered as typed data parts on the existing chat SSE stream. Default mechanism — zero additional infrastructure (D22). | | **out-of-band sync** | Observer knowledge updates delivered via a dedicated `EventSource` SSE channel (`/api/events/:projectId`). Fallback mechanism if observer becomes async (D22). | | **cache invalidation** | Signaling TanStack Query that cached data is stale. In the current web path, `useChat` `onData` invalidates the entity query from observer results, while route invalidation refreshes durable project state on stream completion. | -| **ToolLoopAgent** | AI SDK's built-in agent class that manages the model → tool-call → execute → re-submit loop. Powers the interviewer. Configured with `tools`, `stopWhen`, `providerOptions`. Methods: `generate()` (non-streaming), `stream()` (streaming). See D31. | +| **ToolLoopAgent** | AI SDK's built-in agent class that manages the model → tool-call → execute → re-submit loop. Powers the interviewer. 
Configured with `tools`, `stopWhen`, `providerOptions`. Methods: `generate()` (non-streaming), `stream()` (streaming). See D30. | | **generateObject** | AI SDK function for structured output. Takes a Zod schema and returns a validated object. Powers the observer's extraction pass. See D30. | | **core tools** | 7 generic filesystem tools (read, write, edit, bash, grep, find, ls) in `src/server/tools/`. Factory: `createCoreTools(cwd)`. Follow pi-mono's pattern. See D32. | | **BrunchUIMessage** | `UIMessage` — the typed message contract spanning server validation, persistence, SSE streaming, and client hydration. Defined in `src/shared/chat.ts`. | @@ -510,16 +536,17 @@ This projection difference is a deliberate design choice, not an implementation | File | Tests | Protects | | ----------------------------- | ----- | ----------------------------------------------------- | -| db.test.ts | 35 | I5, I6, I9, I10, I11, I20, I48, I50, I52, I72, I74, I77, I78 | +| db.test.ts | 37 | I5, I6, I9, I10, I11, I20, I48, I50, I52, I72, I74, I77, I78, I79, I82, I85 | | knowledge.test.ts | 1 | I68, I75 | -| app.test.ts | 16 | I1, I2, I3, I7, I14, I21, I23, I44, I46, I47, I49, I51, I53, I55, I57, I59, I61, I63, I64, I69, I72, I74, I77 | -| core.test.ts | 6 | I12, I13, I18 | -| interview.test.ts | 6 | I16 | -| parts.test.ts | 12 | I17, I18, I44, I46, I47, I55, I57, I59, I61, I63 | +| app.test.ts | 24 | I1, I2, I3, I7, I14, I21, I23, I44, I46, I47, I49, I51, I53, I55, I57, I59, I61, I63, I64, I69, I72, I74, I77, I79, I80, I81, I82, I83, I84, I85, I86 | +| core.test.ts | 9 | I12, I13, I18, I80, I81, I82 | +| interview.test.ts | 8 | I16, I81 | +| parts.test.ts | 15 | I17, I18, I44, I46, I47, I55, I57, I59, I61, I63, I82, I83 | | context.test.ts | 13 | I19, I45, I47, I54, I56, I60, I62, I66, I68, I69 | | observer.test.ts | 9 | I20, I21, I54, I56, I58, I60, I62, I66, I74, I76, I77 | +| phase-close.test.ts | 13 | I83, I84, I86 | | turn-response.test.ts | 4 | I66, I69 | -| 
InterviewWorkspace.test.tsx | 17 | I24, I25, I23, I33, I34, I35, I36, I43, I44, I46, I47, I49, I51, I53, I55, I57, I59, I61, I63, I67, I68, I71, I73, I75 | +| InterviewWorkspace.test.tsx | 21 | I24, I25, I23, I33, I34, I35, I36, I43, I44, I46, I47, I49, I51, I53, I55, I57, I59, I61, I63, I67, I68, I71, I73, I75, I80, I82, I83, I84, I86 | | ProjectList.test.tsx | 2 | I36 | | workspace-data.test.ts | 7 | I33, I49, I51, I53, I65, I67, I70, I71, I73, I75 | | chat-hydration.test.ts | 3 | I35 | diff --git a/spike/filesystem-tools.ts b/spike/filesystem-tools.ts deleted file mode 100644 index d54d2f1c..00000000 --- a/spike/filesystem-tools.ts +++ /dev/null @@ -1,87 +0,0 @@ -/** - * Spike: Core filesystem tools with ToolLoopAgent - * - * Question: Can a ToolLoopAgent with generic filesystem tools reliably - * explore and characterize an existing project? - * - * Run: npx tsx --env-file=.env spike/filesystem-tools.ts [target-dir] - * - * Defaults to the brunch project root if no target dir is given. - */ -import { resolve } from 'node:path'; - -import { anthropic } from '@ai-sdk/anthropic'; -import { ToolLoopAgent, stepCountIs } from 'ai'; - -import { createCoreTools } from '../src/server/tools/index.js'; - -const targetDir = resolve(process.argv[2] ?? '.'); - -console.log(`\n═══ Filesystem Tools Spike ═══`); -console.log(`Target: ${targetDir}`); -console.log(`Model: ${process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514'}\n`); - -const tools = createCoreTools(targetDir); - -const agent = new ToolLoopAgent({ - model: anthropic(process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514'), - instructions: `You are a project analyst. You have access to filesystem tools to explore a codebase. - -Your job is to explore the project at the working directory and produce a structured characterization. - -Strategy: -1. Start with list_directory to see the top-level structure -2. Read key files (package.json, README, config files) to understand the tech stack -3. 
Use find_files and grep to understand the architecture -4. Read a few key source files to understand the domain - -Produce a summary covering: -- Project name and purpose -- Tech stack (languages, frameworks, key dependencies) -- Architecture (main modules, entry points, data flow) -- Key abstractions and domain concepts -- Current state (what's built, what's in progress)`, - tools, - providerOptions: { - anthropic: { - sendReasoning: true, - thinking: { - type: 'enabled', - budgetTokens: 8000, - }, - }, - }, - maxOutputTokens: 8000, - stopWhen: stepCountIs(30), -}); - -async function run() { - const startMs = Date.now(); - let stepCount = 0; - - const result = await agent.generate({ - prompt: `Explore and characterize the project in the current working directory. Use the available tools to understand its structure, purpose, and current state.`, - onStepFinish: (step) => { - stepCount++; - const toolCalls = step.toolCalls?.length ?? 0; - const toolNames = step.toolCalls?.map((tc) => tc.toolName).join(', ') ?? 
'none'; - console.log(` Step ${stepCount}: ${toolCalls} tool call(s) [${toolNames}]`); - }, - }); - - const durationMs = Date.now() - startMs; - - console.log(`\n═══ Results ═══`); - console.log(`Steps: ${stepCount}`); - console.log(`Duration: ${(durationMs / 1000).toFixed(1)}s`); - const u = result.usage; - console.log(`Tokens: ${u.totalTokens} (${u.promptTokens} prompt, ${u.completionTokens} completion)`); - console.log(`Finish reason: ${result.finishReason}`); - console.log(`\n═══ Agent Summary ═══\n`); - console.log(result.text); -} - -run().catch((err) => { - console.error('Spike failed:', err); - process.exit(1); -}); diff --git a/spike/observer-fidelity.ts b/spike/observer-fidelity.ts deleted file mode 100644 index f8b0fd88..00000000 --- a/spike/observer-fidelity.ts +++ /dev/null @@ -1,287 +0,0 @@ -/** - * Spike: Observer extraction fidelity - * - * Question: Can the LLM reliably extract decisions, assumptions, and - * dependency edges from a single turn's Q&A? - * - * Approach: 5 realistic fixture turns → observer extraction via query() → - * compare against hand-labeled golden master → measure capture rate. - * - * THROWAWAY CODE — not for promotion to production. - */ -import { query } from '@anthropic-ai/claude-agent-sdk'; - -interface Entity { - type: 'decision' | 'assumption'; - content: string; -} - -interface Fixture { - name: string; - turn: { - question: string; - why: string; - impact: string; - answer: string; - options: string[]; - }; - existingEntities: Entity[]; - expected: { - decisions: string[]; - assumptions: string[]; - }; -} - -const FIXTURES: Fixture[] = [ - { - name: 'scope-target-audience', - turn: { - question: 'Who is the primary target audience for this product?', - why: 'Target audience shapes feature priorities, UX complexity, and go-to-market strategy.', - impact: 'high', - answer: 'Developer tools teams at mid-size companies (50-500 engineers). 
They need to standardize how specs are written across teams.', - options: ['Individual developers', 'Developer tools teams at mid-size companies', 'Enterprise architecture groups', 'Startup founders'], - }, - existingEntities: [], - expected: { - decisions: ['Target audience is developer tools teams at mid-size companies'], - assumptions: ['Mid-size companies need standardized spec processes', 'Teams of 50-500 engineers have enough complexity to benefit'], - }, - }, - { - name: 'scope-deployment-model', - turn: { - question: 'How should the tool be deployed and accessed?', - why: 'Deployment model affects architecture, security requirements, and adoption friction.', - impact: 'high', - answer: 'Local-first CLI tool that runs on the developer\'s machine. No cloud service, no account creation. Just npx and an API key.', - options: ['Cloud SaaS with team accounts', 'Local CLI tool (npx)', 'VS Code extension', 'Self-hosted server'], - }, - existingEntities: [ - { type: 'decision', content: 'Target audience is developer tools teams at mid-size companies' }, - ], - expected: { - decisions: ['Local-first CLI deployment via npx'], - assumptions: ['Users are comfortable with CLI tools', 'API key management is acceptable friction', 'No cloud service needed for single-user tool'], - }, - }, - { - name: 'design-data-persistence', - turn: { - question: 'How should interview data be persisted between sessions?', - why: 'Persistence strategy affects resume capability, data portability, and architecture complexity.', - impact: 'high', - answer: 'SQLite embedded database, stored locally. 
Simple, zero-config, and the data can be inspected with standard tools.', - options: ['SQLite local database', 'JSON files on disk', 'Cloud database with sync', 'In-memory only (no persistence)'], - }, - existingEntities: [ - { type: 'decision', content: 'Target audience is developer tools teams at mid-size companies' }, - { type: 'decision', content: 'Local-first CLI deployment via npx' }, - { type: 'assumption', content: 'Users are comfortable with CLI tools' }, - ], - expected: { - decisions: ['SQLite for local data persistence'], - assumptions: ['SQLite is sufficient for single-user workloads', 'Users want to inspect data with standard tools'], - }, - }, - { - name: 'design-conversation-model', - turn: { - question: 'Should the interview be a flat conversation or support branching when decisions are revisited?', - why: 'The conversation model determines how decision revisits work and whether spec evolution is traceable.', - impact: 'high', - answer: 'Tree-based conversation with branching. When a decision is revisited, the conversation forks. The active path determines the current spec state.', - options: ['Flat conversation log', 'Tree with branching (git-like)', 'Append-only with edit markers'], - }, - existingEntities: [ - { type: 'decision', content: 'SQLite for local data persistence' }, - { type: 'decision', content: 'Local-first CLI deployment via npx' }, - ], - expected: { - decisions: ['Tree-based conversation model with branching'], - assumptions: ['Users understand branching metaphor from git', 'Decision revisit is a core workflow'], - }, - }, - { - name: 'constraints-api-provider', - turn: { - question: 'Should the tool support multiple AI providers or focus on one?', - why: 'Multi-provider support adds abstraction cost and testing burden. Single-provider allows deeper integration.', - impact: 'medium', - answer: 'Anthropic only for now. We can use the Claude Agent SDK directly without an abstraction layer. 
Multi-provider is a future consideration if demand exists.', - options: ['Anthropic only (Claude Agent SDK)', 'Multi-provider via AI SDK', 'Pluggable provider interface'], - }, - existingEntities: [ - { type: 'decision', content: 'Tree-based conversation model with branching' }, - { type: 'decision', content: 'SQLite for local data persistence' }, - { type: 'assumption', content: 'Users understand branching metaphor from git' }, - ], - expected: { - decisions: ['Anthropic-only, using Claude Agent SDK directly'], - assumptions: ['Claude Agent SDK is sufficient without abstraction layer', 'Multi-provider demand is uncertain'], - }, - }, -]; - -const OBSERVER_SYSTEM_PROMPT = `You are an observer agent for a spec elicitation tool. Your job is to extract decisions and assumptions from a single interview turn. - -A DECISION is a resolved choice the user made — something they committed to. -An ASSUMPTION is a belief that underlies the decision — something that could be falsified. - -Rules: -- Extract ONLY what this specific turn added. Do not repeat entities from the existing graph. -- Each decision should be a concise statement of the choice made. -- Each assumption should be a falsifiable belief. -- Keep extractions tight — 1-3 decisions and 0-3 assumptions per turn is typical. - -You MUST respond with ONLY a raw JSON object. No markdown fences, no explanation, no preamble. - -Format: -{"decisions": ["decision 1"], "assumptions": ["assumption 1"]} - -Start your response with { and end with }. 
Nothing else.`; - -function buildPrompt(fixture: Fixture): string { - const sections: string[] = []; - - if (fixture.existingEntities.length > 0) { - const lines = ['Existing entities (do NOT re-extract these):']; - for (const e of fixture.existingEntities) { - lines.push(` ${e.type}: ${e.content}`); - } - sections.push(lines.join('\n')); - } - - sections.push(`Current turn: - Question: ${fixture.turn.question} - Why: ${fixture.turn.why} - Impact: ${fixture.turn.impact} - Options: ${fixture.turn.options.join(', ')} - User's answer: ${fixture.turn.answer}`); - - return sections.join('\n\n'); -} - -function fuzzyMatch(extracted: string, expected: string): boolean { - const normalize = (s: string) => s.toLowerCase().replace(/[^a-z0-9 ]/g, ' ').replace(/\s+/g, ' ').trim(); - const a = normalize(extracted); - const b = normalize(expected); - if (a.includes(b) || b.includes(a)) return true; - const aWords = new Set(a.split(' ')); - const bWords = new Set(b.split(' ')); - const significant = [...bWords].filter((w) => w.length > 2); - const overlap = significant.filter((w) => aWords.has(w)); - return overlap.length >= Math.ceil(significant.length * 0.4); -} - -function scoreExtraction( - extracted: { decisions: string[]; assumptions: string[] }, - expected: { decisions: string[]; assumptions: string[] }, -): { decisionCapture: number; assumptionCapture: number; total: number } { - let decisionHits = 0; - for (const exp of expected.decisions) { - if (extracted.decisions.some((d) => fuzzyMatch(d, exp))) decisionHits++; - } - - let assumptionHits = 0; - for (const exp of expected.assumptions) { - if (extracted.assumptions.some((a) => fuzzyMatch(a, exp))) assumptionHits++; - } - - const totalExpected = expected.decisions.length + expected.assumptions.length; - const totalHits = decisionHits + assumptionHits; - - return { - decisionCapture: expected.decisions.length > 0 ? decisionHits / expected.decisions.length : 1, - assumptionCapture: expected.assumptions.length > 0 ? 
assumptionHits / expected.assumptions.length : 1, - total: totalExpected > 0 ? totalHits / totalExpected : 1, - }; -} - -async function runFixture(fixture: Fixture): Promise<{ - name: string; - extracted: { decisions: string[]; assumptions: string[] }; - score: ReturnType<typeof scoreExtraction>; - latencyMs: number; - error?: string; -}> { - const prompt = buildPrompt(fixture); - const start = Date.now(); - - try { - let responseText = ''; - for await (const msg of query({ - prompt, - options: { - model: process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514', - systemPrompt: OBSERVER_SYSTEM_PROMPT, - maxTurns: 1, - includePartialMessages: true, - }, - })) { - const m = msg as any; - if (m.type === 'stream_event' && m.event?.type === 'content_block_delta') { - if (m.event.delta?.type === 'text_delta' && m.event.delta.text) { - responseText += m.event.delta.text; - } - } else if (m.type === 'assistant') { - for (const block of m.message?.content ?? []) { - if (block.type === 'text') responseText = block.text; - } - } - } - - const latencyMs = Date.now() - start; - const jsonStr = responseText.replace(/```json\n?/g, '').replace(/```\n?/g, '').trim(); - const extracted = JSON.parse(jsonStr) as { decisions: string[]; assumptions: string[] }; - const score = scoreExtraction(extracted, fixture.expected); - - return { name: fixture.name, extracted, score, latencyMs }; - } catch (err) { - return { - name: fixture.name, - extracted: { decisions: [], assumptions: [] }, - score: { decisionCapture: 0, assumptionCapture: 0, total: 0 }, - latencyMs: Date.now() - start, - error: err instanceof Error ?
err.message : String(err), - }; - } -} - -async function main() { - console.log('Observer Extraction Fidelity Spike'); - console.log('==================================\n'); - - const results = []; - for (const fixture of FIXTURES) { - console.log(`Running: ${fixture.name}...`); - const result = await runFixture(fixture); - results.push(result); - - if (result.error) { - console.log(` ERROR: ${result.error}`); - } else { - console.log(` Latency: ${result.latencyMs}ms`); - console.log(` Decisions: ${result.extracted.decisions.join('; ')}`); - console.log(` Assumptions: ${result.extracted.assumptions.join('; ')}`); - console.log(` Score: decisions=${(result.score.decisionCapture * 100).toFixed(0)}% assumptions=${(result.score.assumptionCapture * 100).toFixed(0)}% total=${(result.score.total * 100).toFixed(0)}%`); - } - console.log(); - } - - const avgTotal = results.reduce((sum, r) => sum + r.score.total, 0) / results.length; - const avgDecision = results.reduce((sum, r) => sum + r.score.decisionCapture, 0) / results.length; - const avgAssumption = results.reduce((sum, r) => sum + r.score.assumptionCapture, 0) / results.length; - const avgLatency = results.reduce((sum, r) => sum + r.latencyMs, 0) / results.length; - const errors = results.filter((r) => r.error).length; - - console.log('Summary'); - console.log('-------'); - console.log(`Fixtures: ${results.length}, Errors: ${errors}`); - console.log(`Avg capture: decisions=${(avgDecision * 100).toFixed(0)}% assumptions=${(avgAssumption * 100).toFixed(0)}% total=${(avgTotal * 100).toFixed(0)}%`); - console.log(`Avg latency: ${avgLatency.toFixed(0)}ms`); - console.log(`Threshold: ≥80% total capture`); - console.log(`Result: ${avgTotal >= 0.8 ? 
'PASS ✓' : 'FAIL ✗'}`); -} - -main().catch(console.error); diff --git a/spike/raw-sdk-tool-use.ts b/spike/raw-sdk-tool-use.ts deleted file mode 100644 index 6fbd12d0..00000000 --- a/spike/raw-sdk-tool-use.ts +++ /dev/null @@ -1,263 +0,0 @@ -/** - * Spike: Raw Anthropic SDK tool execution - * - * Question: Can we replace @anthropic-ai/claude-agent-sdk query() with - * @anthropic-ai/sdk client.messages.stream() for reliable tool calls? - * - * Run: npx tsx --env-file=.env spike/raw-sdk-tool-use.ts - */ -import Anthropic from '@anthropic-ai/sdk'; - -const client = new Anthropic(); - -// Hand-written tool schema (oracle advice: avoid Zod-to-JSON-Schema edge cases for spike) -const ASK_QUESTION_TOOL: Anthropic.Messages.Tool = { - name: 'ask_question', - description: - 'Ask the user a structured interview question with options, strategic grounding, and impact signal.', - input_schema: { - type: 'object' as const, - properties: { - question: { type: 'string', description: 'The interview question' }, - why: { type: 'string', description: 'Why this question matters for the spec' }, - impact: { type: 'string', enum: ['high', 'medium', 'low'] }, - options: { - type: 'array', - items: { - type: 'object', - properties: { - content: { type: 'string' }, - is_recommended: { type: 'boolean' }, - }, - required: ['content', 'is_recommended'], - }, - minItems: 2, - }, - }, - required: ['question', 'why', 'impact', 'options'], - }, -}; - -// ── Test 1: Forced tool call with streaming ────────────────────────── - -async function testForcedToolCall() { - console.log('\n═══ Test 1: Forced tool_choice with streaming ═══\n'); - - const startMs = Date.now(); - - const stream = client.messages.stream({ - model: process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514', - max_tokens: 1024, - system: `You are a spec elicitation interviewer conducting the SCOPE phase. -Your job is to understand the user's project goal through structured questions. 
-For every turn, you MUST use the ask_question tool. Never respond with plain text.`, - messages: [{ role: 'user', content: 'I want to build a local-first note-taking app' }], - tools: [ASK_QUESTION_TOOL], - tool_choice: { type: 'tool' as const, name: 'ask_question' }, - }); - - // Collect streaming events to verify format - const eventTypes: string[] = []; - let toolCallId = ''; - let toolName = ''; - let jsonChunks = ''; - - stream.on('message_start', () => eventTypes.push('message_start')); - stream.on('contentBlockStart', (block) => { - eventTypes.push(`content_block_start:${block.content_block.type}`); - if (block.content_block.type === 'tool_use') { - toolCallId = block.content_block.id; - toolName = block.content_block.name; - } - }); - stream.on('inputJson', (_delta, snapshot) => { - jsonChunks = snapshot; - }); - stream.on('contentBlockStop', () => eventTypes.push('content_block_stop')); - stream.on('message_stop', () => eventTypes.push('message_stop')); - - const finalMessage = await stream.finalMessage(); - const durationMs = Date.now() - startMs; - - // Extract results - const toolUse = finalMessage.content.find( - (block): block is Anthropic.Messages.ToolUseBlock => - block.type === 'tool_use' && block.name === 'ask_question', - ); - - console.log('Event types observed:', eventTypes); - console.log('Stop reason:', finalMessage.stop_reason); - console.log('Tool call ID:', toolCallId); - console.log('Tool name:', toolName); - console.log('Tool use block found:', !!toolUse); - console.log('Duration:', durationMs, 'ms'); - console.log( - 'Usage:', - `in=${finalMessage.usage.input_tokens} out=${finalMessage.usage.output_tokens}`, - ); - - if (toolUse) { - const args = toolUse.input as Record; - console.log('\n── Tool call args ──'); - console.log(' question:', args.question); - console.log(' why:', args.why); - console.log(' impact:', args.impact); - console.log(' options:', JSON.stringify(args.options, null, 2)); - console.log('\n✅ PASS: Model called 
ask_question with structured args'); - } else { - console.log('\n❌ FAIL: Model did not call ask_question'); - } - - return !!toolUse; -} - -// ── Test 2: Raw stream events match translator expectations ────────── - -async function testRawStreamEvents() { - console.log('\n═══ Test 2: Raw stream event format (for translator compat) ═══\n'); - - // Use the low-level streaming API to get raw SSE events - const rawStream = await client.messages.create({ - model: process.env.ANTHROPIC_MODEL || 'claude-sonnet-4-20250514', - max_tokens: 1024, - stream: true, - system: 'Ask one structured question using the ask_question tool.', - messages: [{ role: 'user', content: 'I want to build a CLI password manager' }], - tools: [ASK_QUESTION_TOOL], - tool_choice: { type: 'tool' as const, name: 'ask_question' }, - }); - - // Collect raw events to check shape - const rawEvents: Array<{ type: string; hasIndex: boolean; hasContentBlock: boolean; hasDelta: boolean }> = []; - - for await (const event of rawStream) { - rawEvents.push({ - type: event.type, - hasIndex: 'index' in event, - hasContentBlock: 'content_block' in event, - hasDelta: 'delta' in event, - }); - } - - console.log('Raw event shapes:'); - for (const e of rawEvents) { - console.log(` ${e.type} | index: ${e.hasIndex} | content_block: ${e.hasContentBlock} | delta: ${e.hasDelta}`); - } - - // Check: events are directly typed (NOT wrapped in { type: 'stream_event', event: {...} }) - const hasMessageStart = rawEvents.some((e) => e.type === 'message_start'); - const hasContentBlockStart = rawEvents.some((e) => e.type === 'content_block_start'); - const hasContentBlockDelta = rawEvents.some((e) => e.type === 'content_block_delta'); - const hasContentBlockStop = rawEvents.some((e) => e.type === 'content_block_stop'); - const hasMessageDelta = rawEvents.some((e) => e.type === 'message_delta'); - const hasMessageStop = rawEvents.some((e) => e.type === 'message_stop'); - - console.log('\nEvent coverage:'); - console.log(' 
message_start:', hasMessageStart); - console.log(' content_block_start:', hasContentBlockStart); - console.log(' content_block_delta:', hasContentBlockDelta); - console.log(' content_block_stop:', hasContentBlockStop); - console.log(' message_delta:', hasMessageDelta); - console.log(' message_stop:', hasMessageStop); - - const allPresent = - hasMessageStart && - hasContentBlockStart && - hasContentBlockDelta && - hasContentBlockStop && - hasMessageStop; - - if (allPresent) { - console.log( - '\n✅ PASS: All expected event types present. Translator needs: remove stream_event envelope, consume events directly', - ); - } else { - console.log('\n❌ FAIL: Missing expected event types'); - } - - return allPresent; -} - -// ── Test 3: Observer structured output ─────────────────────────────── - -async function testObserverStructuredOutput() { - console.log('\n═══ Test 3: Observer structured output via raw API ═══\n'); - - // Test if we can get structured JSON output without the Agent SDK - const response = await client.messages.create({ - model: process.env.OBSERVER_MODEL || 'claude-haiku-4-5-20251001', - max_tokens: 2048, - system: `Extract decisions and assumptions from this interview exchange. -Return JSON matching the schema exactly.`, - messages: [ - { - role: 'user', - content: `Q: What platform are you targeting? -A: Desktop only, macOS and Linux. No mobile. - -Q: What database should we use? -A: SQLite — I want it to be local-first with no server. 
- -Existing entities: (none) - -Extract any NEW decisions and assumptions from these exchanges.`, - }, - ], - }); - - const textBlock = response.content.find( - (b): b is Anthropic.Messages.TextBlock => b.type === 'text', - ); - - console.log('Stop reason:', response.stop_reason); - console.log('Has text block:', !!textBlock); - console.log( - 'Usage:', - `in=${response.usage.input_tokens} out=${response.usage.output_tokens}`, - ); - - if (textBlock) { - console.log('\nRaw response (first 500 chars):', textBlock.text.slice(0, 500)); - - // Try to parse as JSON - try { - // Strip markdown code fences if present - const jsonStr = textBlock.text.replace(/^```json\n?/, '').replace(/\n?```$/, ''); - const parsed = JSON.parse(jsonStr); - console.log('\n✅ PASS: Response parses as valid JSON'); - console.log(' decisions count:', parsed.decisions?.length ?? 'missing'); - console.log(' assumptions count:', parsed.assumptions?.length ?? 'missing'); - } catch { - console.log('\n⚠️ WARNING: Response is not valid JSON — may need output_config.format'); - } - } - - return !!textBlock; -} - -// ── Run all tests ──────────────────────────────────────────────────── - -async function main() { - console.log('Spike: Raw Anthropic SDK Tool Use'); - console.log('=================================\n'); - - const results = { - forcedToolCall: await testForcedToolCall(), - rawStreamEvents: await testRawStreamEvents(), - observerOutput: await testObserverStructuredOutput(), - }; - - console.log('\n═══ Summary ═══'); - console.log('Forced tool call:', results.forcedToolCall ? '✅' : '❌'); - console.log('Stream events:', results.rawStreamEvents ? '✅' : '❌'); - console.log('Observer output:', results.observerOutput ? '✅' : '❌'); - - const allPass = Object.values(results).every(Boolean); - console.log('\nOverall:', allPass ? '✅ ALL PASS' : '❌ SOME FAILED'); - process.exit(allPass ? 
0 : 1); -} - -main().catch((err) => { - console.error('Spike failed:', err); - process.exit(1); -}); diff --git a/src/client/routes/InterviewWorkspace.test.tsx b/src/client/routes/InterviewWorkspace.test.tsx index 38d00710..24bfcbcd 100644 --- a/src/client/routes/InterviewWorkspace.test.tsx +++ b/src/client/routes/InterviewWorkspace.test.tsx @@ -136,18 +136,7 @@ function createProjectState({ is_recommended: boolean; is_selected: boolean; }>; - workflow?: { - phases: { - scope: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - design: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - requirements: { - status: 'open' | 'proposed' | 'confirmed'; - turnId: number | null; - summary: string | null; - }; - criteria: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - }; - }; + workflow?: ProjectState['workflow']; assistantParts?: Array>; } = {}): ProjectState { return { @@ -160,10 +149,42 @@ function createProjectState({ }, workflow: workflow ?? 
{ phases: { - scope: { status: 'open', turnId: null, summary: null }, - design: { status: 'open', turnId: null, summary: null }, - requirements: { status: 'open', turnId: null, summary: null }, - criteria: { status: 'open', turnId: null, summary: null }, + scope: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + design: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, }, }, turns: [ @@ -219,18 +240,7 @@ function createWorkspaceLoaderData({ is_recommended: boolean; is_selected: boolean; }>; - workflow?: { - phases: { - scope: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - design: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - requirements: { - status: 'open' | 'proposed' | 'confirmed'; - turnId: number | null; - summary: string | null; - }; - criteria: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - }; - }; + workflow?: ProjectState['workflow']; assistantParts?: Array>; entitySnapshot?: EntitiesData; } = {}): WorkspaceLoaderData { @@ -1018,15 +1028,43 @@ describe('InterviewWorkspace', () => { workflow: { phases: { scope: { - status: 'proposed', + status: 'in_progress', + closeability: true, + readiness: 'medium', + closureBasis: null, + proposalPending: true, turnId: 1, summary: 'Goals, terms, context, and constraints are sufficiently captured.', }, - design: { status: 'open', turnId: 
null, summary: null }, - requirements: { status: 'open', turnId: null, summary: null }, - criteria: { status: 'open', turnId: null, summary: null }, + design: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, }, - }, + } as any, assistantParts: [ { type: 'data-phase-summary', @@ -1047,12 +1085,228 @@ describe('InterviewWorkspace', () => { expect(useChatHarness.sendMessage).toHaveBeenCalledWith({ parts: [ { type: 'text', text: 'Confirm scope closure' }, - { type: 'data-confirmation', data: { turnId: 1, confirmed: true } }, + { + type: 'data-confirmation', + data: { kind: 'confirm-proposed-phase-closure', proposalTurnId: 1, phase: 'scope' }, + }, + ], + }); + }); + }); + + it('submits a force-close action for design through chat with typed confirmation parts', async () => { + currentLoaderData = createWorkspaceLoaderData({ + workflow: { + phases: { + scope: { + status: 'closed', + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + proposalPending: false, + turnId: 1, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }, + design: { + status: 'in_progress', + closeability: true, + readiness: 'medium', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: 
false, + turnId: null, + summary: null, + }, + }, + } as any, + }); + + renderWorkspace(); + + fireEvent.click(await screen.findByRole('button', { name: /force design closure/i })); + + await waitFor(() => { + expect(useChatHarness.sendMessage).toHaveBeenCalledWith({ + parts: [ + { type: 'text', text: 'Force design closure' }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase: 'design' }, + }, ], }); }); }); + it('hides the force-close action when design already has a pending closure proposal', async () => { + currentLoaderData = createWorkspaceLoaderData({ + workflow: { + phases: { + scope: { + status: 'closed', + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + proposalPending: false, + turnId: 1, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }, + design: { + status: 'in_progress', + closeability: true, + readiness: 'medium', + closureBasis: null, + proposalPending: true, + turnId: 3, + summary: 'The main architectural commitments are captured well enough to review requirements.', + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + }, + } as any, + }); + + renderWorkspace(); + + expect(screen.queryByRole('button', { name: /force design closure/i })).toBeNull(); + }); + + it('renders shared workflow state for closed scope and active design mode', async () => { + currentLoaderData = createWorkspaceLoaderData({ + workflow: { + phases: { + scope: { + status: 'closed', + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + proposalPending: false, + turnId: 1, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }, + 
design: { + status: 'in_progress', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + }, + } as any, + }); + + renderWorkspace(); + + expect(await screen.findByText(/scope closed/i)).toBeTruthy(); + expect(screen.getByText(/recommended close/i)).toBeTruthy(); + expect(screen.getByText(/design in progress/i)).toBeTruthy(); + expect(screen.getAllByText(/low readiness/i).length).toBeGreaterThan(0); + }); + + it('renders forced-close workflow state for closed design and active requirements mode', async () => { + currentLoaderData = createWorkspaceLoaderData({ + workflow: { + phases: { + scope: { + status: 'closed', + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + proposalPending: false, + turnId: 1, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }, + design: { + status: 'closed', + closeability: false, + readiness: 'high', + closureBasis: 'user_forced', + proposalPending: false, + turnId: 4, + summary: 'Design closed by user without an interviewer recommendation.', + }, + requirements: { + status: 'in_progress', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + }, + } as any, + }); + + renderWorkspace(); + + expect(await screen.findByText(/design closed/i)).toBeTruthy(); + expect(screen.getByText(/forced close/i)).toBeTruthy(); + 
expect(screen.getByText(/requirements in progress/i)).toBeTruthy(); + }); + it('posts free-text-only turn responses and forwards the text into chat', async () => { currentLoaderData = createWorkspaceLoaderData({ options: [ diff --git a/src/client/routes/InterviewWorkspace.tsx b/src/client/routes/InterviewWorkspace.tsx index 0e3b0619..635ec2ed 100644 --- a/src/client/routes/InterviewWorkspace.tsx +++ b/src/client/routes/InterviewWorkspace.tsx @@ -20,8 +20,9 @@ import { Tool, ToolHeader, ToolContent, ToolInput, ToolOutput } from '@/componen import { EntitySidebar } from '@/components/EntitySidebar'; import { cn } from '@/lib/utils'; -import type { ProjectStateTurn } from '../../shared/api-types.js'; +import type { ProjectState, ProjectStateTurn } from '../../shared/api-types.js'; import { isAskQuestionUIPart, type BrunchUIMessage } from '../../shared/chat.js'; +import { getForceClosePhaseAction, getPhaseClosureCommandText } from '../../shared/phase-close.js'; import { useWorkspaceController } from '../workspace/workspace-controller'; import { getPersistedSelectedPositions, @@ -39,29 +40,54 @@ type TurnCardOption = Pick< 'position' | 'content' | 'is_recommended' >; -function getScopeStatusLabel(status: 'open' | 'proposed' | 'confirmed') { - switch (status) { - case 'proposed': - return 'Scope ready to confirm'; - case 'confirmed': - return 'Scope closed'; - default: - return 'Scope in progress'; +type WorkflowPhaseState = ProjectState['workflow']['phases'][ProjectStateTurn['phase']]; + +function getWorkflowStatusLabel(phase: ProjectStateTurn['phase'], state: WorkflowPhaseState) { + const phaseLabel = phase[0].toUpperCase() + phase.slice(1); + if (state.status === 'closed') { + return `${phaseLabel} closed`; + } + if (state.proposalPending) { + return `${phaseLabel} ready to confirm`; + } + if (state.status === 'unstarted') { + return `${phaseLabel} not started`; + } + return `${phaseLabel} in progress`; +} + +function getWorkflowMetaLabel(state: WorkflowPhaseState) { 
+ const parts = [`${state.readiness[0].toUpperCase() + state.readiness.slice(1)} readiness`]; + parts.push(state.closeability ? 'Closeable now' : 'Not yet closeable'); + if (state.closureBasis === 'interviewer_recommended') { + parts.push('Recommended close'); } + if (state.closureBasis === 'user_forced') { + parts.push('Forced close'); + } + return parts.join(' · '); +} + +function canForceClosePhase(workflow: ProjectState['workflow'], phase: ProjectStateTurn['phase']) { + return getForceClosePhaseAction(workflow, phase).available; } function PhaseSummaryCard({ + phase, summary, onConfirm, disabled, }: { + phase: ProjectStateTurn['phase']; summary: string; onConfirm: () => void; disabled: boolean; }) { return (
-
Scope closure proposal
+
+ {phase[0].toUpperCase() + phase.slice(1)} closure proposal +

{summary}

@@ -270,9 +296,31 @@ export function InterviewWorkspace() { ← Projects

{project.name}

- - {getScopeStatusLabel(workflow.phases.scope.status)} - +
+ {(Object.entries(workflow.phases) as Array<[ProjectStateTurn['phase'], WorkflowPhaseState]>).map( + ([phase, state]) => ( +
+
{getWorkflowStatusLabel(phase, state)}
+
{getWorkflowMetaLabel(state)}
+ {canForceClosePhase(workflow, phase) && ( + + )} +
+ ), + )} +
@@ -331,9 +379,10 @@ export function InterviewWorkspace() { {phaseSummary && ( chat.confirmPhaseClosure(phaseSummary.turnId)} + onConfirm={() => chat.confirmPhaseClosure(phaseSummary.phase, phaseSummary.turnId)} /> )} diff --git a/src/client/workspace/workspace-controller-core.ts b/src/client/workspace/workspace-controller-core.ts index 725275fd..88ca34ea 100644 --- a/src/client/workspace/workspace-controller-core.ts +++ b/src/client/workspace/workspace-controller-core.ts @@ -285,7 +285,7 @@ export function createWorkspaceControllerViewState( const pendingQuestion = isLoading ? findPendingQuestion(messages) : null; const latestPhaseSummary = findPhaseSummary(messages); const phaseSummary = - latestPhaseSummary && (isLoading || workflow.phases[latestPhaseSummary.phase].status === 'proposed') + latestPhaseSummary && (isLoading || workflow.phases[latestPhaseSummary.phase].proposalPending) ? latestPhaseSummary : null; const turnCard: WorkspaceTurnCardViewModel | null = phaseSummary diff --git a/src/client/workspace/workspace-controller.test.tsx b/src/client/workspace/workspace-controller.test.tsx index 3256c0ae..6adc6f80 100644 --- a/src/client/workspace/workspace-controller.test.tsx +++ b/src/client/workspace/workspace-controller.test.tsx @@ -106,10 +106,42 @@ function createProjectState({ }, workflow: { phases: { - scope: { status: 'open', turnId: null, summary: null }, - design: { status: 'open', turnId: null, summary: null }, - requirements: { status: 'open', turnId: null, summary: null }, - criteria: { status: 'open', turnId: null, summary: null }, + scope: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + design: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + 
closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, }, }, turns: [ diff --git a/src/client/workspace/workspace-controller.ts b/src/client/workspace/workspace-controller.ts index 70342e5e..e4da7f9b 100644 --- a/src/client/workspace/workspace-controller.ts +++ b/src/client/workspace/workspace-controller.ts @@ -7,6 +7,12 @@ import { useSubmitTurnResponseMutation } from '@/mutations/workspace-mutations'; import type { ProjectStateTurn } from '../../shared/api-types.js'; import { brunchDataPartSchemas, type BrunchUIMessage } from '../../shared/chat.js'; +import { + createConfirmProposedPhaseClosureCommand, + createForceCloseActivePhaseCommand, + getPhaseClosureCommandText, + type DataConfirmation, +} from '../../shared/phase-close.js'; import { useChatHydrationBoundary } from './chat-hydration.js'; import { createWorkspaceControllerViewState, @@ -23,7 +29,8 @@ export interface WorkspaceControllerChatState { isLoading: boolean; isStreaming: boolean; submitText: (text: string) => void; - confirmPhaseClosure: (turnId: number) => void; + confirmPhaseClosure: (phase: ProjectStateTurn['phase'], turnId: number) => void; + forcePhaseClosure: (phase: ProjectStateTurn['phase']) => void; } export type WorkspaceControllerTurnCardState = @@ -102,22 +109,39 @@ export function useWorkspaceController(): WorkspaceController { [isLoading, sendMessage], ); - const confirmPhaseClosure = useCallback( - (turnId: number) => { + const submitPhaseClosureCommand = useCallback( + (command: DataConfirmation) => { if (isLoading) { return; } void sendMessage({ parts: [ - { type: 'text', text: 'Confirm scope closure' }, - { type: 'data-confirmation', data: { turnId, confirmed: true } }, + { type: 'text', text: getPhaseClosureCommandText(command) }, + { + type: 'data-confirmation', + data: command, + }, ], 
}); }, [isLoading, sendMessage], ); + const confirmPhaseClosure = useCallback( + (phase: ProjectStateTurn['phase'], turnId: number) => { + submitPhaseClosureCommand(createConfirmProposedPhaseClosureCommand(phase, turnId)); + }, + [submitPhaseClosureCommand], + ); + + const forcePhaseClosure = useCallback( + (phase: ProjectStateTurn['phase']) => { + submitPhaseClosureCommand(createForceCloseActivePhaseCommand(phase)); + }, + [submitPhaseClosureCommand], + ); + return { project: viewState.project, workflow: viewState.workflow, @@ -129,6 +153,7 @@ export function useWorkspaceController(): WorkspaceController { isStreaming: status === 'streaming', submitText, confirmPhaseClosure, + forcePhaseClosure, }, turnCard: viewState.turnCard ? viewState.turnCard.kind === 'persisted-turn' diff --git a/src/client/workspace/workspace-data.test.ts b/src/client/workspace/workspace-data.test.ts index d544e1de..3572ced1 100644 --- a/src/client/workspace/workspace-data.test.ts +++ b/src/client/workspace/workspace-data.test.ts @@ -29,18 +29,7 @@ function createProjectState({ is_recommended: boolean; is_selected: boolean; }>; - workflow?: { - phases: { - scope: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - design: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - requirements: { - status: 'open' | 'proposed' | 'confirmed'; - turnId: number | null; - summary: string | null; - }; - criteria: { status: 'open' | 'proposed' | 'confirmed'; turnId: number | null; summary: string | null }; - }; - }; + workflow?: ProjectState['workflow']; } = {}): ProjectState { return { project: { @@ -52,10 +41,42 @@ function createProjectState({ }, workflow: workflow ?? 
{ phases: { - scope: { status: 'open', turnId: null, summary: null }, - design: { status: 'open', turnId: null, summary: null }, - requirements: { status: 'open', turnId: null, summary: null }, - criteria: { status: 'open', turnId: null, summary: null }, + scope: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + design: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, }, }, turns: [ @@ -251,13 +272,41 @@ describe('workspace controller core', () => { workflow: { phases: { scope: { - status: 'proposed', + status: 'in_progress', + closeability: true, + readiness: 'high', + closureBasis: null, + proposalPending: true, turnId: 1, summary: 'Goals, terms, context, and constraints are sufficiently captured.', }, - design: { status: 'open', turnId: null, summary: null }, - requirements: { status: 'open', turnId: null, summary: null }, - criteria: { status: 'open', turnId: null, summary: null }, + design: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, }, }, }), @@ -366,10 +415,42 @@ describe('workspace 
controller core', () => { }, workflow: { phases: { - scope: { status: 'open', turnId: null, summary: null }, - design: { status: 'open', turnId: null, summary: null }, - requirements: { status: 'open', turnId: null, summary: null }, - criteria: { status: 'open', turnId: null, summary: null }, + scope: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + design: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + requirements: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, + criteria: { + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + turnId: null, + summary: null, + }, }, }, turns: [], diff --git a/src/server/app.test.ts b/src/server/app.test.ts index c16bf9f7..fca2a26a 100644 --- a/src/server/app.test.ts +++ b/src/server/app.test.ts @@ -99,14 +99,22 @@ async function makeStructuredQuestionInterviewer(dbArg: DB, turnId: number) { }; } -async function makePhaseClosureInterviewer(dbArg: DB, projectId: number, turnId: number) { +async function makePhaseClosureInterviewer( + dbArg: DB, + projectId: number, + turnId: number, + phase: 'scope' | 'design' = 'scope', + summary: + | 'Goals, terms, context, and constraints are sufficiently captured.' + | 'The main architectural commitments are captured well enough to review requirements.' 
= 'Goals, terms, context, and constraints are sufficiently captured.', +) { const { createPhaseOutcome } = await import('./db.js'); createPhaseOutcome(dbArg, { projectId, - phase: 'scope', + phase, proposal_turn_id: turnId, - summary: 'Goals, terms, context, and constraints are sufficiently captured.', + summary, }); return { @@ -119,15 +127,15 @@ async function makePhaseClosureInterviewer(dbArg: DB, projectId: number, turnId: toolCallId: 'tool-phase-1', toolName: 'propose_phase_closure', input: { - phase: 'scope', - summary: 'Goals, terms, context, and constraints are sufficiently captured.', + phase, + summary, }, }, { type: 'tool-output-available', toolCallId: 'tool-phase-1', toolName: 'propose_phase_closure', - output: { ok: true, turnId, phase: 'scope' }, + output: { ok: true, turnId, phase }, }, ]), finishReason: Promise.resolve('tool-calls'), @@ -154,6 +162,103 @@ async function createTestProject(name = 'Test Project'): Promise { return res.body.id; } +async function seedClosedScope(projectId: number) { + const { advanceHead, confirmPhaseOutcome, createPhaseOutcome, createTurn } = await import('./db.js'); + + const scopeTurn = createTurn(db, projectId, { + phase: 'scope', + question: 'What platform?', + answer: 'Web', + }); + advanceHead(db, projectId, scopeTurn.id); + + const scopeProposalTurn = createTurn(db, projectId, { + phase: 'scope', + parent_turn_id: scopeTurn.id, + question: '', + answer: 'We have enough scope context', + }); + advanceHead(db, projectId, scopeProposalTurn.id); + + const scopeOutcome = createPhaseOutcome(db, { + projectId, + phase: 'scope', + proposal_turn_id: scopeProposalTurn.id, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }); + + const scopeConfirmationTurn = createTurn(db, projectId, { + phase: 'scope', + parent_turn_id: scopeProposalTurn.id, + question: '', + answer: 'Confirm scope closure', + user_parts: JSON.stringify([ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 
'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: scopeProposalTurn.id, + phase: 'scope', + }, + }, + ]), + }); + confirmPhaseOutcome(db, scopeOutcome.id, scopeConfirmationTurn.id); + advanceHead(db, projectId, scopeConfirmationTurn.id); + + return { scopeTurn, scopeProposalTurn, scopeConfirmationTurn }; +} + +async function seedActiveDesign(projectId: number) { + const { advanceHead, createTurn } = await import('./db.js'); + const seededScope = await seedClosedScope(projectId); + + const designTurn = createTurn(db, projectId, { + phase: 'design', + parent_turn_id: seededScope.scopeConfirmationTurn.id, + question: 'Which tradeoff matters most?', + answer: 'Keep the repository seam small', + }); + advanceHead(db, projectId, designTurn.id); + + return { ...seededScope, designTurn }; +} + +async function seedRequirementsReady(projectId: number) { + const { advanceHead, confirmPhaseOutcome, createPhaseOutcome, createTurn } = await import('./db.js'); + const seededDesign = await seedActiveDesign(projectId); + + const designOutcome = createPhaseOutcome(db, { + projectId, + phase: 'design', + proposal_turn_id: seededDesign.designTurn.id, + summary: 'The main architectural commitments are captured well enough to review requirements.', + }); + + const designConfirmationTurn = createTurn(db, projectId, { + phase: 'design', + parent_turn_id: seededDesign.designTurn.id, + question: '', + answer: 'Confirm design closure', + user_parts: JSON.stringify([ + { type: 'text', text: 'Confirm design closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: seededDesign.designTurn.id, + phase: 'design', + }, + }, + ]), + }); + confirmPhaseOutcome(db, designOutcome.id, designConfirmationTurn.id); + advanceHead(db, projectId, designConfirmationTurn.id); + + return { ...seededDesign, designConfirmationTurn }; +} + beforeEach(() => { mockStreamInterviewer.mockReset(); 
mockRunObserver.mockReset(); @@ -705,7 +810,11 @@ describe('phase outcomes + scope closure', () => { const projectRes = await request(app).get(`/api/projects/${projectId}`).expect(200); expect(projectRes.body.workflow.phases.scope).toEqual({ - status: 'proposed', + status: 'in_progress', + closeability: true, + readiness: 'medium', + closureBasis: null, + proposalPending: true, turnId: 1, summary: 'Goals, terms, context, and constraints are sufficiently captured.', }); @@ -747,7 +856,14 @@ describe('phase outcomes + scope closure', () => { role: 'user', parts: [ { type: 'text', text: 'Confirm scope closure' }, - { type: 'data-confirmation', data: { turnId: 1, confirmed: true } }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 1, + phase: 'scope', + }, + }, ], }, ], @@ -755,20 +871,564 @@ describe('phase outcomes + scope closure', () => { .expect(200); const projectRes = await request(app).get(`/api/projects/${projectId}`).expect(200); - expect(projectRes.body.workflow.phases.scope).toEqual({ - status: 'confirmed', - turnId: 1, - summary: 'Goals, terms, context, and constraints are sufficiently captured.', - }); + expect(projectRes.body.workflow.phases.scope).toEqual( + expect.objectContaining({ + status: 'closed', + turnId: 1, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + proposalPending: false, + }), + ); + const phaseOutcomes = db.$client + .prepare('SELECT closure_basis FROM phase_outcome WHERE project_id = ? 
ORDER BY id DESC') + .all(projectId) as Array<{ closure_basis: string | null }>; + expect(phaseOutcomes[0]).toEqual({ closure_basis: 'interviewer_recommended' }); + expect(projectRes.body.workflow.phases.design).toEqual( + expect.objectContaining({ + status: 'in_progress', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + }), + ); expect(projectRes.body.project.active_turn_id).toBe(2); expect(projectRes.body.turns.at(-1)).toMatchObject({ answer: 'Confirm scope closure', }); expect(JSON.parse(projectRes.body.turns.at(-1).user_parts ?? '[]')).toEqual([ { type: 'text', text: 'Confirm scope closure' }, - { type: 'data-confirmation', data: { turnId: 1, confirmed: true } }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 1, + phase: 'scope', + }, + }, ]); }); + + it('enters design mode on the next chat turn after scope closure and runs the observer in design phase', async () => { + const projectId = await createTestProject(); + mockStreamInterviewer.mockImplementation(async (dbArg, turn) => + makePhaseClosureInterviewer(dbArg as DB, projectId, (turn as { id: number }).id), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { id: 'u1', role: 'user', parts: [{ type: 'text', text: 'We have enough scope context' }] }, + ], + }) + .expect(200); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u2', + role: 'user', + parts: [ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 1, + phase: 'scope', + }, + }, + ], + }, + ], + }) + .expect(200); + + mockStreamInterviewer.mockImplementation(async () => + makeTextInterviewer('Which database tradeoff matters more?'), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u3', + role: 'user', 
+ parts: [{ type: 'text', text: 'Let us compare SQLite and Postgres' }], + }, + ], + }) + .expect(200); + + expect(mockStreamInterviewer).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ phase: 'design' }), + expect.any(Array), + 'Let us compare SQLite and Postgres', + 'design', + ); + expect(mockRunObserver).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ phase: 'design' }), + projectId, + ); + }); + + it('streams a design phase summary proposal and projects workflow state through the shared phase seam', async () => { + const projectId = await createTestProject(); + mockStreamInterviewer.mockImplementation(async (dbArg, turn) => + makePhaseClosureInterviewer(dbArg as DB, projectId, (turn as { id: number }).id), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { id: 'u1', role: 'user', parts: [{ type: 'text', text: 'We have enough scope context' }] }, + ], + }) + .expect(200); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u2', + role: 'user', + parts: [ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 1, + phase: 'scope', + }, + }, + ], + }, + ], + }) + .expect(200); + + mockStreamInterviewer.mockImplementation(async (dbArg, turn) => + makePhaseClosureInterviewer( + dbArg as DB, + projectId, + (turn as { id: number }).id, + 'design', + 'The main architectural commitments are captured well enough to review requirements.', + ), + ); + + const chatRes = await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u3', + role: 'user', + parts: [{ type: 'text', text: 'We have enough design direction now' }], + }, + ], + }) + .expect(200); + + const events = parseSSELines(collectSSE(chatRes)).filter((event) => event !== '[DONE]'); + expect(events).toContainEqual({ + type: 
'data-phase-summary', + data: { + turnId: 3, + phase: 'design', + summary: 'The main architectural commitments are captured well enough to review requirements.', + }, + }); + + const projectRes = await request(app).get(`/api/projects/${projectId}`).expect(200); + expect(projectRes.body.workflow.phases.design).toEqual({ + status: 'in_progress', + closeability: true, + readiness: 'medium', + closureBasis: null, + proposalPending: true, + turnId: 3, + summary: 'The main architectural commitments are captured well enough to review requirements.', + }); + expect(projectRes.body.workflow.phases.requirements).toEqual( + expect.objectContaining({ + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + }), + ); + expect(JSON.parse(projectRes.body.turns.at(-1).assistant_parts ?? '[]')).toEqual( + expect.arrayContaining([ + { + type: 'data-phase-summary', + data: { + turnId: 3, + phase: 'design', + summary: 'The main architectural commitments are captured well enough to review requirements.', + }, + }, + ]), + ); + }); + + it('confirms a proposed design phase outcome and enters requirements mode on the next turn', async () => { + const projectId = await createTestProject(); + mockStreamInterviewer.mockImplementation(async (dbArg, turn) => + makePhaseClosureInterviewer(dbArg as DB, projectId, (turn as { id: number }).id), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { id: 'u1', role: 'user', parts: [{ type: 'text', text: 'We have enough scope context' }] }, + ], + }) + .expect(200); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u2', + role: 'user', + parts: [ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 1, + phase: 'scope', + }, + }, + ], + }, + ], + }) + .expect(200); + + 
mockStreamInterviewer.mockImplementation(async (dbArg, turn) => + makePhaseClosureInterviewer( + dbArg as DB, + projectId, + (turn as { id: number }).id, + 'design', + 'The main architectural commitments are captured well enough to review requirements.', + ), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u3', + role: 'user', + parts: [{ type: 'text', text: 'We have enough design direction now' }], + }, + ], + }) + .expect(200); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u4', + role: 'user', + parts: [ + { type: 'text', text: 'Confirm design closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 3, + phase: 'design', + }, + }, + ], + }, + ], + }) + .expect(200); + + const projectRes = await request(app).get(`/api/projects/${projectId}`).expect(200); + expect(projectRes.body.workflow.phases.design).toEqual( + expect.objectContaining({ + status: 'closed', + turnId: 3, + summary: 'The main architectural commitments are captured well enough to review requirements.', + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + proposalPending: false, + }), + ); + expect(projectRes.body.workflow.phases.requirements).toEqual( + expect.objectContaining({ + status: 'in_progress', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + }), + ); + + mockStreamInterviewer.mockImplementation(async () => + makeTextInterviewer('Which requirement is must-have?'), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u5', + role: 'user', + parts: [{ type: 'text', text: 'Let us review the must-have capabilities' }], + }, + ], + }) + .expect(200); + + expect(mockStreamInterviewer).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ phase: 'requirements' }), + 
expect.any(Array), + 'Let us review the must-have capabilities', + 'requirements', + ); + expect(mockRunObserver).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ phase: 'requirements' }), + projectId, + ); + }); + + it('force-closes design through the shared confirmation seam and enters requirements mode on the next turn', async () => { + const projectId = await createTestProject(); + mockStreamInterviewer.mockImplementation(async (dbArg, turn) => + makePhaseClosureInterviewer(dbArg as DB, projectId, (turn as { id: number }).id), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { id: 'u1', role: 'user', parts: [{ type: 'text', text: 'We have enough scope context' }] }, + ], + }) + .expect(200); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u2', + role: 'user', + parts: [ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 1, + phase: 'scope', + }, + }, + ], + }, + ], + }) + .expect(200); + + mockStreamInterviewer.mockImplementation(async () => + makeTextInterviewer('Which database tradeoff matters more?'), + ); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u3', + role: 'user', + parts: [{ type: 'text', text: 'Let us compare SQLite and Postgres' }], + }, + ], + }) + .expect(200); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u4', + role: 'user', + parts: [ + { type: 'text', text: 'Force design closure' }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase: 'design' }, + }, + ], + }, + ], + }) + .expect(200); + + const projectRes = await request(app).get(`/api/projects/${projectId}`).expect(200); + expect(projectRes.body.workflow.phases.design).toEqual( + expect.objectContaining({ + status: 'closed', 
+ closeability: false, + readiness: 'high', + closureBasis: 'user_forced', + proposalPending: false, + }), + ); + const phaseOutcomes = db.$client + .prepare('SELECT closure_basis FROM phase_outcome WHERE project_id = ? ORDER BY id DESC') + .all(projectId) as Array<{ closure_basis: string | null }>; + expect(phaseOutcomes[0]).toEqual({ closure_basis: 'user_forced' }); + expect(projectRes.body.workflow.phases.requirements).toEqual( + expect.objectContaining({ + status: 'in_progress', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, + }), + ); + expect(projectRes.body.turns.at(-1)).toMatchObject({ + phase: 'design', + answer: 'Force design closure', + }); + expect(JSON.parse(projectRes.body.turns.at(-1).user_parts ?? '[]')).toEqual([ + { type: 'text', text: 'Force design closure' }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase: 'design' }, + }, + ]); + + await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u5', + role: 'user', + parts: [{ type: 'text', text: 'Let us review the must-have capabilities' }], + }, + ], + }) + .expect(200); + + expect(mockStreamInterviewer).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ phase: 'requirements' }), + expect.any(Array), + 'Let us review the must-have capabilities', + 'requirements', + ); + expect(mockRunObserver).toHaveBeenLastCalledWith( + expect.anything(), + expect.objectContaining({ phase: 'requirements' }), + projectId, + ); + }); + + it.each([ + { + name: 'unsupported phases', + seed: async (projectId: number) => { + const { advanceHead, createTurn } = await import('./db.js'); + const scopeTurn = createTurn(db, projectId, { + phase: 'scope', + question: 'What platform?', + answer: 'Web', + }); + advanceHead(db, projectId, scopeTurn.id); + }, + phase: 'scope', + expectedError: 'Only design supports force-close in this slice', + }, + { + name: 'inactive phases', + seed: 
async (projectId: number) => { + await seedRequirementsReady(projectId); + }, + phase: 'design', + expectedError: 'Only the active phase can be force-closed', + }, + { + name: 'design that is not closeable yet', + seed: async (projectId: number) => { + await seedClosedScope(projectId); + }, + phase: 'design', + expectedError: 'Phase is not closeable yet', + }, + { + name: 'design with a pending proposal', + seed: async (projectId: number) => { + const { createPhaseOutcome } = await import('./db.js'); + const { designTurn } = await seedActiveDesign(projectId); + createPhaseOutcome(db, { + projectId, + phase: 'design', + proposal_turn_id: designTurn.id, + summary: 'The main architectural commitments are captured well enough to review requirements.', + }); + }, + phase: 'design', + expectedError: 'Confirm the pending closure proposal instead of force-closing', + }, + ])('preserves force-close validation errors for $name', async ({ seed, phase, expectedError }) => { + const projectId = await createTestProject(); + await seed(projectId); + + const response = await request(app) + .post(`/api/projects/${projectId}/chat`) + .send({ + messages: [ + { + id: 'u1', + role: 'user', + parts: [ + { type: 'text', text: `Force ${phase} closure` }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase }, + }, + ], + }, + ], + }) + .expect(400); + + expect(response.body).toEqual({ error: expectedError }); + }); }); describe('GET /api/projects/:id', () => { diff --git a/src/server/app.ts b/src/server/app.ts index 47727dd3..f25c5166 100644 --- a/src/server/app.ts +++ b/src/server/app.ts @@ -13,6 +13,12 @@ import { type BrunchUIMessage, type BrunchUserPart, } from '../shared/chat.js'; +import { + getForceCloseActionErrorMessage, + getForceClosePhaseAction, + getForcedPhaseClosureSummary, + parsePhaseClosureCommand, +} from '../shared/phase-close.js'; import { extractPrompt, finalizeTurn, @@ -24,9 +30,11 @@ import { import { applyTurnResponseSelections, 
confirmPhaseOutcome, + createConfirmedPhaseOutcome, createDb, findPhaseOutcomeForTurn, findProposedPhaseOutcomeByTurn, + getCurrentWorkflowState, getTurn, getOptionsForTurn, updateTurn, @@ -174,28 +182,41 @@ export function createApp(dbPath?: string) { (part): part is Extract => part.type === 'data-confirmation', ); + const phaseClosureCommand = confirmationPart ? parsePhaseClosureCommand(confirmationPart.data) : null; if (!prompt.trim() && !confirmationPart) { res.status(400).json({ error: 'message content is required' }); return; } - if (confirmationPart && !confirmationPart.data.confirmed) { - res.status(400).json({ error: 'Only confirmed phase closures are supported' }); + if (confirmationPart && !phaseClosureCommand) { + res.status(400).json({ error: 'Invalid phase-close command' }); return; } - const confirmationTarget = confirmationPart - ? findProposedPhaseOutcomeByTurn(db, id, confirmationPart.data.turnId) - : undefined; - if (confirmationPart && !confirmationTarget) { + const forceClosePhase = + phaseClosureCommand?.kind === 'force-close-active-phase' ? phaseClosureCommand.phase : undefined; + const confirmationTarget = + phaseClosureCommand?.kind === 'confirm-proposed-phase-closure' + ? findProposedPhaseOutcomeByTurn(db, id, phaseClosureCommand.proposalTurnId) + : undefined; + + if (forceClosePhase) { + const workflow = getCurrentWorkflowState(db, id); + const forceCloseAction = getForceClosePhaseAction(workflow, forceClosePhase); + const forceCloseError = getForceCloseActionErrorMessage(forceCloseAction); + if (forceCloseError) { + res.status(400).json({ error: forceCloseError }); + return; + } + } else if (confirmationPart && !confirmationTarget) { res.status(404).json({ error: 'Phase closure proposal not found' }); return; } let prepared: ReturnType; try { - prepared = prepareTurn(db, id, prompt, userParts); + prepared = prepareTurn(db, id, prompt, userParts, forceClosePhase); } catch (error) { const message = error instanceof Error ? 
error.message : 'Unknown error'; res.status(404).json({ error: message }); @@ -211,6 +232,19 @@ export function createApp(dbPath?: string) { return; } + if (forceClosePhase) { + createConfirmedPhaseOutcome(db, { + projectId: id, + phase: forceClosePhase, + proposal_turn_id: prepared.turn.id, + confirmation_turn_id: prepared.turn.id, + summary: getForcedPhaseClosureSummary(forceClosePhase), + }); + finalizeTurn(db, id, prepared.turn.id); + writer.write({ type: 'finish', finishReason: 'stop' }); + return; + } + const interviewer = await streamInterviewer( db, prepared.turn, diff --git a/src/server/core.test.ts b/src/server/core.test.ts index 0a12407c..b033ed04 100644 --- a/src/server/core.test.ts +++ b/src/server/core.test.ts @@ -2,7 +2,16 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import type { BrunchUIMessage, BrunchUserPart } from '../shared/chat.js'; import { extractPrompt, finalizeTurn, getProjectState, prepareTurn } from './core.js'; -import { createDb, createProject, createTurn, getProject, getTurn, type DB } from './db.js'; +import { + confirmPhaseOutcome, + createDb, + createPhaseOutcome, + createProject, + createTurn, + getProject, + getTurn, + type DB, +} from './db.js'; let db: DB; @@ -32,7 +41,10 @@ describe('extractPrompt', () => { role: 'user', parts: [ { type: 'text', text: 'hello' }, - { type: 'data-confirmation', data: { turnId: 7, confirmed: true } }, + { + type: 'data-confirmation', + data: { kind: 'confirm-proposed-phase-closure', proposalTurnId: 7, phase: 'scope' }, + }, ], }, ]; @@ -50,7 +62,10 @@ describe('prepareTurn', () => { const project = createProject(db, 'Spec'); const userParts: BrunchUserPart[] = [ { type: 'text', text: 'Use SQLite' }, - { type: 'data-confirmation', data: { turnId: 1, confirmed: true } }, + { + type: 'data-confirmation', + data: { kind: 'confirm-proposed-phase-closure', proposalTurnId: 1, phase: 'scope' }, + }, ]; const prepared = prepareTurn(db, project.id, 'Use SQLite', userParts); @@ 
-77,6 +92,183 @@ describe('prepareTurn', () => { expect(prepared.activePath).toHaveLength(1); expect(prepared.activePath[0].id).toBe(parent.id); }); + + it('selects design as the next turn phase after scope is confirmed closed', () => { + const project = createProject(db, 'Spec'); + const scopeTurn = createTurn(db, project.id, { + phase: 'scope', + question: 'What platform?', + answer: 'Web', + }); + finalizeTurn(db, project.id, scopeTurn.id); + + const proposalTurn = createTurn(db, project.id, { + phase: 'scope', + parent_turn_id: scopeTurn.id, + question: '', + answer: 'We have enough scope context', + }); + finalizeTurn(db, project.id, proposalTurn.id); + + const outcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'scope', + proposal_turn_id: proposalTurn.id, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }); + + const confirmationTurn = createTurn(db, project.id, { + phase: 'scope', + parent_turn_id: proposalTurn.id, + question: '', + answer: 'Confirm scope closure', + }); + confirmPhaseOutcome(db, outcome.id, confirmationTurn.id); + finalizeTurn(db, project.id, confirmationTurn.id); + + const prepared = prepareTurn(db, project.id, 'Let us compare SQLite and Postgres', [ + { type: 'text', text: 'Let us compare SQLite and Postgres' }, + ]); + + expect(prepared.turn.phase).toBe('design'); + }); + + it('selects requirements as the next turn phase after design is confirmed closed', () => { + const project = createProject(db, 'Spec'); + + const scopeTurn = createTurn(db, project.id, { + phase: 'scope', + question: 'What platform?', + answer: 'Web', + }); + finalizeTurn(db, project.id, scopeTurn.id); + + const scopeProposalTurn = createTurn(db, project.id, { + phase: 'scope', + parent_turn_id: scopeTurn.id, + question: '', + answer: 'We have enough scope context', + }); + finalizeTurn(db, project.id, scopeProposalTurn.id); + + const scopeOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'scope', + 
proposal_turn_id: scopeProposalTurn.id, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }); + + const scopeConfirmationTurn = createTurn(db, project.id, { + phase: 'scope', + parent_turn_id: scopeProposalTurn.id, + question: '', + answer: 'Confirm scope closure', + }); + confirmPhaseOutcome(db, scopeOutcome.id, scopeConfirmationTurn.id); + finalizeTurn(db, project.id, scopeConfirmationTurn.id); + + const designTurn = createTurn(db, project.id, { + phase: 'design', + parent_turn_id: scopeConfirmationTurn.id, + question: 'Which module boundary matters first?', + answer: 'Persistence should stay behind one repository seam', + }); + finalizeTurn(db, project.id, designTurn.id); + + const designOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'design', + proposal_turn_id: designTurn.id, + summary: 'The main architectural commitments are captured well enough to review requirements.', + }); + + const designConfirmationTurn = createTurn(db, project.id, { + phase: 'design', + parent_turn_id: designTurn.id, + question: '', + answer: 'Confirm design closure', + }); + confirmPhaseOutcome(db, designOutcome.id, designConfirmationTurn.id); + finalizeTurn(db, project.id, designConfirmationTurn.id); + + const prepared = prepareTurn(db, project.id, 'Let us review the must-have capabilities', [ + { type: 'text', text: 'Let us review the must-have capabilities' }, + ]); + + expect(prepared.turn.phase).toBe('requirements'); + }); + + it('selects requirements as the next turn phase after design is force-closed by the user', () => { + const project = createProject(db, 'Spec'); + + const scopeTurn = createTurn(db, project.id, { + phase: 'scope', + question: 'What platform?', + answer: 'Web', + }); + finalizeTurn(db, project.id, scopeTurn.id); + + const scopeProposalTurn = createTurn(db, project.id, { + phase: 'scope', + parent_turn_id: scopeTurn.id, + question: '', + answer: 'We have enough scope context', + }); + finalizeTurn(db, 
project.id, scopeProposalTurn.id); + + const scopeOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'scope', + proposal_turn_id: scopeProposalTurn.id, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }); + + const scopeConfirmationTurn = createTurn(db, project.id, { + phase: 'scope', + parent_turn_id: scopeProposalTurn.id, + question: '', + answer: 'Confirm scope closure', + }); + confirmPhaseOutcome(db, scopeOutcome.id, scopeConfirmationTurn.id); + finalizeTurn(db, project.id, scopeConfirmationTurn.id); + + const designTurn = createTurn(db, project.id, { + phase: 'design', + parent_turn_id: scopeConfirmationTurn.id, + question: 'Which module boundary matters first?', + answer: 'Persistence should stay behind one repository seam', + }); + finalizeTurn(db, project.id, designTurn.id); + + const designForceCloseTurn = createTurn(db, project.id, { + phase: 'design', + parent_turn_id: designTurn.id, + question: '', + answer: 'Force design closure', + user_parts: JSON.stringify([ + { type: 'text', text: 'Force design closure' }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase: 'design' }, + }, + ]), + }); + + const designOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'design', + proposal_turn_id: designForceCloseTurn.id, + summary: 'Design closed by user without an interviewer recommendation.', + }); + confirmPhaseOutcome(db, designOutcome.id, designForceCloseTurn.id); + finalizeTurn(db, project.id, designForceCloseTurn.id); + + const prepared = prepareTurn(db, project.id, 'Let us review the must-have capabilities', [ + { type: 'text', text: 'Let us review the must-have capabilities' }, + ]); + + expect(prepared.turn.phase).toBe('requirements'); + }); }); describe('finalizeTurn', () => { diff --git a/src/server/core.ts b/src/server/core.ts index 88e40a7a..4804f2a3 100644 --- a/src/server/core.ts +++ b/src/server/core.ts @@ -4,6 +4,7 @@ import { getProject, 
getActivePath, getOptionsForTurn, + getCurrentPhase, getCurrentWorkflowState, createTurn, advanceHead, @@ -41,14 +42,14 @@ export function prepareTurn( projectId: number, userMessage: string, userParts: BrunchUserPart[], - phase: Turn['phase'] = 'scope', + phase: Turn['phase'] | undefined = undefined, ) { const project = getProject(db, projectId); if (!project) throw new Error(`Project ${projectId} not found`); const activePath = loadActivePathWithOptions(db, projectId); const turn = createTurn(db, projectId, { parent_turn_id: project.active_turn_id, - phase, + phase: phase ?? getCurrentPhase(db, projectId), question: '', answer: userMessage, user_parts: serializeParts(userParts), diff --git a/src/server/db.test.ts b/src/server/db.test.ts index d4d14343..8abc4b79 100644 --- a/src/server/db.test.ts +++ b/src/server/db.test.ts @@ -27,6 +27,7 @@ import { addAssumptionParentAssumption, getEntitiesForProject, getScopeBundleForProject, + listPhaseOutcomesForProject, type DB, } from './db.js'; @@ -68,6 +69,11 @@ describe('createDb', () => { for (const table of expected) { expect(names).toContain(table); } + + const phaseOutcomeColumns = db.$client.prepare("PRAGMA table_info('phase_outcome')").all() as Array<{ + name: string; + }>; + expect(phaseOutcomeColumns.map((column) => column.name)).toContain('closure_basis'); }); it('creates database file on disk when given a path', () => { @@ -209,9 +215,13 @@ describe('phase outcome lifecycle', () => { }); expect(getCurrentWorkflowState(db, project.id).phases.scope).toMatchObject({ - status: 'proposed', + status: 'in_progress', + proposalPending: true, summary: proposed.summary, turnId: closureTurn.id, + closeability: true, + readiness: 'high', + closureBasis: null, }); const confirmationTurn = createTurn(db, project.id, { @@ -223,10 +233,26 @@ describe('phase outcome lifecycle', () => { confirmPhaseOutcome(db, proposed.id, confirmationTurn.id); advanceHead(db, project.id, confirmationTurn.id); - 
expect(getCurrentWorkflowState(db, project.id).phases.scope).toMatchObject({ - status: 'confirmed', + const confirmedWorkflow = getCurrentWorkflowState(db, project.id); + expect(confirmedWorkflow.phases.scope).toMatchObject({ + status: 'closed', + proposalPending: false, summary: proposed.summary, turnId: closureTurn.id, + closeability: false, + readiness: 'high', + closureBasis: 'interviewer_recommended', + }); + expect(listPhaseOutcomesForProject(db, project.id)[0]).toMatchObject({ + id: proposed.id, + closure_basis: 'interviewer_recommended', + }); + expect(confirmedWorkflow.phases.design).toMatchObject({ + status: 'in_progress', + proposalPending: false, + closeability: false, + readiness: 'low', + closureBasis: null, }); const alternateTurn = createTurn(db, project.id, { @@ -238,15 +264,165 @@ describe('phase outcome lifecycle', () => { advanceHead(db, project.id, alternateTurn.id); expect(getCurrentWorkflowState(db, project.id).phases.scope).toMatchObject({ - status: 'open', + status: 'in_progress', + proposalPending: false, summary: null, turnId: null, + closeability: true, + closureBasis: null, }); expect(listPhaseOutcomesForProject(db, project.id)[0]).toMatchObject({ id: proposed.id, status: 'superseded', }); }); + + it('projects a user-forced design close from the confirmation turn and advances requirements', async () => { + const project = getOrCreateProject(db); + + const scopeTurn = createTurn(db, project.id, { phase: 'scope', question: 'Goal?', answer: 'Spec tool' }); + advanceHead(db, project.id, scopeTurn.id); + + const scopeProposalTurn = createTurn(db, project.id, { + phase: 'scope', + question: '', + answer: 'We have enough scope context', + parent_turn_id: scopeTurn.id, + }); + advanceHead(db, project.id, scopeProposalTurn.id); + + const { createPhaseOutcome, confirmPhaseOutcome, getCurrentWorkflowState } = await import('./db.js'); + + const scopeOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'scope', + proposal_turn_id: 
scopeProposalTurn.id, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }); + + const scopeConfirmationTurn = createTurn(db, project.id, { + phase: 'scope', + question: '', + answer: 'Confirm scope closure', + parent_turn_id: scopeProposalTurn.id, + user_parts: JSON.stringify([ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: scopeProposalTurn.id, + phase: 'scope', + }, + }, + ]), + }); + confirmPhaseOutcome(db, scopeOutcome.id, scopeConfirmationTurn.id); + advanceHead(db, project.id, scopeConfirmationTurn.id); + + const designTurn = createTurn(db, project.id, { + phase: 'design', + question: 'Which tradeoff matters most?', + answer: 'Keep the repository seam small', + parent_turn_id: scopeConfirmationTurn.id, + }); + advanceHead(db, project.id, designTurn.id); + + const designForceCloseTurn = createTurn(db, project.id, { + phase: 'design', + question: '', + answer: 'Force design closure', + parent_turn_id: designTurn.id, + user_parts: JSON.stringify([ + { type: 'text', text: 'Force design closure' }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase: 'design' }, + }, + ]), + }); + + const designOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'design', + proposal_turn_id: designForceCloseTurn.id, + summary: 'Design closed by user without an interviewer recommendation.', + }); + confirmPhaseOutcome(db, designOutcome.id, designForceCloseTurn.id); + advanceHead(db, project.id, designForceCloseTurn.id); + + const workflow = getCurrentWorkflowState(db, project.id); + expect(workflow.phases.design).toMatchObject({ + status: 'closed', + proposalPending: false, + turnId: designForceCloseTurn.id, + summary: 'Design closed by user without an interviewer recommendation.', + closeability: false, + readiness: 'high', + closureBasis: 'user_forced', + }); + 
expect(listPhaseOutcomesForProject(db, project.id)[0]).toMatchObject({ + id: designOutcome.id, + closure_basis: 'user_forced', + }); + expect(workflow.phases.requirements).toMatchObject({ + status: 'in_progress', + proposalPending: false, + closeability: false, + readiness: 'low', + closureBasis: null, + }); + }); + + it('projects no closure basis when a confirmed phase outcome lacks durable closure provenance', async () => { + const project = getOrCreateProject(db); + + const scopeTurn = createTurn(db, project.id, { phase: 'scope', question: 'Goal?', answer: 'Spec tool' }); + advanceHead(db, project.id, scopeTurn.id); + + const scopeProposalTurn = createTurn(db, project.id, { + phase: 'scope', + question: '', + answer: 'We have enough scope context', + parent_turn_id: scopeTurn.id, + }); + advanceHead(db, project.id, scopeProposalTurn.id); + + const { createPhaseOutcome, confirmPhaseOutcome, getCurrentWorkflowState } = await import('./db.js'); + + const scopeOutcome = createPhaseOutcome(db, { + projectId: project.id, + phase: 'scope', + proposal_turn_id: scopeProposalTurn.id, + summary: 'Goals, terms, context, and constraints are sufficiently captured.', + }); + + const scopeConfirmationTurn = createTurn(db, project.id, { + phase: 'scope', + question: '', + answer: 'Confirm scope closure', + parent_turn_id: scopeProposalTurn.id, + user_parts: JSON.stringify([ + { type: 'text', text: 'Confirm scope closure' }, + { + type: 'data-confirmation', + data: { + kind: 'confirm-proposed-phase-closure', + proposalTurnId: scopeProposalTurn.id, + phase: 'scope', + }, + }, + ]), + }); + confirmPhaseOutcome(db, scopeOutcome.id, scopeConfirmationTurn.id); + advanceHead(db, project.id, scopeConfirmationTurn.id); + + db.$client.prepare('UPDATE phase_outcome SET closure_basis = NULL WHERE id = ?').run(scopeOutcome.id); + + expect(getCurrentWorkflowState(db, project.id).phases.scope).toMatchObject({ + closureBasis: null, + }); + }); }); describe('active path resolution', () => { diff 
--git a/src/server/db.ts b/src/server/db.ts index 6ed11873..27ce9857 100644 --- a/src/server/db.ts +++ b/src/server/db.ts @@ -10,6 +10,8 @@ import { type KnowledgeEntityCollection, type KnowledgeKind as SharedKnowledgeKind, } from '../shared/knowledge.js'; +import { parsePhaseClosureCommand, type PhaseClosureBasis } from '../shared/phase-close.js'; +import { safeDeserializeUserParts, type DataConfirmationPart } from './parts.js'; import * as schema from './schema.js'; export type DB = ReturnType>; @@ -20,9 +22,16 @@ export type PhaseOutcome = InferSelectModel; export type Phase = Turn['phase']; export type Impact = NonNullable; export type PhaseOutcomeStatus = PhaseOutcome['status']; +export type WorkflowPhaseStatus = 'unstarted' | 'in_progress' | 'closed'; +export type ReadinessBand = 'low' | 'medium' | 'high'; +export type ClosureBasis = PhaseClosureBasis | null; export interface WorkflowPhaseState { - status: 'open' | Extract; + status: WorkflowPhaseStatus; + closeability: boolean; + readiness: ReadinessBand; + closureBasis: ClosureBasis; + proposalPending: boolean; turnId: number | null; summary: string | null; } @@ -174,14 +183,35 @@ export function getActivePath(db: DB, projectId: number): Turn[] { return rows as Turn[]; } +const workflowPhaseOrder = [ + 'scope', + 'design', + 'requirements', + 'criteria', +] as const satisfies readonly Phase[]; + function createEmptyWorkflowPhaseState(): WorkflowPhaseState { return { - status: 'open', + status: 'unstarted', + closeability: false, + readiness: 'low', + closureBasis: null, + proposalPending: false, turnId: null, summary: null, }; } +function getReadinessBand(turnCount: number): ReadinessBand { + if (turnCount <= 0) { + return 'low'; + } + if (turnCount === 1) { + return 'medium'; + } + return 'high'; +} + export function listPhaseOutcomesForProject(db: DB, projectId: number): PhaseOutcome[] { return db .select() @@ -225,10 +255,21 @@ export function createPhaseOutcome(db: DB, input: CreatePhaseOutcomeInput): 
Phas return result as PhaseOutcome; } +function getClosureBasisForConfirmationTurn(db: DB, confirmationTurnId: number): PhaseClosureBasis { + const confirmationTurn = getTurn(db, confirmationTurnId); + const confirmationPart = safeDeserializeUserParts(confirmationTurn?.user_parts).find( + (part): part is DataConfirmationPart => part.type === 'data-confirmation', + ); + const phaseClosureCommand = confirmationPart ? parsePhaseClosureCommand(confirmationPart.data) : null; + + return phaseClosureCommand?.closureBasis ?? 'interviewer_recommended'; +} + export function confirmPhaseOutcome(db: DB, phaseOutcomeId: number, confirmationTurnId: number): void { db.update(schema.phaseOutcome) .set({ status: 'confirmed', + closure_basis: getClosureBasisForConfirmationTurn(db, confirmationTurnId), confirmation_turn_id: confirmationTurnId, confirmed_at: sql`datetime('now')`, }) @@ -236,6 +277,27 @@ export function confirmPhaseOutcome(db: DB, phaseOutcomeId: number, confirmation .run(); } +export function createConfirmedPhaseOutcome( + db: DB, + input: CreatePhaseOutcomeInput & { confirmation_turn_id: number }, +): PhaseOutcome { + const result = db + .insert(schema.phaseOutcome) + .values({ + project_id: input.projectId, + phase: input.phase, + proposal_turn_id: input.proposal_turn_id, + summary: input.summary, + status: 'confirmed', + closure_basis: getClosureBasisForConfirmationTurn(db, input.confirmation_turn_id), + confirmation_turn_id: input.confirmation_turn_id, + confirmed_at: sql`datetime('now')`, + }) + .returning() + .get(); + return result as PhaseOutcome; +} + export function findProposedPhaseOutcomeByTurn( db: DB, projectId: number, @@ -273,6 +335,14 @@ export function findPhaseOutcomeForTurn( .get() as PhaseOutcome | undefined; } +function getClosureBasisForOutcome(outcome: PhaseOutcome | undefined): ClosureBasis { + if (!outcome || outcome.status !== 'confirmed' || !outcome.confirmation_turn_id) { + return null; + } + + return outcome.closure_basis ?? 
null; +} + export function getCurrentWorkflowState(db: DB, projectId: number): WorkflowState { const workflow: WorkflowState = { phases: { @@ -283,29 +353,56 @@ export function getCurrentWorkflowState(db: DB, projectId: number): WorkflowStat }, }; - const activeTurnIds = new Set(getActivePath(db, projectId).map((turn) => turn.id)); + const activePath = getActivePath(db, projectId); + const activeTurnIds = new Set(activePath.map((turn) => turn.id)); + const turnCounts = Object.fromEntries(workflowPhaseOrder.map((phase) => [phase, 0])) as Record< + Phase, + number + >; + for (const turn of activePath) { + turnCounts[turn.phase] += 1; + } + const currentOutcomes = listPhaseOutcomesForProject(db, projectId).filter( (outcome) => (outcome.status === 'proposed' || outcome.status === 'confirmed') && activeTurnIds.has(outcome.proposal_turn_id), ); - for (const phase of ['scope', 'design', 'requirements', 'criteria'] as const) { + const firstUnclosedPhase = + workflowPhaseOrder.find( + (phase) => currentOutcomes.find((entry) => entry.phase === phase)?.status !== 'confirmed', + ) ?? 'criteria'; + + for (const phase of workflowPhaseOrder) { const outcome = currentOutcomes.find((entry) => entry.phase === phase); - if (!outcome) { - continue; - } + const isConfirmed = outcome?.status === 'confirmed'; + const proposalPending = outcome?.status === 'proposed'; + const hasTurnHistory = turnCounts[phase] > 0; workflow.phases[phase] = { - status: outcome.status === 'confirmed' ? 'confirmed' : 'proposed', - turnId: outcome.proposal_turn_id, - summary: outcome.summary, + status: isConfirmed + ? 'closed' + : phase === firstUnclosedPhase || hasTurnHistory + ? 'in_progress' + : 'unstarted', + closeability: isConfirmed ? false : hasTurnHistory, + readiness: getReadinessBand(turnCounts[phase]), + closureBasis: getClosureBasisForOutcome(outcome), + proposalPending, + turnId: outcome?.proposal_turn_id ?? null, + summary: outcome?.summary ?? 
null, }; } return workflow; } +export function getCurrentPhase(db: DB, projectId: number): Phase { + const workflow = getCurrentWorkflowState(db, projectId); + return workflowPhaseOrder.find((phase) => workflow.phases[phase].status !== 'closed') ?? 'criteria'; +} + export function getOptionsForTurn(db: DB, turnId: number): Option[] { return db .select() diff --git a/src/server/interview.test.ts b/src/server/interview.test.ts index aab873f7..d6038a7a 100644 --- a/src/server/interview.test.ts +++ b/src/server/interview.test.ts @@ -2,7 +2,12 @@ import { afterEach, beforeEach, describe, expect, it } from 'vitest'; import { structuredQuestionSchema, type StructuredQuestion } from '../shared/chat.js'; import { createDb, createProject, createTurn, getOptionsForTurn, getTurn, type DB } from './db.js'; -import { getSystemPrompt, persistFallbackQuestionText, persistStructuredQuestion } from './interview.js'; +import { + canProposePhaseClosure, + getSystemPrompt, + persistFallbackQuestionText, + persistStructuredQuestion, +} from './interview.js'; let db: DB; @@ -50,6 +55,19 @@ describe('getSystemPrompt', () => { expect(getSystemPrompt('scope')).toContain('ask_question'); expect(getSystemPrompt('scope')).toContain('structured questions'); }); + + it('teaches the design prompt to propose closure when enough design direction is captured', () => { + expect(getSystemPrompt('design')).toContain('propose_phase_closure'); + }); +}); + +describe('canProposePhaseClosure', () => { + it('enables closure proposals for scope and design but not later review phases', () => { + expect(canProposePhaseClosure('scope')).toBe(true); + expect(canProposePhaseClosure('design')).toBe(true); + expect(canProposePhaseClosure('requirements')).toBe(false); + expect(canProposePhaseClosure('criteria')).toBe(false); + }); }); describe('persistStructuredQuestion', () => { diff --git a/src/server/interview.ts b/src/server/interview.ts index 17c2fd8d..f98b089d 100644 --- a/src/server/interview.ts +++ 
b/src/server/interview.ts @@ -43,9 +43,11 @@ When goals, terms, context, and constraints are sufficiently captured for now, u Your job is to walk the design decision tree — exploring architectural choices, module boundaries, data models, and integration points. Each question drills into a branch of the design space. -For every turn, you MUST use the ask_question tool. Never respond with plain text. +For every turn, you MUST use the ask_question tool or the propose_phase_closure tool. Never respond with plain text. -Each question should present meaningfully different design alternatives with clear tradeoffs in the options.`, +Each question should present meaningfully different design alternatives with clear tradeoffs in the options. + +When the main architectural commitments are sufficiently captured for now, use the propose_phase_closure tool instead of asking another question. The summary should concisely explain what is now understood and why design can close.`, requirements: `You are a spec elicitation interviewer conducting the REQUIREMENTS REVIEW phase. @@ -65,6 +67,10 @@ export function getSystemPrompt(phase: Phase): string { return SYSTEM_PROMPTS[phase]; } +export function canProposePhaseClosure(phase: Phase): boolean { + return phase === 'scope' || phase === 'design'; +} + /** * Persist structured question data from tool input to the turn and options tables. */ @@ -127,7 +133,7 @@ export function createInterviewerAgent(db: DB, turnId: number, phase: Phase, pro instructions: getSystemPrompt(phase), tools: { ask_question: createAskQuestionTool(db, turnId), - ...(phase === 'scope' + ...(canProposePhaseClosure(phase) ? 
{ propose_phase_closure: createProposePhaseClosureTool(db, turnId, phase, projectId) } : {}), }, diff --git a/src/server/parts.test.ts b/src/server/parts.test.ts index 3aa69a17..7b5f56ac 100644 --- a/src/server/parts.test.ts +++ b/src/server/parts.test.ts @@ -77,10 +77,19 @@ describe('data schemas', () => { ).toThrow(); }); - it('validates data-confirmation payloads', () => { - const value = { turnId: 5, confirmed: true }; + it('validates explicit recommended-close data-confirmation payloads', () => { + const value = { kind: 'confirm-proposed-phase-closure', proposalTurnId: 5, phase: 'scope' }; expect(dataConfirmationSchema.parse(value)).toEqual(value); }); + + it('validates explicit forced-close data-confirmation payloads', () => { + const value = { kind: 'force-close-active-phase', phase: 'design' }; + expect(dataConfirmationSchema.parse(value)).toEqual(value); + }); + + it('rejects the old optional-field data-confirmation payload shape', () => { + expect(() => dataConfirmationSchema.parse({ turnId: 5, confirmed: true })).toThrow(); + }); }); describe('assistant part round-trip', () => { @@ -178,7 +187,23 @@ describe('user part round-trip', () => { const parts: BrunchUserPart[] = [ { type: 'text', text: 'Web first — Best fit' }, { type: 'data-turn-response', data: { turnId: 4, selectedOptionIds: [9], freeText: 'Best fit' } }, - { type: 'data-confirmation', data: { turnId: 4, confirmed: true } }, + { + type: 'data-confirmation', + data: { kind: 'confirm-proposed-phase-closure', proposalTurnId: 4, phase: 'scope' }, + }, + ]; + + const json = serializeParts(parts); + expect(deserializeUserParts(json)).toEqual(parts); + }); + + it('round-trips forced-close confirmation user parts', () => { + const parts: BrunchUserPart[] = [ + { type: 'text', text: 'Force design closure' }, + { + type: 'data-confirmation', + data: { kind: 'force-close-active-phase', phase: 'design' }, + }, ]; const json = serializeParts(parts); diff --git a/src/server/schema.ts b/src/server/schema.ts 
index 0b586ad4..e11f5177 100644 --- a/src/server/schema.ts +++ b/src/server/schema.ts @@ -62,6 +62,7 @@ export const phaseOutcome = sqliteTable('phase_outcome', { .notNull() .default('proposed'), summary: text().notNull(), + closure_basis: text({ enum: ['interviewer_recommended', 'user_forced'] }), confirmation_turn_id: integer().references(() => turn.id), confirmed_at: text(), superseded_at: text(), diff --git a/src/shared/chat.ts b/src/shared/chat.ts index 5d9355de..31dfcef5 100644 --- a/src/shared/chat.ts +++ b/src/shared/chat.ts @@ -2,6 +2,7 @@ import { tool, type UIMessage, type UIMessagePart, type UITools } from 'ai'; import * as z from 'zod/v4'; import { createKnowledgeCollectionRecord } from './knowledge.js'; +import { dataConfirmationSchema, workflowPhaseSchema, type DataConfirmation } from './phase-close.js'; export const structuredQuestionSchema = z.object({ question: z.string().min(1), @@ -43,34 +44,30 @@ export const dataTurnResponseSchema = z } }); -export const dataConfirmationSchema = z.object({ - turnId: z.number(), - confirmed: z.boolean(), -}); - export const dataPhaseSummarySchema = z.object({ turnId: z.number(), - phase: z.enum(['scope', 'design', 'requirements', 'criteria']), + phase: workflowPhaseSchema, summary: z.string(), }); export const phaseClosureProposalSchema = z.object({ - phase: z.enum(['scope', 'design', 'requirements', 'criteria']), + phase: workflowPhaseSchema, summary: z.string().min(1), }); export const proposePhaseClosureToolOutputSchema = z.object({ ok: z.literal(true), turnId: z.number(), - phase: z.enum(['scope', 'design', 'requirements', 'criteria']), + phase: workflowPhaseSchema, }); +export { dataConfirmationSchema }; export type StructuredQuestion = z.infer; export type AskQuestionToolOutput = z.infer; export type ObserverResultData = z.infer; export type ObserverEntityIds = ObserverResultData['entityIds']; export type DataTurnResponse = z.infer; -export type DataConfirmation = z.infer; +export type { DataConfirmation 
}; export type DataPhaseSummary = z.infer; export type PhaseClosureProposal = z.infer; export type ProposePhaseClosureToolOutput = z.infer; diff --git a/src/shared/phase-close.test.ts b/src/shared/phase-close.test.ts new file mode 100644 index 00000000..d6187ffc --- /dev/null +++ b/src/shared/phase-close.test.ts @@ -0,0 +1,199 @@ +import { describe, expect, it } from 'vitest'; + +import { + createConfirmProposedPhaseClosureCommand, + createForceCloseActivePhaseCommand, + dataConfirmationSchema, + getForceCloseActionErrorMessage, + getForceClosePhaseAction, + getForcedPhaseClosureSummary, + getPhaseClosureCommandText, + parsePhaseClosureCommand, + type WorkflowPhase, + type WorkflowPhaseActionProjection, +} from './phase-close.js'; + +function createWorkflow( + overrides: Partial< + Record> + > = {}, +): WorkflowPhaseActionProjection { + return { + phases: { + scope: { status: 'unstarted', closeability: false, proposalPending: false, ...overrides.scope }, + design: { status: 'unstarted', closeability: false, proposalPending: false, ...overrides.design }, + requirements: { + status: 'unstarted', + closeability: false, + proposalPending: false, + ...overrides.requirements, + }, + criteria: { status: 'unstarted', closeability: false, proposalPending: false, ...overrides.criteria }, + }, + }; +} + +describe('phase-close commands', () => { + it('parses interviewer-recommended proposal confirmations into an explicit command', () => { + expect( + parsePhaseClosureCommand({ + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 5, + phase: 'design', + }), + ).toEqual({ + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 5, + phase: 'design', + closureBasis: 'interviewer_recommended', + }); + }); + + it('parses user-forced phase closes into an explicit command', () => { + expect(parsePhaseClosureCommand({ kind: 'force-close-active-phase', phase: 'design' })).toEqual({ + kind: 'force-close-active-phase', + phase: 'design', + closureBasis: 'user_forced', + }); + 
}); + + it('rejects the old optional-field confirmation shape', () => { + expect(parsePhaseClosureCommand({ turnId: 5, confirmed: true })).toBeNull(); + }); + + it('builds confirm-proposal command payloads that validate through the discriminated command schema', () => { + expect(dataConfirmationSchema.parse(createConfirmProposedPhaseClosureCommand('scope', 7))).toEqual({ + kind: 'confirm-proposed-phase-closure', + proposalTurnId: 7, + phase: 'scope', + }); + }); + + it('builds force-close command payloads that validate through the discriminated command schema', () => { + expect(dataConfirmationSchema.parse(createForceCloseActivePhaseCommand('design'))).toEqual({ + kind: 'force-close-active-phase', + phase: 'design', + }); + }); + + it('derives close-action message text from the shared command model', () => { + expect(getPhaseClosureCommandText({ kind: 'confirm-proposed-phase-closure', phase: 'scope' })).toBe( + 'Confirm scope closure', + ); + expect(getPhaseClosureCommandText({ kind: 'force-close-active-phase', phase: 'design' })).toBe( + 'Force design closure', + ); + }); +}); + +describe('force-close phase action projection', () => { + it('allows force-closing the active design phase when it is closeable and has no pending proposal', () => { + expect( + getForceClosePhaseAction( + createWorkflow({ + scope: { status: 'closed' }, + design: { status: 'in_progress', closeability: true }, + }), + 'design', + ), + ).toEqual({ + kind: 'force-close-active-phase', + phase: 'design', + available: true, + reason: null, + }); + }); + + it('rejects force-close for unsupported phases', () => { + expect( + getForceClosePhaseAction( + createWorkflow({ + scope: { status: 'in_progress', closeability: true }, + }), + 'scope', + ), + ).toEqual({ + kind: 'force-close-active-phase', + phase: 'scope', + available: false, + reason: 'unsupported_phase', + }); + }); + + it('rejects force-close when design is not the active phase', () => { + expect( + getForceClosePhaseAction( + 
createWorkflow({ + scope: { status: 'closed' }, + design: { status: 'closed' }, + requirements: { status: 'in_progress' }, + }), + 'design', + ), + ).toEqual({ + kind: 'force-close-active-phase', + phase: 'design', + available: false, + reason: 'inactive_phase', + }); + }); + + it('rejects force-close when the active design phase is not closeable', () => { + expect( + getForceClosePhaseAction( + createWorkflow({ + scope: { status: 'closed' }, + design: { status: 'in_progress', closeability: false }, + }), + 'design', + ), + ).toEqual({ + kind: 'force-close-active-phase', + phase: 'design', + available: false, + reason: 'not_closeable', + }); + }); + + it('rejects force-close when the active design phase already has a pending proposal', () => { + expect( + getForceClosePhaseAction( + createWorkflow({ + scope: { status: 'closed' }, + design: { status: 'in_progress', closeability: true, proposalPending: true }, + }), + 'design', + ), + ).toEqual({ + kind: 'force-close-active-phase', + phase: 'design', + available: false, + reason: 'proposal_pending', + }); + }); + + it('maps force-close rejection reasons through one shared error helper', () => { + expect( + getForceCloseActionErrorMessage({ + kind: 'force-close-active-phase', + phase: 'scope', + available: false, + reason: 'unsupported_phase', + }), + ).toBe('Only design supports force-close in this slice'); + expect( + getForceCloseActionErrorMessage({ + kind: 'force-close-active-phase', + phase: 'design', + available: false, + reason: 'proposal_pending', + }), + ).toBe('Confirm the pending closure proposal instead of force-closing'); + }); + + it('builds the forced-close summary through one shared helper', () => { + expect(getForcedPhaseClosureSummary('design')).toBe( + 'Design closed by user without an interviewer recommendation.', + ); + }); +}); diff --git a/src/shared/phase-close.ts b/src/shared/phase-close.ts new file mode 100644 index 00000000..cea68ec6 --- /dev/null +++ b/src/shared/phase-close.ts @@ -0,0 
+1,173 @@ +import * as z from 'zod/v4'; + +export const workflowPhaseOrder = ['scope', 'design', 'requirements', 'criteria'] as const; +export const workflowPhaseSchema = z.enum(workflowPhaseOrder); +export const phaseClosureBasisSchema = z.enum(['interviewer_recommended', 'user_forced']); + +const confirmProposedPhaseClosureSchema = z.object({ + kind: z.literal('confirm-proposed-phase-closure'), + proposalTurnId: z.number(), + phase: workflowPhaseSchema, +}); + +const forceCloseActivePhaseSchema = z.object({ + kind: z.literal('force-close-active-phase'), + phase: workflowPhaseSchema, +}); + +export const dataConfirmationSchema = z.discriminatedUnion('kind', [ + confirmProposedPhaseClosureSchema, + forceCloseActivePhaseSchema, +]); + +export type WorkflowPhase = z.infer; +export type PhaseClosureBasis = z.infer; + +export type PhaseClosureCommand = + | { + kind: 'confirm-proposed-phase-closure'; + proposalTurnId: number; + phase: WorkflowPhase; + closureBasis: 'interviewer_recommended'; + } + | { + kind: 'force-close-active-phase'; + phase: WorkflowPhase; + closureBasis: 'user_forced'; + }; + +export type DataConfirmation = z.infer; + +export type WorkflowPhaseActionState = { + status: 'unstarted' | 'in_progress' | 'closed'; + closeability: boolean; + proposalPending: boolean; +}; + +export type WorkflowPhaseActionProjection = { + phases: Record; +}; + +export type ForceClosePhaseAction = { + kind: 'force-close-active-phase'; + phase: WorkflowPhase; + available: boolean; + reason: 'unsupported_phase' | 'inactive_phase' | 'not_closeable' | 'proposal_pending' | null; +}; + +export function getPhaseClosureCommandText( + command: Pick | Pick, +): string { + return command.kind === 'confirm-proposed-phase-closure' + ? 
`Confirm ${command.phase} closure` + : `Force ${command.phase} closure`; +} + +export function parsePhaseClosureCommand(value: unknown): PhaseClosureCommand | null { + const result = dataConfirmationSchema.safeParse(value); + if (!result.success) { + return null; + } + + if (result.data.kind === 'confirm-proposed-phase-closure') { + return { + ...result.data, + closureBasis: 'interviewer_recommended', + }; + } + + return { + ...result.data, + closureBasis: 'user_forced', + }; +} + +export function getCurrentWorkflowPhase(workflow: WorkflowPhaseActionProjection): WorkflowPhase { + return workflowPhaseOrder.find((phase) => workflow.phases[phase].status !== 'closed') ?? 'criteria'; +} + +export function getForceClosePhaseAction( + workflow: WorkflowPhaseActionProjection, + phase: WorkflowPhase, +): ForceClosePhaseAction { + if (phase !== 'design') { + return { + kind: 'force-close-active-phase', + phase, + available: false, + reason: 'unsupported_phase', + }; + } + + if (phase !== getCurrentWorkflowPhase(workflow)) { + return { + kind: 'force-close-active-phase', + phase, + available: false, + reason: 'inactive_phase', + }; + } + + const state = workflow.phases[phase]; + if (!state.closeability) { + return { + kind: 'force-close-active-phase', + phase, + available: false, + reason: 'not_closeable', + }; + } + + if (state.proposalPending) { + return { + kind: 'force-close-active-phase', + phase, + available: false, + reason: 'proposal_pending', + }; + } + + return { + kind: 'force-close-active-phase', + phase, + available: true, + reason: null, + }; +} + +export function getForceCloseActionErrorMessage(action: ForceClosePhaseAction): string | null { + if (action.available) { + return null; + } + + return action.reason === 'unsupported_phase' + ? 'Only design supports force-close in this slice' + : action.reason === 'inactive_phase' + ? 'Only the active phase can be force-closed' + : action.reason === 'not_closeable' + ? 
'Phase is not closeable yet' + : 'Confirm the pending closure proposal instead of force-closing'; +} + +export function getForcedPhaseClosureSummary(phase: WorkflowPhase): string { + const phaseLabel = phase[0].toUpperCase() + phase.slice(1); + return `${phaseLabel} closed by user without an interviewer recommendation.`; +} + +export function createConfirmProposedPhaseClosureCommand( + phase: WorkflowPhase, + proposalTurnId: number, +): DataConfirmation { + return { + kind: 'confirm-proposed-phase-closure', + proposalTurnId, + phase, + }; +} + +export function createForceCloseActivePhaseCommand(phase: WorkflowPhase): DataConfirmation { + return { + kind: 'force-close-active-phase', + phase, + }; +}