diff --git a/.codex/plans/2026-04-28-session-repair-v1.md b/.codex/plans/2026-04-28-session-repair-v1.md new file mode 100644 index 000000000..e05191e13 --- /dev/null +++ b/.codex/plans/2026-04-28-session-repair-v1.md @@ -0,0 +1,72 @@ +# Session Repair V1: Interrupted Transcript Terminalization + +## Summary + +Implement a focused session repair feature that fixes the root cause: daemon crashes can leave a persisted session transcript with an incomplete final turn, causing AI SDK replay parts and tool calls to remain permanently "streaming" even after the session is stopped. + +The v1 repair is append-only. It provides automatic boot repair for crashed/error sessions and explicit agent-manageable repair via HTTP, UDS, and CLI with dry-run support. + +## Key Changes + +- Add `session.RepairSession(ctx, opts)` in `internal/session`. +- Add stable repair types: + - `RepairOpts{SessionID, DryRun, Force}` + - `RepairResult{SessionID, Issues, Actions, Persisted}` + - `RepairIssue{Code, Severity, TurnID, Detail}` + - `RepairAction{Code, TurnID, EventID, Persisted}` +- Repair behavior: + - Append a canonical `error` event when the final persisted turn lacks `done` or `error` and the session is stopped with `agent_crashed` or `error`. + - Append interrupted synthetic `tool_result` events before the terminal `error` when the interrupted turn has dangling `tool_call` events. + - Report invalid JSON, corrupt metadata, event DB failures, sequence anomalies, and lineage concerns without destructive mutation. + - Treat sequence gaps as diagnostics only; do not truncate or resequence. +- Boot integration: + - Run automatic repair during boot for stopped sessions with `agent_crashed` or `error`. + - Log structured summary counts. +- Public surfaces: + - `POST /api/sessions/:id/repair?dry_run=true&force=false` for HTTP and UDS. + - `agh session repair --dry-run --force -o json`. + - Add OpenAPI/contract `SessionRepairResponse`. 
+ +## Test Plan + +- Unit tests in `internal/session` for terminal event repair, dangling tool repair, dry-run, consistency no-op, force behavior, invalid JSON, corrupt metadata, and sequence diagnostics. +- Transcript/store tests proving repaired UI messages are no longer stuck in streaming states and appended repair events preserve monotonic sequence ordering. +- HTTP/UDS/CLI tests for repair endpoint/command and error mapping. +- Boot/integration tests proving automatic repair is idempotent after daemon restart. +- Verification: focused Go tests, `make codegen`, `make codegen-check`, web type/test gates if generated types change, and full `make verify`. + +## Web/Docs Impact + +- `web/`: + - `web/src/generated/agh-openapi.d.ts` — regenerated `repairSession` operation and payload types from OpenAPI. + - `web/src/systems/session/types.ts` — session repair response/query aliases derived from generated contract types. + - `web/src/systems/session/adapters/session-api.ts` — `repairSession` client for `POST /api/sessions/{id}/repair`. + - `web/src/systems/session/hooks/use-session-actions.ts` — `useRepairSession` mutation invalidating session detail, history, transcript, events, and lists. + - `web/src/systems/session/mocks/fixtures.ts` and `web/src/systems/session/mocks/handlers.ts` — MSW fixture/handler for session repair. + - No route/component UI change — checked `web/src/routes/_app/**` and `web/src/systems/session/components/**`; v1 is intentionally agent/operator-manageable via CLI/HTTP/UDS, not a visible web control. +- `packages/site`: + - `packages/site/content/runtime/cli-reference/session/repair.mdx` — generated CLI reference for `agh session repair`. + - `packages/site/content/runtime/cli-reference/session/index.mdx` and `packages/site/content/runtime/cli-reference/session/meta.json` — generated CLI navigation updates. + - `packages/site/content/runtime/core/sessions/lifecycle.mdx` — conceptual crash/transcript repair behavior. 
+ - `packages/site/content/runtime/core/sessions/resume.mdx` — resume flow mentions append-only transcript repair before replay. + - `packages/site/content/runtime/core/operations/troubleshooting.mdx` — operator runbook includes dry-run/manual repair. + - `packages/site/content/runtime/api-reference/index.mdx` — no direct file edit; OpenAPI-backed API reference receives `repairSession` via `openapi/agh.json`. + +## Extensibility / Agent Manageability / Config Lifecycle + +- `Extensibility`: + - none — checked extension manifests, hooks, skills/capabilities, tools/resources, bundles, registries, bridge SDKs, MCP sidecars, and protocol docs; repair is a session persistence operation and does not add an extension point or protocol surface. +- `Agent manageability`: + - CLI: `agh session repair --dry-run --force -o json`. + - HTTP: `POST /api/sessions/{id}/repair?dry_run=true&force=false`. + - UDS: same route and payload as HTTP. + - Structured output: `SessionRepairResponse` with `issues`, `actions`, `persisted`, `tool_call_id`, and `tool_name`. + - Error contracts: 400 for invalid repair options, 404 for unknown sessions, non-persisting diagnostics for invalid transcript/event conditions. +- `Config lifecycle`: + - none — checked `config.toml` keys/defaults, structs, merge/overlay behavior, validation, examples, docs, and tests; v1 adds no configuration. + +## Assumptions + +- Default repair mode is Boot + manual. +- No schema migration is required for v1. +- No truncation, resequencing, row deletion, UI-only state coercion, lint suppression, ignored errors, sleeps, or retry loops are acceptable. 
diff --git a/.compozy/tasks/delete-session/reviews-001/_meta.md b/.compozy/tasks/badges-design/reviews-001/_meta.md similarity index 65% rename from .compozy/tasks/delete-session/reviews-001/_meta.md rename to .compozy/tasks/badges-design/reviews-001/_meta.md index bcff3647a..f328e7f8d 100644 --- a/.compozy/tasks/delete-session/reviews-001/_meta.md +++ b/.compozy/tasks/badges-design/reviews-001/_meta.md @@ -1,8 +1,8 @@ --- provider: coderabbit -pr: "58" +pr: "84" round: 1 -created_at: 2026-04-24T00:56:58.862771Z +created_at: 2026-04-28T17:21:46.348991Z --- ## Summary diff --git a/.compozy/tasks/badges-design/reviews-001/issue_001.md b/.compozy/tasks/badges-design/reviews-001/issue_001.md new file mode 100644 index 000000000..a9d003267 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_001.md @@ -0,0 +1,54 @@ +--- +status: resolved +file: packages/ui/src/components/pill-group.tsx +line: 19 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-Mtcr,comment:PRRC_kwDOR5y4QM68GEz- +--- + +# Issue 001: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Replace hard-coded pixel classes with DESIGN.md tokens.** + +Line 9, Line 17, Line 18, Line 62, Line 89, and nearby classes introduce raw px values (`[5px]`, `[10px]`, `[20px]`, `[22px]`, `[2px]`, `[3px]`, `[14px]`, `[7px]`, `[9px]`). In `packages/ui/src`, these should come from design tokens only to keep the system consistent. + + +As per coding guidelines, "`**/{web,packages/ui,packages/site}/src/**/*.{ts,tsx,css}`: Pull design tokens from `DESIGN.md` ... never invent values in UI code." + + +Also applies to: 62-63, 89-90 + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@packages/ui/src/components/pill-group.tsx` around lines 9 - 19, The PillGroup +component contains hard-coded pixel classes (e.g., rounded-[5px], text-[10px], +h-[20px], h-[22px], px-2 (if representing 8px/10px), and other +[14px],[7px],[9px] usages) which must be replaced with the design token +equivalents from DESIGN.md; update the class strings inside +packages/ui/src/components/pill-group.tsx (the variant block for active and +size, base class string, and any other nearby class values referenced in this +file) to use the canonical DESIGN.md tokens (CSS custom properties or token +utility classes used across the repo) instead of raw px values, mapping each +specific token (rounded, font-size, height, padding, gap) to the appropriate +DESIGN.md name so no new numeric values are invented and all sizes reference the +shared tokens. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `PillGroup` still encodes badge/segment dimensions with raw arbitrary pixel classes (`rounded-[5px]`, `h-[22px]`, `p-[3px]`, `text-[9px]`, etc.) instead of design-system tokens or token-derived utilities. +- Fix approach: replace raw px classes with existing design tokens where available (`--radius-chip`, `--radius`, `text-badge`, spacing scale) and add minimal shared component sizing custom properties in `packages/ui/src/tokens.css` where DESIGN.md defines values that do not already have a token. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_002.md b/.compozy/tasks/badges-design/reviews-001/issue_002.md new file mode 100644 index 000000000..4ea05d7ec --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_002.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: packages/ui/src/components/pill.test.tsx +line: 12 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:955795c2492d +review_hash: 955795c2492d +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 002: Extract inline object-shape types to named interfaces. +## Review Comment + +Lines 12–15 define `WithMotion` props as an inline object shape `{ reducedMotion: "always" | "never"; children: ReactNode; }`, and line 31 defines `it.each<{ tone: PillTone; bg: string; text: string }>` as an inline generic argument. Per coding guidelines (`**/*.{ts,tsx}: Prefer interface for defining object shapes in TypeScript`), extract both to named `interface` declarations above their respective scopes. + +## Triage + +- Decision: `valid` +- Root cause: `WithMotion` props and the `it.each` row object are inline object shapes, which weakens readability and violates the repository preference for named interfaces for object shapes. +- Fix approach: introduce named interfaces for the motion wrapper props and tone expectation table rows, then reuse them in the test. 
diff --git a/.compozy/tasks/badges-design/reviews-001/issue_003.md b/.compozy/tasks/badges-design/reviews-001/issue_003.md new file mode 100644 index 000000000..922690074 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_003.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: packages/ui/src/components/stories/pill-group.stories.tsx +line: 13 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:ddb6a4664b0d +review_hash: ddb6a4664b0d +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 003: Minor: Clarify component description. +## Review Comment + +The description says "Renamed from the legacy `PillGroup`" but the component is now named `PillGroup`. Consider updating to clarify what it was renamed *from* (e.g., "Replaces the legacy `Pills` segmented toggle"). + +## Triage + +- Decision: `valid` +- Root cause: the Storybook description says `PillGroup` was renamed from `PillGroup`, which is self-contradictory and does not explain the legacy primitive being replaced. +- Fix approach: rewrite the description to state that `PillGroup` replaces the legacy segmented toggle/pills usage. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_004.md b/.compozy/tasks/badges-design/reviews-001/issue_004.md new file mode 100644 index 000000000..089bd64a3 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_004.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: packages/ui/src/components/stories/pill.stories.tsx +line: 11 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:2450eb2c3284 +review_hash: 2450eb2c3284 +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 004: Typo in component description. +## Review Comment + +Line 14 reads "replaces `Pill`, `Pill`, `KindChip`..." with duplicate "Pill" entries. 
This should list the distinct components being replaced (e.g., `MonoBadge`, `StatusDot`, `KindChip`, etc.). + +## Triage + +- Decision: `valid` +- Root cause: the `Pill` Storybook component description repeats `Pill` several times and names removed/legacy primitives unclearly. +- Fix approach: update the description to list distinct replacements (`MonoBadge`, `StatusDot`, `KindChip`, `WireChip`, connection indicator composition) and keep the story copy aligned with the current `Pill` + `Pill.Dot` API. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_005.md b/.compozy/tasks/badges-design/reviews-001/issue_005.md new file mode 100644 index 000000000..745018ec2 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_005.md @@ -0,0 +1,100 @@ +--- +status: resolved +file: web/src/components/design-system-showcase.test.tsx +line: 205 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-Mtcw,comment:PRRC_kwDOR5y4QM68GE0G +--- + +# Issue 005: _⚠️ Potential issue_ | _🟡 Minor_ +## Review Comment + +_⚠️ Potential issue_ | _🟡 Minor_ + +
+🧩 Analysis chain + +🏁 Script executed: + +```shell +#!/bin/bash +# Check if kind-chip is exported from the network system's public barrel +rg -n "export.*KindChip" web/src/systems/network/index.ts 2>/dev/null || echo "No barrel export found" + +# Check how kind-chip is imported in the showcase +rg -n "kind-chip" web/src/components/design-system-showcase.tsx +``` + +Repository: compozy/agh + +Length of output: 149 + +--- + +🏁 Script executed: + +```shell +#!/bin/bash +# Check the actual structure of the network system +echo "=== Network system public barrel content ===" +cat web/src/systems/network/index.ts 2>/dev/null || echo "File not found" + +echo -e "\n=== Check if kind-chip file exists ===" +find web/src/systems/network -name "*kind-chip*" -type f + +echo -e "\n=== Check what's exported from kind-chip ===" +if [ -f "web/src/systems/network/components/kind-chip.tsx" ]; then + rg -n "^export" web/src/systems/network/components/kind-chip.tsx +fi +``` + +Repository: compozy/agh + +Length of output: 3435 + +--- + + + +
+ +**Fix cross-system import to use public barrel.** + +The showcase imports `KindChip` from `@/systems/network/components/kind-chip`, which violates the guideline: "Cross-system imports: Only through the public barrel (`@/systems/`). Never reach into another system's internals." + +`KindChip` is not currently exported from `@/systems/network`'s public barrel. Either: +1. Export `KindChip` from `web/src/systems/network/index.ts` and update the showcase to import from `@/systems/network` +2. Move `KindChip` to a shared location outside the network system if it's a general-purpose component + +Update the test's allowed imports list to match the corrected import path once resolved. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/components/design-system-showcase.test.tsx` around lines 199 - 205, +The showcase is importing KindChip directly from +"@/systems/network/components/kind-chip" which breaks cross-system import rules; +either export KindChip from the network public barrel (add an export in +web/src/systems/network/index.ts) and change the showcase import to +"@/systems/network", or move KindChip to a shared location and import from that +new public barrel; after doing that, update the allowed Set in +design-system-showcase.test.tsx to replace +"@/systems/network/components/kind-chip" with the new public import path +"@/systems/network" (or the new shared barrel path) so the test reflects the +corrected import. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `design-system-showcase.tsx` imports `KindChip` from `@/systems/network/components/kind-chip`, so the test's allowed import list preserves a cross-system internal import. +- Fix approach: export `KindChip` from the network public barrel, change the showcase import to `@/systems/network`, and update this test's allowed import set. This requires the minimal adjacent edit to `web/src/systems/network/index.ts` because the barrel currently does not expose `KindChip`. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_006.md b/.compozy/tasks/badges-design/reviews-001/issue_006.md new file mode 100644 index 000000000..e3a2b2990 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_006.md @@ -0,0 +1,53 @@ +--- +status: resolved +file: web/src/components/design-system-showcase.tsx +line: 139 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-Mtc1,comment:PRRC_kwDOR5y4QM68GE0M +--- + +# Issue 006: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Import `KindChip` via the network public barrel, not an internal path.** + +Line 139 pulls from another system’s internal component path, which breaks system boundaries. + + +
+Suggested fix + +```diff +-import { KindChip } from "@/systems/network/components/kind-chip"; ++import { KindChip } from "@/systems/network"; +``` +
+As per coding guidelines, `web/src/**/*.{ts,tsx}`: Cross-system imports: Only through the public barrel (`@/systems/`). Never reach into another system's internals. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/components/design-system-showcase.tsx` at line 139, The import of +KindChip in design-system-showcase.tsx uses an internal path and violates +cross-system rules; update the import to use the network system's public barrel +instead (replace the current "@/systems/network/components/kind-chip" import +with an import from "@/systems/network" that exposes KindChip) so the component +is consumed via the network public API rather than an internal file. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `design-system-showcase.tsx` directly reaches into `network/components/kind-chip`, violating the system boundary rule for cross-system imports. +- Fix approach: re-export `KindChip` from `@/systems/network` and import through that public barrel. The required barrel export is a minimal adjacent edit to `web/src/systems/network/index.ts`. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_007.md b/.compozy/tasks/badges-design/reviews-001/issue_007.md new file mode 100644 index 000000000..60612da51 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_007.md @@ -0,0 +1,68 @@ +--- +status: resolved +file: web/src/lib/kind-colors.ts +line: 15 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-MtdC,comment:PRRC_kwDOR5y4QM68GE0b +--- + +# Issue 007: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Replace ad-hoc hex colors with design tokens.** + +This map hardcodes multiple hex colors in `web/` UI code, which breaks the token-only rule and can drift from the canonical kind/status palette. + +
+Suggested refactor + +```diff + export const KIND_COLORS: Record = { +- say: "#8E8E93", +- greet: "#5BA6FF", ++ say: "var(--color-kind-say)", ++ greet: "var(--color-kind-greet)", + direct: "var(--color-accent)", + receipt: "var(--color-success)", + recipe: "var(--color-warning)", +- trace: "#B892FF", +- whois: "#4FD1C5", ++ trace: "var(--color-kind-trace)", ++ whois: "var(--color-kind-whois)", + }; +``` + +
+ +As per coding guidelines: “`web/**/*.{ts,tsx,css}`: Pull every color, font, radius, spacing step, and motion value from `DESIGN.md` — never invent tokens” and “`web/**/*.{tsx,ts,css}`: Tokens live in `packages/ui/src/tokens.css`; never override with ad-hoc hex values in components”. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/lib/kind-colors.ts` around lines 7 - 15, The KIND_COLORS map +currently uses hard-coded hex values (e.g., "say": "#8E8E93", "trace": +"#B892FF") which violates the token-only rule; update the KIND_COLORS constant +in web/src/lib/kind-colors.ts to reference the canonical design tokens (CSS +variables exported from packages/ui/src/tokens.css) for every entry (replace hex +literals with var(--color-*) tokens such as var(--color-muted)/var(--color-xxx) +or the appropriate kind/status token names), ensure no ad-hoc hex remains, and +keep keys (say, greet, direct, receipt, recipe, trace, whois) unchanged so +callers still work. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `KIND_COLORS` hardcodes kind-dot hex values instead of referencing canonical CSS custom properties. +- Fix approach: add the missing protocol-kind color variables to `packages/ui/src/tokens.css`, surface them in the design-system showcase token wall, and update `web/src/lib/kind-colors.ts` plus the related `Pill` story to consume `var(--color-kind-*)` tokens. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_008.md b/.compozy/tasks/badges-design/reviews-001/issue_008.md new file mode 100644 index 000000000..1a22f4869 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_008.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/lib/pill-variant.ts +line: 17 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:5e61a9769644 +review_hash: 5e61a9769644 +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 008: Rename pillVariantFromTone to match its current semantics. +## Review Comment + +The function now returns `PillTone`, so the `Variant` name is misleading and increases API confusion at call sites. + +## Triage + +- Decision: `valid` +- Root cause: `pillVariantFromTone` now returns `PillTone`, so `Variant` no longer matches the function's semantics and leaks outdated design-system vocabulary into call sites. +- Fix approach: hard-rename the function to `pillToneFromLegacyTone` and update all imports/call sites so typecheck does not rely on a legacy alias. 
diff --git a/.compozy/tasks/badges-design/reviews-001/issue_009.md b/.compozy/tasks/badges-design/reviews-001/issue_009.md new file mode 100644 index 000000000..3974520fe --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_009.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/storybook/packages-ui-storybook-config.test.ts +line: 31 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:fcf6b8ded5b7 +review_hash: fcf6b8ded5b7 +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 009: Consider asserting shadcn CSS import too. +## Review Comment + +Since Storybook preview styling now depends on both shared tokens and shadcn Tailwind styles, adding an assertion for the shadcn import would strengthen regression coverage. + +## Triage + +- Decision: `valid` +- Root cause: the Storybook config test asserts the shared token import but does not assert the `shadcn/tailwind.css` import even though preview styling depends on both. +- Fix approach: add an explicit assertion for `@import "shadcn/tailwind.css";` in `packages/ui/.storybook/preview.css`. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_010.md b/.compozy/tasks/badges-design/reviews-001/issue_010.md new file mode 100644 index 000000000..3b3756f4e --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_010.md @@ -0,0 +1,55 @@ +--- +status: resolved +file: web/src/systems/automation/components/automation-detail-panel.tsx +line: 16 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-MtdH,comment:PRRC_kwDOR5y4QM68GE0h +--- + +# Issue 010: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Use the network system public barrel import here.** + +Line 16 imports `KindChip` from another system’s internal path, which couples automation to network internals. + + +
+Suggested fix + +```diff +-import { KindChip } from "@/systems/network/components/kind-chip"; ++import { KindChip } from "@/systems/network"; +``` + +If `KindChip` is not exported yet, add a named export in the network system barrel. +
+As per coding guidelines, `web/src/**/*.{ts,tsx}`: Cross-system imports: Only through the public barrel (`@/systems/`). Never reach into another system's internals. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/systems/automation/components/automation-detail-panel.tsx` at line +16, Replace the internal-path import of KindChip with the network system public +barrel import (import { KindChip } from "@/systems/network"); if KindChip is not +exported from the network barrel, add a named re-export in the network system +barrel (e.g., export { KindChip } from "./components/kind-chip") so +automation-detail-panel uses only the public "@/systems/network" surface. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `automation-detail-panel.tsx` imports `KindChip` from the network system's internal component path. +- Fix approach: import `KindChip` from `@/systems/network` after adding a minimal public barrel export in `web/src/systems/network/index.ts`. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_011.md b/.compozy/tasks/badges-design/reviews-001/issue_011.md new file mode 100644 index 000000000..1839737e1 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_011.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/systems/bridges/components/bridge-detail-panel.tsx +line: 87 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:818b4723d1c8 +review_hash: 818b4723d1c8 +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 011: Consolidate duplicate status→tone mapping. +## Review Comment + +`statusToStatusDotTone` and `statusToMonoBadgeTone` currently do the same work. Keep a single mapper to avoid drift. + +## Triage + +- Decision: `valid` +- Root cause: `statusToStatusDotTone` and `statusToMonoBadgeTone` are identical mappings in `bridge-detail-panel.tsx`, creating drift risk for the same bridge status signal. +- Fix approach: consolidate them into one status-to-pill-tone mapper and use the same tone for both the dot and badge. 
diff --git a/.compozy/tasks/badges-design/reviews-001/issue_012.md b/.compozy/tasks/badges-design/reviews-001/issue_012.md new file mode 100644 index 000000000..12a025d27 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_012.md @@ -0,0 +1,56 @@ +--- +status: resolved +file: web/src/systems/bridges/components/bridge-list-panel.tsx +line: 7 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-MtdO,comment:PRRC_kwDOR5y4QM68GE0o +--- + +# Issue 012: _🛠️ Refactor suggestion_ | _🟠 Major_ +## Review Comment + +_🛠️ Refactor suggestion_ | _🟠 Major_ + +**Import `KindChip` via the network system public barrel.** + +This reaches into another system’s internals. Keep cross-system boundaries by importing through `@/systems/network` (and export `KindChip` from that barrel if needed). + +
+♻️ Suggested change + +```diff +-import { KindChip } from "@/systems/network/components/kind-chip"; ++import { KindChip } from "@/systems/network"; +``` +
+ + + +As per coding guidelines: `web/src/**/*.{ts,tsx}`: Cross-system imports: Only through the public barrel (`@/systems/`). Never reach into another system's internals. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/systems/bridges/components/bridge-list-panel.tsx` around lines 6 - 7, +Replace the direct import of KindChip from the network system's internals with +the public barrel export: change the import to pull KindChip from +"@/systems/network" (e.g., import { KindChip } from "@/systems/network"); if +KindChip is not currently re-exported from that barrel, add it to the network +system's public exports so the component can be imported only via the public +barrel and avoid reaching into components/kind-chip internals. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `bridge-list-panel.tsx` imports `KindChip` from a network internal component path instead of the public `@/systems/network` barrel. +- Fix approach: import through `@/systems/network` and add the required minimal barrel export. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_013.md b/.compozy/tasks/badges-design/reviews-001/issue_013.md new file mode 100644 index 000000000..22e29eb96 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_013.md @@ -0,0 +1,69 @@ +--- +status: resolved +file: web/src/systems/bridges/components/bridge-provider-card.tsx +line: 4 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-MtdV,comment:PRRC_kwDOR5y4QM68GE0z +--- + +# Issue 013: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Use the network public barrel, not internal component paths.** + +This imports another system’s internal file directly, which breaks the cross-system boundary contract. + + +
+Suggested boundary-safe import + +```diff +-import { KindChip } from "@/systems/network/components/kind-chip"; ++import { KindChip } from "@/systems/network"; +``` +
+As per coding guidelines, `web/src/**/*.{ts,tsx}`: Cross-system imports: Only through the public barrel (`@/systems/`). Never reach into another system's internals. + + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion +import { KindChip } from "@/systems/network"; +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/systems/bridges/components/bridge-provider-card.tsx` at line 4, The +import of KindChip is reaching into another system's internals; update the +import in bridge-provider-card.tsx to use the network system's public barrel +instead of "@/systems/network/components/kind-chip"—i.e., import KindChip from +the "@/systems/network" barrel so the cross-system boundary contract is +respected and internal paths are not referenced. +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `bridge-provider-card.tsx` imports `KindChip` through another system's internal file. +- Fix approach: import `KindChip` from `@/systems/network` and rely on the public barrel export. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_014.md b/.compozy/tasks/badges-design/reviews-001/issue_014.md new file mode 100644 index 000000000..94db6eba3 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_014.md @@ -0,0 +1,71 @@ +--- +status: resolved +file: web/src/systems/daemon/hooks/use-daemon-health.ts +line: 3 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-Mtdk,comment:PRRC_kwDOR5y4QM68GE1G +--- + +# Issue 014: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Avoid importing a UI component type into a system hook.** + +`useDaemonHealth` now depends on `@/components/connection-indicator`, which reverses the intended layer direction and couples daemon domain logic to a view module. + +
+♻️ Proposed fix + +```diff +-import type { ConnectionStatus } from "@/components/connection-indicator"; ++type ConnectionStatus = "connected" | "reconnecting" | "disconnected"; +``` +
+ +As per coding guidelines: `web/src/systems/**/*.{ts,tsx}` must keep dependency flow unidirectional (`adapters → lib → hooks → components`). + + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion +type ConnectionStatus = "connected" | "reconnecting" | "disconnected"; +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/systems/daemon/hooks/use-daemon-health.ts` at line 3, The hook +useDaemonHealth currently imports the UI type ConnectionStatus from the +component, which breaks layer boundaries; instead remove that import and replace +it with a system-level type (either define a local ConnectionStatus enum/type +inside useDaemonHealth or move a shared status type to a lower-level module such +as a lib/adapters/types module and import that). Update all references inside +useDaemonHealth (and any callers in systems) to use the new system-level type so +the hook no longer depends on "@/components/connection-indicator". +``` + +
+ + + + + +## Triage + +- Decision: `valid` +- Root cause: `useDaemonHealth` imports a UI component type from `@/components/connection-indicator`, making a daemon system hook depend on a view-layer component. +- Fix approach: define the connection status union at the hook/system level so the hook no longer imports from the component layer. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_015.md b/.compozy/tasks/badges-design/reviews-001/issue_015.md new file mode 100644 index 000000000..a6be12ed3 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_015.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/systems/knowledge/components/knowledge-list-panel.tsx +line: 70 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:feed37d675c0 +review_hash: feed37d675c0 +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 015: Reuse knowledgeScopeShortLabel instead of duplicating scope label logic. +## Review Comment + +This keeps scope text formatting centralized and avoids drift between list/detail views. + +## Triage + +- Decision: `valid` +- Root cause: `knowledge-list-panel.tsx` duplicates short scope label logic inline instead of using the shared `knowledgeScopeShortLabel` formatter. +- Fix approach: import and use `knowledgeScopeShortLabel(scope)` for the list badge label. 
diff --git a/.compozy/tasks/badges-design/reviews-001/issue_016.md b/.compozy/tasks/badges-design/reviews-001/issue_016.md new file mode 100644 index 000000000..8d69c39e6 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_016.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/systems/knowledge/lib/knowledge-formatters.ts +line: 39 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:c2673579b22a +review_hash: c2673579b22a +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 016: Tighten TYPE_TONE key typing to MemoryType for safer evolution. +## Review Comment + +Using a `string`-keyed `Record` weakens compile-time guarantees. Prefer keying by `MemoryType` so invalid/typo keys are rejected by TypeScript. + +## Triage + +- Decision: `valid` +- Root cause: `TYPE_TONE` is typed as a `string`-keyed `Record`, so TypeScript cannot catch missing or misspelled `MemoryType` keys. +- Fix approach: key the map as a `MemoryType`-keyed `Record` and return directly from the exhaustive map. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_017.md b/.compozy/tasks/badges-design/reviews-001/issue_017.md new file mode 100644 index 000000000..eecaf0353 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_017.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/systems/tasks/components/tasks-detail-header.tsx +line: 42 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:1da0698660cd +review_hash: 1da0698660cd +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 017: Stale JSDoc comment references MonoBadge. +## Review Comment + +The docstring mentions `MonoBadge`, which has been replaced by `Pill`. Consider updating to match the current implementation. 
+ +## Triage + +- Decision: `valid` +- Root cause: the `TasksDetailHeader` docstring still mentions the old `MonoBadge` primitive even though the implementation now uses `Pill`. +- Fix approach: update the JSDoc to describe the current `Pill`/`Pill.Dot` composition. diff --git a/.compozy/tasks/badges-design/reviews-001/issue_018.md b/.compozy/tasks/badges-design/reviews-001/issue_018.md new file mode 100644 index 000000000..859bbe9f4 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-001/issue_018.md @@ -0,0 +1,22 @@ +--- +status: resolved +file: web/src/systems/tasks/components/tasks-detail-preview-panel.tsx +line: 58 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4190677522,nitpick_hash:69e48d4556c5 +review_hash: 69e48d4556c5 +source_review_id: "4190677522" +source_review_submitted_at: "2026-04-28T16:30:00Z" +--- + +# Issue 018: Stale JSDoc comment references removed components. +## Review Comment + +The docstring still mentions `StatusDot` and `MonoBadge`, but these have been replaced by `Pill.Dot` and `Pill`. Update the comment to reflect the current implementation. + +## Triage + +- Decision: `valid` +- Root cause: the `TasksDetailPreviewPanel` docstring still references removed `StatusDot`/`MonoBadge` components. +- Fix approach: update the JSDoc to reference `Pill.Dot`, `Pill`, `Metric`, `Section`, and `CodeBlock`. 
diff --git a/.compozy/tasks/badges-design/reviews-003/_meta.md b/.compozy/tasks/badges-design/reviews-003/_meta.md new file mode 100644 index 000000000..e4e00eef3 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/_meta.md @@ -0,0 +1,11 @@ +--- +provider: coderabbit +pr: "84" +round: 3 +created_at: 2026-04-28T19:57:23.234084Z +--- + +## Summary +- Total: 14 +- Resolved: 0 +- Unresolved: 14 diff --git a/.compozy/tasks/badges-design/reviews-003/issue_001.md b/.compozy/tasks/badges-design/reviews-003/issue_001.md new file mode 100644 index 000000000..724a7d229 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_001.md @@ -0,0 +1,21 @@ +--- +status: pending +file: internal/api/core/handlers.go +line: 422 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:10a98b0a97c6 +review_hash: 10a98b0a97c6 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 001: Consider rejecting conflicting dry_run alias values. +## Review Comment + +If both `dry_run` and `dry-run` are sent with different values, current behavior silently prefers the first name. + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_002.md b/.compozy/tasks/badges-design/reviews-003/issue_002.md new file mode 100644 index 000000000..e0d82bff2 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_002.md @@ -0,0 +1,21 @@ +--- +status: pending +file: internal/api/core/handlers_test.go +line: 214 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:30e708e01a81 +review_hash: 30e708e01a81 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 002: Strengthen repair response assertions for future regressions. +## Review Comment + +Consider also asserting `persisted` and action payload details, since this test already exercises dry-run behavior. 
+ +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_003.md b/.compozy/tasks/badges-design/reviews-003/issue_003.md new file mode 100644 index 000000000..8e72c207c --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_003.md @@ -0,0 +1,21 @@ +--- +status: pending +file: internal/cli/client.go +line: 64 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:cb616e548bf2 +review_hash: cb616e548bf2 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 003: Add a compile-time assertion for unixSocketClient. +## Review Comment + +`DaemonClient` gained another method, but no `var _ DaemonClient = (*unixSocketClient)(nil)` guard exists. This assertion catches interface drift at compile time instead of later through call-site failures. + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_004.md b/.compozy/tasks/badges-design/reviews-003/issue_004.md new file mode 100644 index 000000000..5b05564f9 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_004.md @@ -0,0 +1,84 @@ +--- +status: pending +file: internal/cli/client_test.go +line: 812 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-O8En,comment:PRRC_kwDOR5y4QM68JGPI +--- + +# Issue 004: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Wrap the repair assertion in a dedicated subtest.** + +Line 809 introduces a new test case, but it is not isolated in a `t.Run("Should ...")` subtest, as the required test-case structure demands. + +
+♻️ Suggested update + +```diff +- repaired, err := client.RepairSession(ctx, "sess-1", SessionRepairQuery{DryRun: true, Force: true}) +- if err != nil || repaired.SessionID != "sess-1" || len(repaired.Actions) != 1 { +- t.Fatalf("RepairSession() = %#v, %v", repaired, err) +- } ++ t.Run("Should repair session", func(t *testing.T) { ++ repaired, err := client.RepairSession(ctx, "sess-1", SessionRepairQuery{DryRun: true, Force: true}) ++ if err != nil || repaired.SessionID != "sess-1" || len(repaired.Actions) != 1 { ++ t.Fatalf("RepairSession() = %#v, %v", repaired, err) ++ } ++ }) +``` +
+ + +As per coding guidelines: `**/*_test.go`: Use `t.Run("Should ...")` subtests with `t.Parallel` as default. + + + +
+📝 Committable suggestion + +> ‼️ **IMPORTANT** +> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. + +```suggestion + t.Run("Should repair session", func(t *testing.T) { + repaired, err := client.RepairSession(ctx, "sess-1", SessionRepairQuery{DryRun: true, Force: true}) + if err != nil || repaired.SessionID != "sess-1" || len(repaired.Actions) != 1 { + t.Fatalf("RepairSession() = %#v, %v", repaired, err) + } + }) +``` + +
+ + + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/client_test.go` around lines 809 - 812, The test assertion +calling client.RepairSession (checking repaired, err, repaired.SessionID and +len(repaired.Actions)) should be moved into a dedicated subtest: wrap the +existing call and its t.Fatalf check inside t.Run("Should repair session with +dry run and force", func(t *testing.T) { t.Parallel(); ... }) so it follows the +repository test structure; keep the same call to client.RepairSession and the +same assertions but execute them inside the subtest and mark it parallel with +t.Parallel(). +``` + +
+ + + + + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_005.md b/.compozy/tasks/badges-design/reviews-003/issue_005.md new file mode 100644 index 000000000..2553a1870 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_005.md @@ -0,0 +1,154 @@ +--- +status: pending +file: internal/cli/session_test.go +line: 354 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-O8E5,comment:PRRC_kwDOR5y4QM68JGPi +--- + +# Issue 005: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Wrap this test case in a `t.Run("Should...")` subtest.** + +The current test is parallelized, but it misses the required subtest wrapper pattern used by your Go test guidelines. + +
+🔧 Suggested update + +```diff + func TestSessionRepairPassesFlagsAndRendersJSON(t *testing.T) { + t.Parallel() + +- var seenQuery SessionRepairQuery +- var seenID string +- deps := newTestDeps(t, &stubClient{ +- repairSessionFn: func(_ context.Context, id string, query SessionRepairQuery) (SessionRepairRecord, error) { +- seenID = id +- seenQuery = query +- return SessionRepairRecord{ +- SessionID: id, +- Issues: []SessionRepairIssueRecord{{ +- Code: session.RepairIssueStopReasonRequiresForce, +- Severity: session.RepairSeverityError, +- TurnID: "turn-1", +- }}, +- Actions: []SessionRepairActionRecord{{ +- Code: session.RepairActionAppendTerminalError, +- TurnID: "turn-1", +- Persisted: false, +- }}, +- }, nil +- }, +- }) +- +- stdout, _, err := executeRootCommand( +- t, +- deps, +- "session", +- "repair", +- "sess-1", +- "--dry-run", +- "--force", +- "-o", +- "json", +- ) +- if err != nil { +- t.Fatalf("executeRootCommand(session repair) error = %v", err) +- } +- if seenID != "sess-1" || !seenQuery.DryRun || !seenQuery.Force { +- t.Fatalf("repair call = id %q query %#v, want dry-run force for sess-1", seenID, seenQuery) +- } +- +- var decoded SessionRepairRecord +- if err := json.Unmarshal([]byte(stdout), &decoded); err != nil { +- t.Fatalf("json.Unmarshal(session repair) error = %v", err) +- } +- if decoded.SessionID != "sess-1" || len(decoded.Issues) != 1 || len(decoded.Actions) != 1 { +- t.Fatalf("decoded repair = %#v, want one issue and one action for sess-1", decoded) +- } ++ t.Run("ShouldPassFlagsAndRenderJSON", func(t *testing.T) { ++ t.Parallel() ++ ++ var seenQuery SessionRepairQuery ++ var seenID string ++ deps := newTestDeps(t, &stubClient{ ++ repairSessionFn: func(_ context.Context, id string, query SessionRepairQuery) (SessionRepairRecord, error) { ++ seenID = id ++ seenQuery = query ++ return SessionRepairRecord{ ++ SessionID: id, ++ Issues: []SessionRepairIssueRecord{{ ++ Code: session.RepairIssueStopReasonRequiresForce, ++ Severity: 
session.RepairSeverityError, ++ TurnID: "turn-1", ++ }}, ++ Actions: []SessionRepairActionRecord{{ ++ Code: session.RepairActionAppendTerminalError, ++ TurnID: "turn-1", ++ Persisted: false, ++ }}, ++ }, nil ++ }, ++ }) ++ ++ stdout, _, err := executeRootCommand( ++ t, ++ deps, ++ "session", ++ "repair", ++ "sess-1", ++ "--dry-run", ++ "--force", ++ "-o", ++ "json", ++ ) ++ if err != nil { ++ t.Fatalf("executeRootCommand(session repair) error = %v", err) ++ } ++ if seenID != "sess-1" || !seenQuery.DryRun || !seenQuery.Force { ++ t.Fatalf("repair call = id %q query %#v, want dry-run force for sess-1", seenID, seenQuery) ++ } ++ ++ var decoded SessionRepairRecord ++ if err := json.Unmarshal([]byte(stdout), &decoded); err != nil { ++ t.Fatalf("json.Unmarshal(session repair) error = %v", err) ++ } ++ if decoded.SessionID != "sess-1" || len(decoded.Issues) != 1 || len(decoded.Actions) != 1 { ++ t.Fatalf("decoded repair = %#v, want one issue and one action for sess-1", decoded) ++ } ++ }) + } +``` +
+ +As per coding guidelines: "MUST use t.Run("Should...") pattern for ALL test cases". + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/cli/session_test.go` around lines 304 - 354, Wrap the existing test +body of TestSessionRepairPassesFlagsAndRendersJSON in a t.Run subtest (e.g. +t.Run("Should pass flags and render JSON", func(t *testing.T) { ... })) and move +the t.Parallel() call inside that subtest; remove the top-level t.Parallel() so +the test follows the required subtest pattern. Ensure the existing variables and +calls (seenQuery, seenID, newTestDeps, stubClient.repairSessionFn, +executeRootCommand, json.Unmarshal, and assertions) remain inside the subtest +closure so behavior and scoping are preserved. +``` + +
+ + + + + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_006.md b/.compozy/tasks/badges-design/reviews-003/issue_006.md new file mode 100644 index 000000000..1289af913 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_006.md @@ -0,0 +1,21 @@ +--- +status: pending +file: internal/daemon/boot.go +line: 521 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:59d120de52a6 +review_hash: 59d120de52a6 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 006: Add startup latency guardrails around boot-time session repair. +## Review Comment + +Operationally, this loop can grow startup time with many stopped sessions. Consider recording duration/count metrics and optionally capping repairs per boot cycle. + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_007.md b/.compozy/tasks/badges-design/reviews-003/issue_007.md new file mode 100644 index 000000000..f6007c594 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_007.md @@ -0,0 +1,23 @@ +--- +status: pending +file: internal/session/repair.go +line: 74 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:bb54c5c9ee9a +review_hash: bb54c5c9ee9a +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 007: Document the new private repair-state types. +## Review Comment + +`repairEvent`, `repairTurnState`, `repairToolCall`, and `repairAnalysis` all land without comments. That makes this state machine harder to scan and can fight the repo's Go comment policy. + +As per coding guidelines: Comments in Go must explain the 'why' and 'what', not just 'what'. Unexported identifiers must have a comment. 
+ +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_008.md b/.compozy/tasks/badges-design/reviews-003/issue_008.md new file mode 100644 index 000000000..0cddfd7eb --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_008.md @@ -0,0 +1,47 @@ +--- +status: pending +file: internal/session/repair.go +line: 173 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-O8E_,comment:PRRC_kwDOR5y4QM68JGPp +--- + +# Issue 008: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Don't skip dangling `tool_result` repair just because the turn is already terminal.** + +This early return suppresses `RepairActionAppendInterruptedToolResult` too. If a session managed to persist `done`/`error` but crashed before one or more matching `tool_result` rows were written, this path can never close those tool calls. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@internal/session/repair.go` around lines 166 - 173, The early return inside +the block that checks analysis.turn.terminal (which currently appends a +RepairIssue with Code RepairIssueTerminalEventAlreadyExists) prevents subsequent +dangling tool_result repair actions (e.g., +RepairActionAppendInterruptedToolResult) from running; remove the return so the +function still records the TerminalEventAlreadyExists issue but continues +executing the rest of the repair logic that scans for and appends interrupted +tool results (look for logic that generates +RepairActionAppendInterruptedToolResult and ensure it runs even when +analysis.turn.terminal is true). Ensure tests cover a case where terminal event +exists but matching tool_result rows are missing so the +append-interrupted-tool-result path executes. +``` + +
+ + + + + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_009.md b/.compozy/tasks/badges-design/reviews-003/issue_009.md new file mode 100644 index 000000000..b4dbf8ffa --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_009.md @@ -0,0 +1,41 @@ +--- +status: pending +file: packages/ui/src/tokens.css +line: 49 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-O8FB,comment:PRRC_kwDOR5y4QM68JGPu +--- + +# Issue 009: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Align the protocol-kind token names with the active kind enum.** + +This block introduces `--color-kind-recipe`, but the rest of this PR still renders a `capability` kind. With those names out of sync, `capability` has no dedicated token to resolve against and will either fall back or inherit the wrong color. Please make the token name and the runtime kind name consistent before shipping. + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@packages/ui/src/tokens.css` around lines 42 - 49, The CSS defines a +protocol-kind token --color-kind-recipe but the runtime enum uses "capability", +so update the token name to match the active kind: replace or add +--color-kind-capability (instead of --color-kind-recipe) in the protocol-kind +colors block so the runtime lookup for "capability" resolves to the intended +color; ensure any usages reference --color-kind-capability consistently. +``` + +
+ + + + + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_010.md b/.compozy/tasks/badges-design/reviews-003/issue_010.md new file mode 100644 index 000000000..938d4f03a --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_010.md @@ -0,0 +1,23 @@ +--- +status: pending +file: web/src/components/design-system-showcase.tsx +line: 297 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:3eefbf5b72d3 +review_hash: 3eefbf5b72d3 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 010: Avoid hardcoding token values in the showcase metadata. +## Review Comment + +These new swatches duplicate literal color values that already live in `packages/ui/src/tokens.css`, but the current tests only verify token names. If the token file changes later, the showcase docs can silently drift. Consider deriving the displayed value from the token source or adding a sync assertion against `tokens.css`. + +As per coding guidelines, `web/src/**/*.{tsx,ts,css}`: Design system tokens (colors, fonts, radius, spacing, motion) MUST be pulled from `DESIGN.md` in the repository root — never invent tokens or use ad-hoc hex values in components. + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_011.md b/.compozy/tasks/badges-design/reviews-003/issue_011.md new file mode 100644 index 000000000..44a5da041 --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_011.md @@ -0,0 +1,21 @@ +--- +status: pending +file: web/src/routes/_app/settings.tsx +line: 140 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:c2c9216e8713 +review_hash: c2c9216e8713 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 011: Consider exposing full label on hover when truncated. 
+## Review Comment + +Small UX win: add a `title` so long section names remain discoverable when clipped. + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_012.md b/.compozy/tasks/badges-design/reviews-003/issue_012.md new file mode 100644 index 000000000..df0b6a95d --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_012.md @@ -0,0 +1,25 @@ +--- +status: pending +file: web/src/systems/knowledge/lib/knowledge-formatters.ts +line: 1 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:166fa7176ddc +review_hash: 166fa7176ddc +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 012: Keep @agh/ui tone types out of the knowledge lib layer. +## Review Comment + +`knowledge-formatters.ts` now depends on the UI package just to describe tone strings. That leaks presentation concerns into `web/src/systems/knowledge/lib` and makes the formatter harder to reuse outside the component layer. A local semantic union mapped to `PillTone` at render time would keep the boundary cleaner. + +As per coding guidelines, `web/src/systems/**/*.{ts,tsx}`: Dependency flow within systems: `adapters → lib → hooks → components` (unidirectional, never reversed). 
+ +Also applies to: 39-50 + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_013.md b/.compozy/tasks/badges-design/reviews-003/issue_013.md new file mode 100644 index 000000000..78564f29f --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_013.md @@ -0,0 +1,21 @@ +--- +status: pending +file: web/src/systems/session/adapters/session-api.test.ts +line: 336 +severity: nitpick +author: coderabbitai[bot] +provider_ref: review:4191605807,nitpick_hash:3a83f40579c6 +review_hash: 3a83f40579c6 +source_review_id: "4191605807" +source_review_submitted_at: "2026-04-28T18:57:12Z" +--- + +# Issue 013: Make the query assertion order-agnostic. +## Review Comment + +This assertion is coupled to parameter order. If serialization order changes, the test can fail even when behavior is correct. + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/badges-design/reviews-003/issue_014.md b/.compozy/tasks/badges-design/reviews-003/issue_014.md new file mode 100644 index 000000000..8b487e32d --- /dev/null +++ b/.compozy/tasks/badges-design/reviews-003/issue_014.md @@ -0,0 +1,47 @@ +--- +status: pending +file: web/src/systems/session/adapters/session-api.ts +line: 155 +author: coderabbitai[bot] +provider_ref: thread:PRRT_kwDOR5y4QM5-O8FD,comment:PRRC_kwDOR5y4QM68JGPw +--- + +# Issue 014: _⚠️ Potential issue_ | _🟠 Major_ +## Review Comment + +_⚠️ Potential issue_ | _🟠 Major_ + +**Use typed adapter errors instead of raw `Error` in `repairSession`.** + +Line 152 and Line 154 throw raw `Error`, which makes adapter-level error handling less structured and inconsistent with system adapter contracts. Please switch these branches to typed session adapter errors. + + +As per coding guidelines `web/src/systems/*/adapters/*-api.ts`: "Typed error classes in adapters — never throw raw errors". + +
+🤖 Prompt for AI Agents + +``` +Verify each finding against the current code and only fix it if needed. + +In `@web/src/systems/session/adapters/session-api.ts` around lines 150 - 155, The +two raw Error throws in repairSession should be replaced with the adapter's +typed errors: throw a SessionNotFoundError (or similarly named not-found adapter +error) when response.status === 404, and throw a SessionAdapterError (or the +generic adapter error class used across systems) for the default failure branch +using defaultApiErrorMessage(response, error) as the message or as properties; +update imports to bring in the correct typed error classes (e.g., +SessionNotFoundError, SessionAdapterError) and ensure you pass the +id/response/error context to those constructors instead of new Error. +``` + +
+ + + + + +## Triage + +- Decision: `UNREVIEWED` +- Notes: diff --git a/.compozy/tasks/delete-session/reviews-001/issue_001.md b/.compozy/tasks/delete-session/reviews-001/issue_001.md deleted file mode 100644 index 476c035f3..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_001.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/api/core/tasks_test.go -line: 918 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:5d026632dc4f -review_hash: 5d026632dc4f -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 001: Consider adding delete to the remaining error-path matrices for parity. -## Review Comment - -Nice addition in the actor-resolver error table. To fully cover this new critical endpoint, mirror `DELETE /tasks/:id` in the service-unavailable and manager-error request matrices in this file as well. - -As per coding guidelines, "Must Check: Focus on critical paths: workflow execution, state management, error handling". - -## Triage - -- Decision: `valid` -- Notes: - The actor-resolver matrix already includes `DELETE /tasks/:id`, but the service-unavailable and manager-error matrices still omit it. That leaves the new delete endpoint without parity coverage in two of the three error-path tables. I will add delete coverage to both matrices and use a delete-specific manager error case that exercises the runtime `400` mapping for delete validation failures. 
diff --git a/.compozy/tasks/delete-session/reviews-001/issue_002.md b/.compozy/tasks/delete-session/reviews-001/issue_002.md deleted file mode 100644 index 09c83218f..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_002.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/api/httpapi/handlers_error_test.go -line: 23 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:57271b62870a -review_hash: 57271b62870a -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 002: Split this expanded error-mapping test into subtests for isolation. -## Review Comment - -Now that this case covers five endpoints, one early failure hides the rest. Please convert to table-driven `t.Run("Should...")` subtests (and mark independent ones parallel) so each route/method mapping fails independently. - -As per coding guidelines, "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "MUST use t.Run("Should...") pattern for ALL test cases". - -## Triage - -- Decision: `valid` -- Notes: - The HTTP handler error test currently checks create/get/resume/delete/stop in one linear flow, so the first failure hides later routes and it does not follow the repo's required subtest pattern. I will convert it to table-driven `t.Run("Should...")` subtests and mark independent subtests parallel. 
diff --git a/.compozy/tasks/delete-session/reviews-001/issue_003.md b/.compozy/tasks/delete-session/reviews-001/issue_003.md deleted file mode 100644 index 7cf9df1b0..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_003.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/api/httpapi/handlers_test.go -line: 1074 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:2bfe6ce257e0 -review_hash: 2bfe6ce257e0 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 003: Use t.Run("Should...") for the new delete-session test case. -## Review Comment - -Please wrap this new case in a `Should...` subtest to match the required test pattern used by repo guidelines. - -As per coding guidelines, "MUST use t.Run("Should...") pattern for ALL test cases". - -## Triage - -- Decision: `valid` -- Notes: - `TestDeleteSessionHandlerReturnsNoContent` is still a single top-level assertion block and does not follow the repository's required `t.Run("Should...")` pattern. I will wrap the delete-session assertions in a `Should...` subtest and make the test parallel-safe. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_004.md b/.compozy/tasks/delete-session/reviews-001/issue_004.md deleted file mode 100644 index b06e89bb0..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_004.md +++ /dev/null @@ -1,82 +0,0 @@ ---- -status: resolved -file: internal/api/spec/spec.go -line: 1559 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6P,comment:PRRC_kwDOR5y4QM66Aomf ---- - -# Issue 004: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**`deleteTask` response codes are out of sync with runtime mapping.** - -The spec advertises `422`, but `StatusForTaskError` maps task validation errors to `400` and conflict cases to `409` (see `internal/api/core/errors.go:180-213`). This will mislead generated clients. 
- -
-🛠️ Suggested response alignment - -```diff - Responses: []ResponseSpec{ - {Status: 204, Description: "No Content"}, - {Status: 404, Description: "Task not found", Body: contract.ErrorPayload{}}, -- {Status: 422, Description: "Invalid task delete", Body: contract.ErrorPayload{}}, -+ {Status: 400, Description: "Invalid task delete", Body: contract.ErrorPayload{}}, -+ {Status: 409, Description: "Task delete conflict", Body: contract.ErrorPayload{}}, - {Status: 503, Description: "Task service is not configured", Body: contract.ErrorPayload{}}, - {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, - }, -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - Responses: []ResponseSpec{ - {Status: 204, Description: "No Content"}, - {Status: 404, Description: "Task not found", Body: contract.ErrorPayload{}}, - {Status: 400, Description: "Invalid task delete", Body: contract.ErrorPayload{}}, - {Status: 409, Description: "Task delete conflict", Body: contract.ErrorPayload{}}, - {Status: 503, Description: "Task service is not configured", Body: contract.ErrorPayload{}}, - {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/spec/spec.go` around lines 1554 - 1559, The OpenAPI spec for -deleteTask is out of sync with runtime error mapping from StatusForTaskError; -replace the single 422 response entry with responses that match the function's -mappings: add a 400 "Invalid task request" (Body: contract.ErrorPayload) for -validation errors and a 409 "Task conflict" (Body: contract.ErrorPayload) for -conflict cases, keeping the existing 404, 503 and 500 entries; update the -ResponseSpec list in the deleteTask definition so generated clients match -StatusForTaskError behavior. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - The spec is out of sync on the validation status code: the delete path returns `task.ErrValidation`, which `StatusForTaskError` maps to `400`, not `422`. I will align the delete-task response table to advertise `400` for invalid delete requests. I am not planning to add `409` because the current delete implementation does not emit a conflict sentinel on this route. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_005.md b/.compozy/tasks/delete-session/reviews-001/issue_005.md deleted file mode 100644 index e24c301ef..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_005.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/api/udsapi/handlers_error_test.go -line: 19 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:e290b3d34bb8 -review_hash: e290b3d34bb8 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 005: Refactor this endpoint-error matrix into t.Run("Should...") subtests. -## Review Comment - -This block now covers multiple behaviors (create/get/resume/delete/stop) in one flow; converting it to table-driven subtests will improve failure locality and align with repo test conventions. - -As per coding guidelines, `**/*_test.go`: MUST use t.Run("Should...") pattern for ALL test cases. - -## Triage - -- Decision: `valid` -- Notes: - The UDS handler error test has the same monolithic structure as the HTTP version: multiple route checks are coupled in one flow, which hurts failure isolation and skips the required `Should...` subtest convention. I will convert it to table-driven parallel subtests. 
diff --git a/.compozy/tasks/delete-session/reviews-001/issue_006.md b/.compozy/tasks/delete-session/reviews-001/issue_006.md deleted file mode 100644 index 35cc1bce3..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_006.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -status: resolved -file: internal/api/udsapi/handlers_test.go -line: 969 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:a108dbe4458f -review_hash: a108dbe4458f -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 006: Consider adding t.Parallel() for test isolation. -## Review Comment - -Other tests in this file use `t.Parallel()` for independent execution. Adding it here would be consistent with the codebase patterns. - -## Triage - -- Decision: `valid` -- Notes: - The UDS delete-session handler test is independent and can safely run in parallel, but it currently does not use the repo's preferred subtest structure or `t.Parallel()`. I will wrap it in a `Should...` subtest and mark the test parallel. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_007.md b/.compozy/tasks/delete-session/reviews-001/issue_007.md deleted file mode 100644 index 5a7f3ac38..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_007.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -status: resolved -file: internal/daemon/daemon_test.go -line: 4134 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:00f8e2d887e2 -review_hash: 00f8e2d887e2 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 007: Avoid aliasing delete behavior to stop in the session test double. -## Review Comment - -`Delete()` currently calls `Stop()`, which can hide regressions now that delete and stop are distinct operations. 
Consider tracking `Delete` calls independently (and optionally separate delete-specific errors/state mutation) so tests can assert the right path was used. - -## Triage - -- Decision: `valid` -- Notes: - In `internal/daemon/daemon_test.go`, the fake session manager currently implements `Delete()` by delegating to `Stop()`, which means delete-path assertions would silently exercise stop bookkeeping instead of delete-specific behavior. I will split delete tracking from stop tracking in the fake manager so future tests can distinguish the two operations. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_008.md b/.compozy/tasks/delete-session/reviews-001/issue_008.md deleted file mode 100644 index 782d70053..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_008.md +++ /dev/null @@ -1,89 +0,0 @@ ---- -status: resolved -file: internal/session/manager_delete.go -line: 27 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6c,comment:PRRC_kwDOR5y4QM66Aomx ---- - -# Issue 008: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -**Wrap propagated errors with delete-specific context.** - -Line 22 and Line 27 return raw errors, which makes operational diagnosis harder in call chains. - - -
-Proposed fix - -```diff - target, err := normalizeStoredSessionID(id) - if err != nil { -- return err -+ return fmt.Errorf("session: normalize delete id %q: %w", id, err) - } - - if _, ok := m.Get(target); ok { - if err := m.StopWithCause(ctx, target, CauseUserRequested, "session deleted"); err != nil { -- return err -+ return fmt.Errorf("session: stop %q before delete: %w", target, err) - } - } -``` -
-As per coding guidelines, "Use explicit error returns with wrapped context: `fmt.Errorf(\"context: %w\", err)`". - - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - target, err := normalizeStoredSessionID(id) - if err != nil { - return fmt.Errorf("session: normalize delete id %q: %w", id, err) - } - - if _, ok := m.Get(target); ok { - if err := m.StopWithCause(ctx, target, CauseUserRequested, "session deleted"); err != nil { - return fmt.Errorf("session: stop %q before delete: %w", target, err) - } - } -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/session/manager_delete.go` around lines 21 - 27, The returns are -propagating raw errors; update the error returns to wrap them with -delete-specific context using fmt.Errorf so callers see cause and operation -(e.g., when checking m.Get(target) and when calling m.StopWithCause(ctx, target, -CauseUserRequested, "session deleted") wrap the err values as fmt.Errorf("delete -session %s: %w", target, err) or similar to add clear delete-specific context -while preserving the original error. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - `Manager.Delete` in the session package still returns raw errors from ID normalization and `StopWithCause`, so callers lose delete-specific context in logs and error chains. I will wrap those error returns with operation-specific context while preserving the original sentinels via `%w`. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_009.md b/.compozy/tasks/delete-session/reviews-001/issue_009.md deleted file mode 100644 index d11a348d1..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_009.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -status: resolved -file: internal/session/manager_delete_test.go -line: 70 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6Z,comment:PRRC_kwDOR5y4QM66Aomu ---- - -# Issue 009: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -**Restructure to `t.Run("Should...")` table-driven subtests.** - -The assertions are useful, but this file currently skips the required subtest pattern. - - -As per coding guidelines, "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "MUST use `t.Run(\"Should...\")` pattern for ALL test cases". - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/session/manager_delete_test.go` around lines 11 - 70, Convert the -two top-level tests into table-driven subtests using t.Run with names that begin -"Should...": create a single TestDeleteBehavior (or similar) that calls -t.Parallel() once and iterates over a cases slice where each case has a name -("Should remove stopped session from history", "Should stop active session -before removing artifacts") and a test func. Move the existing bodies of -TestDeleteRemovesStoppedSessionFromHistory and -TestDeleteStopsActiveSessionBeforeRemovingArtifacts into the case handlers, -preserving calls to h.manager.Stop, h.manager.Delete, h.manager.Status, -h.manager.ListAll, h.manager.Get, session.SessionDir(), and assertions on -h.driver.stopCalls; invoke each case with t.Run(case.name, func(t *testing.T) { -t.Parallel(); ... }) so each subtest runs in parallel and keeps original -assertions and setup (newHarness, createSession) unchanged. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - `internal/session/manager_delete_test.go` still uses two separate top-level tests instead of the repository's required table-driven `Should...` subtest pattern. I will consolidate these delete scenarios into one parallel table-driven test and keep the existing assertions intact. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_010.md b/.compozy/tasks/delete-session/reviews-001/issue_010.md deleted file mode 100644 index 8925bd5f5..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_010.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_task.go -line: 120 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6i,comment:PRRC_kwDOR5y4QM66Aom5 ---- - -# Issue 010: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Map parent-child FK delete failures to a domain error.** - -`DeleteTask` only maps “0 rows affected” to `ErrTaskNotFound`. If the task has direct children (`tasks.parent_task_id` FK without cascade), this path returns a raw DB constraint error instead of a stable task-domain error, which can surface as an internal server error upstream. - -Please normalize this failure (e.g., pre-check child count or translate FK constraint failure to a dedicated task error). - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_task.go` around lines 104 - 120, DeleteTask -currently only maps 0-rows to taskpkg.ErrTaskNotFound and lets DB FK constraint -errors bubble up; update DeleteTask to normalize parent-child FK failures by -either (a) pre-checking for children with a SELECT COUNT(*) FROM tasks WHERE -parent_task_id = ? using the same trimmedID and return a domain error (e.g., -taskpkg.ErrTaskHasChildren) if count>0, or (b) catch the DB constraint error -from g.db.ExecContext and translate it to taskpkg.ErrTaskHasChildren before -returning; modify the logic around ExecContext/requireRowsAffected (referencing -DeleteTask, requireTaskValue, requireRowsAffected, and taskpkg.ErrTaskNotFound) -so callers receive a stable task-domain error instead of a raw DB constraint -error. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - `GlobalDB.DeleteTask` only normalizes "0 rows affected" and otherwise returns the raw SQLite error, so a foreign-key delete failure can leak as a transport `500` if it occurs at the storage boundary. The manager already treats child-task deletes as validation failures, so the store should translate the SQLite foreign-key failure to the same stable task-domain validation error. I will add that mapping in `global_db_task.go` and add a minimal storage test in `internal/store/globaldb/global_db_task_test.go` because the scoped files do not currently contain direct coverage for this constraint path. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_011.md b/.compozy/tasks/delete-session/reviews-001/issue_011.md deleted file mode 100644 index 3f3b6f9e9..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_011.md +++ /dev/null @@ -1,85 +0,0 @@ ---- -status: resolved -file: internal/task/manager.go -line: 300 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6o,comment:PRRC_kwDOR5y4QM66AonA ---- - -# Issue 011: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -**Wrap delete-path failures with operation context.** - -Several returns (for example Line 281, Line 289, Line 294, Line 299, Line 2201, and Line 2214) bubble raw errors. Add contextual wrapping so delete failures are diagnosable in logs and callers. - -
-♻️ Suggested patch - -```diff - record, err := m.store.GetTask(ctx, trimmedID) - if err != nil { -- return err -+ return fmt.Errorf("task: load task %q for delete: %w", trimmedID, err) - } - if err := m.ensureTaskDeleteAllowed(ctx, record); err != nil { - return err - } - - dependents, err := m.store.ListDependents(ctx, trimmedID) - if err != nil { -- return err -+ return fmt.Errorf("task: list dependents for task %q delete: %w", trimmedID, err) - } - - if err := m.store.DeleteTask(ctx, trimmedID); err != nil { -- return err -+ return fmt.Errorf("task: delete task %q: %w", trimmedID, err) - } - - for _, dependentID := range dependentIDs { - if _, err := m.reconcileTaskCascade(ctx, dependentID); err != nil { -- return err -+ return fmt.Errorf("task: reconcile dependent task %q after deleting %q: %w", dependentID, trimmedID, err) - } - } -``` -
- - - -As per coding guidelines, "Use explicit error returns with wrapped context: `fmt.Errorf(\"context: %w\", err)`". - - -Also applies to: 2199-2214 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/task/manager.go` around lines 279 - 300, The delete path returns raw -errors from calls like m.store.GetTask, m.ensureTaskDeleteAllowed, -m.store.ListDependents, m.store.DeleteTask and m.reconcileTaskCascade; update -each error return to wrap the underlying error with contextual text using -fmt.Errorf("...: %w", err) (e.g., include the operation and trimmedID or -dependentID), so replace bare returns like `return err` with wrapped errors that -mention the failing operation -(GetTask/DeleteTask/ListDependents/reconcileTaskCascade) and the affected task -ID; apply the same wrapping for the other occurrences around lines 2199–2214. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - The task manager delete path currently returns raw errors from `GetTask`, `ListDependents`, `DeleteTask`, `reconcileTaskCascade`, `CountDirectChildren`, and `ListTaskRuns`, which makes delete failures harder to diagnose. I will wrap those errors with delete-specific context and add delete-path tests in `internal/task/manager_test.go` to verify the wrapped errors still preserve their sentinels. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_012.md b/.compozy/tasks/delete-session/reviews-001/issue_012.md deleted file mode 100644 index 2f86dd2a5..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_012.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -status: resolved -file: internal/task/manager_test.go -line: 197 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6k,comment:PRRC_kwDOR5y4QM66Aom7 ---- - -# Issue 012: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Triage state cleanup is incomplete due to wrong map key deletion.** - -Line 197 deletes by raw `taskID`, but `s.triageStates` keys are composite (`taskID|actorKind|actorRef`). This leaves stale triage state after task deletion. - - -
-Suggested fix - -```diff -- delete(s.triageStates, taskID) -+ triagePrefix := taskID + "|" -+ for key := range s.triageStates { -+ if strings.HasPrefix(key, triagePrefix) { -+ delete(s.triageStates, key) -+ } -+ } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - triagePrefix := taskID + "|" - for key := range s.triageStates { - if strings.HasPrefix(key, triagePrefix) { - delete(s.triageStates, key) - } - } -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/task/manager_test.go` at line 197, The test deletes triage state -using the raw taskID but s.triageStates uses composite keys; update the cleanup -to delete the exact composite key constructed from taskID, actorKind and -actorRef (the same key format used when inserting into s.triageStates) — e.g., -build the composite key for the entry under test and call delete(s.triageStates, -compositeKey) so the triage state is fully removed. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - The in-memory task store used by tests removes triage state by raw task ID even though triage records are keyed by the composite `taskID|actorKind|actorRef` format. That leaves stale triage entries after delete. I will remove all triage entries for the deleted task by key prefix and cover the behavior with delete-path tests in `internal/task/manager_test.go`. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_013.md b/.compozy/tasks/delete-session/reviews-001/issue_013.md deleted file mode 100644 index 793ca950f..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_013.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -status: resolved -file: web/src/hooks/routes/use-session-page-controls.ts -line: 12 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6s,comment:PRRC_kwDOR5y4QM66AonF ---- - -# Issue 013: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -**Use the session public barrel instead of internal cross-system paths.** - -This route hook imports from `@/systems/session/adapters/...`, `.../hooks/...`, and `.../types`. Please route cross-system usage through `@/systems/session` to preserve system boundaries. - - - -As per coding guidelines, "Cross-system imports MUST only go through the public barrel (`@/systems/`). Never reach into another system's internals." - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/hooks/routes/use-session-page-controls.ts` around lines 6 - 12, The -imports in this hook currently reach into session internals; replace the -internal paths with the session public barrel by importing -useClearSessionConversation, useDeleteSession, useResumeSession, useStopSession -and SessionPayload from "@/systems/session" (the system's public barrel) so all -cross-system usage goes through the public API; update the import statement to -reference "@/systems/session" and remove any direct references to -adapters/hooks/types paths. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - `use-session-page-controls.ts` reaches into `session` internals through adapter, hook, and type paths instead of using the system barrel. I will switch the hook to import session APIs from `@/systems/session` and add the minimal missing barrel export in `web/src/systems/session/index.ts` so the cross-system boundary is clean. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_014.md b/.compozy/tasks/delete-session/reviews-001/issue_014.md deleted file mode 100644 index ed527f786..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_014.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: web/src/hooks/routes/use-session-page-controls.ts -line: 63 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:9d369e2886c9 -review_hash: 9d369e2886c9 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 014: Harden handleDelete against concurrent control mutations. -## Review Comment - -`handleDelete` currently blocks only duplicate deletes. Consider guarding against other in-flight control mutations too (`isStopping`, `isResuming`, `isClearing`, `isCancellingPrompt`) to avoid overlapping operations through non-UI call paths. - -Also applies to: 92-97 - -## Triage - -- Decision: `valid` -- Notes: - `handleDelete` and `handleClear` only guard against their own mutation being pending. The UI disables these paths via derived state, but the callback functions themselves still allow overlapping operations if they are invoked programmatically while another control mutation is active. I will harden both callbacks against all in-flight control mutations and add a focused hook test in `web/src/hooks/routes/use-session-page-controls.test.tsx` because the current scoped files do not include route-hook coverage. 
diff --git a/.compozy/tasks/delete-session/reviews-001/issue_015.md b/.compozy/tasks/delete-session/reviews-001/issue_015.md deleted file mode 100644 index 5b8fdbedd..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_015.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -status: resolved -file: web/src/systems/session/hooks/use-session-actions.test.tsx -line: 170 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:cc1a31cf1c40 -review_hash: cc1a31cf1c40 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 015: Add a delete-failure test case to lock cache behavior. -## Review Comment - -This test is good for the success path. Please add a failure-path case (`deleteSession` rejects) to assert caches/drafts are not incorrectly cleared when deletion fails. - -## Triage - -- Decision: `valid` -- Notes: - The success-path delete hook test does not cover the regression described in issue 016: failed deletes should not clear drafts or remove cached session data. I will add a failure-path test that keeps the detail/history/transcript/events caches and draft intact when `deleteSession` rejects. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_016.md b/.compozy/tasks/delete-session/reviews-001/issue_016.md deleted file mode 100644 index 8ba862887..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_016.md +++ /dev/null @@ -1,132 +0,0 @@ ---- -status: resolved -file: web/src/systems/session/hooks/use-session-actions.ts -line: 86 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58pG6u,comment:PRRC_kwDOR5y4QM66AonH ---- - -# Issue 016: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -
-🧩 Analysis chain - -🌐 Web query: - -`In TanStack Query v5, does useMutation `onSettled` execute for both success and error results?` - -💡 Result: - -Yes. In TanStack Query v5, useMutation’s onSettled callback executes for both successful mutations and mutations that encounter an error. Official docs state that onSettled “will fire when the mutation is either successfully fetched or encounters an error” and it is passed either the data or the error (data may be undefined, error may be null). - -Citations: - -- 1: https://tanstack.com/query/v5/docs/framework/react/guides/mutations -- 2: https://tanstack.com/query/v5/docs/framework/react/reference/useMutation - ---- - -🏁 Script executed: - -```shell -fd -t f "use-session-actions.ts" web/ -``` - -Repository: compozy/agh - -Length of output: 107 - ---- - -🏁 Script executed: - -```shell -# Find and examine the file -find . -name "use-session-actions.ts" -type f -exec head -100 {} + -``` - -Repository: compozy/agh - -Length of output: 3106 - ---- - -🏁 Script executed: - -```shell -# Check if sessionKeys definition exists and what keys it contains -rg "sessionKeys\s*=" --type ts -A 10 web/src/systems/session/ -``` - -Repository: compozy/agh - -Length of output: 972 - ---- - - - -
- -**Move cache removal to `onSuccess` to avoid clearing session data on failed delete, and also remove the stale `events` cache.** - -In TanStack Query v5, `onSettled` executes for both successful and failed mutations. The current implementation removes session caches unconditionally in `onSettled`, which means a failed delete will still wipe local session data. Additionally, `sessionKeys.events(id)` is left behind after successful deletion. - -
-Suggested fix - -```diff - export function useDeleteSession() { - const queryClient = useQueryClient(); - - return useMutation({ - mutationFn: (id: string) => deleteSession(id), - onSuccess: (_data, id) => { - useSessionStore.getState().clearDraft(id); -+ queryClient.removeQueries({ queryKey: sessionKeys.detail(id) }); -+ queryClient.removeQueries({ queryKey: sessionKeys.history(id) }); -+ queryClient.removeQueries({ queryKey: sessionKeys.transcript(id) }); -+ queryClient.removeQueries({ queryKey: sessionKeys.events(id) }); - }, -- onSettled: (_data, _error, id) => { -- queryClient.removeQueries({ queryKey: sessionKeys.detail(id) }); -- queryClient.removeQueries({ queryKey: sessionKeys.history(id) }); -- queryClient.removeQueries({ queryKey: sessionKeys.transcript(id) }); -+ onSettled: () => { - queryClient.invalidateQueries({ queryKey: sessionKeys.lists() }); - }, - }); - } -``` -
- -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/session/hooks/use-session-actions.ts` around lines 82 - 86, -Currently cache removal runs in the mutation's onSettled handler which clears -session data even when delete fails; move those queryClient.removeQueries calls -into the mutation's onSuccess handler instead, and add a call to remove -sessionKeys.events(id) alongside sessionKeys.detail(id), -sessionKeys.history(id), sessionKeys.transcript(id), and still call -queryClient.invalidateQueries({ queryKey: sessionKeys.lists() }) on success so -that only successful deletes purge all related caches. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - `useDeleteSession` currently clears drafts and removes session caches from `onSettled`, which runs for both success and failure. That means a failed delete can still wipe local session state, and the `events` cache is not removed on success. I will move delete-side cache removal into `onSuccess`, include `sessionKeys.events(id)`, and keep failure paths from purging local state. diff --git a/.compozy/tasks/delete-session/reviews-001/issue_017.md b/.compozy/tasks/delete-session/reviews-001/issue_017.md deleted file mode 100644 index ac120b903..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_017.md +++ /dev/null @@ -1,22 +0,0 @@ ---- -status: resolved -file: web/src/systems/tasks/adapters/tasks-api.test.ts -line: 261 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:52be5e2667c2 -review_hash: 52be5e2667c2 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 017: Strengthen the 404 test to assert the typed error contract. -## Review Comment - -Right now it only checks the message. Also assert `TasksApiError` so a regression to raw `Error` is caught. - -## Triage - -- Decision: `valid` -- Notes: - The delete-task 404 adapter test only checks the message string, so it would not catch a regression where the code starts throwing a generic `Error` instead of `TasksApiError`. I will strengthen the assertion to check both the typed error contract and the message. 
diff --git a/.compozy/tasks/delete-session/reviews-001/issue_018.md b/.compozy/tasks/delete-session/reviews-001/issue_018.md deleted file mode 100644 index 161f6181c..000000000 --- a/.compozy/tasks/delete-session/reviews-001/issue_018.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -status: resolved -file: web/src/systems/tasks/hooks/use-task-actions.ts -line: 155 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151198531,nitpick_hash:59fea0c87854 -review_hash: 59fea0c87854 -source_review_id: "4151198531" -source_review_submitted_at: "2026-04-21T23:03:23Z" ---- - -# Issue 018: Minor redundancy in cache operations. -## Review Comment - -`removeQueries` correctly purges the deleted task's detail from cache. However, `invalidateTaskQueries(queryClient, id)` on line 164 will also call `invalidateQueries({ queryKey: tasksKeys.detail(id) })` (see line 110), which is redundant after the query has already been removed. - -This is harmless but slightly wasteful. Consider either: -1. Not passing `id` to `invalidateTaskQueries` since the detail query is already removed -2. Or keeping as-is for consistency with other mutation hooks - -## Triage - -- Decision: `invalid` -- Notes: - This is an optional micro-optimization, not a correctness issue. `removeQueries(tasksKeys.detail(id))` followed by the existing shared invalidation helper is harmless and keeps the delete hook behavior consistent with the rest of the task mutation hooks. I will leave the code as-is and resolve this issue as analysis-only. 
diff --git a/.compozy/tasks/delete-session/reviews-002/_meta.md b/.compozy/tasks/delete-session/reviews-002/_meta.md deleted file mode 100644 index 94eb1cb47..000000000 --- a/.compozy/tasks/delete-session/reviews-002/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "58" -round: 2 -created_at: 2026-04-24T01:34:29.636365Z ---- - -## Summary -- Total: 9 -- Resolved: 0 -- Unresolved: 9 diff --git a/.compozy/tasks/delete-session/reviews-002/issue_001.md b/.compozy/tasks/delete-session/reviews-002/issue_001.md deleted file mode 100644 index b0c805551..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_001.md +++ /dev/null @@ -1,163 +0,0 @@ ---- -status: resolved -file: internal/daemon/daemon_test.go -line: 2075 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59RYXn,comment:PRRC_kwDOR5y4QM6622vD ---- - -# Issue 001: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Wrap this test body in a `t.Run("Should...")` subtest.** - -The behavior being checked is good, but this test case should follow the enforced subtest naming pattern. - - - -
-✅ Suggested structure - -```diff - func TestFakeSessionManagerDeleteTracksDeleteIndependently(t *testing.T) { - t.Parallel() -- -- manager := &fakeSessionManager{ -- infos: []*session.Info{{ID: "sess-a"}, {ID: "sess-b"}}, -- } -- -- if err := manager.Delete(testutil.Context(t), "sess-a"); err != nil { -- t.Fatalf("Delete() error = %v", err) -- } -- -- if got, want := len(manager.deleteCalls), 1; got != want { -- t.Fatalf("len(deleteCalls) = %d, want %d", got, want) -- } -- if got, want := manager.deleteCalls[0], "sess-a"; got != want { -- t.Fatalf("deleteCalls[0] = %q, want %q", got, want) -- } -- if got := len(manager.stopCalls); got != 0 { -- t.Fatalf("len(stopCalls) = %d, want 0", got) -- } -- if got, want := len(manager.infos), 1; got != want { -- t.Fatalf("len(infos) = %d, want %d", got, want) -- } -- if got, want := manager.infos[0].ID, "sess-b"; got != want { -- t.Fatalf("infos[0].ID = %q, want %q", got, want) -- } -+ t.Run("ShouldTrackDeleteIndependentlyFromStop", func(t *testing.T) { -+ t.Parallel() -+ -+ manager := &fakeSessionManager{ -+ infos: []*session.Info{{ID: "sess-a"}, {ID: "sess-b"}}, -+ } -+ -+ if err := manager.Delete(testutil.Context(t), "sess-a"); err != nil { -+ t.Fatalf("Delete() error = %v", err) -+ } -+ -+ if got, want := len(manager.deleteCalls), 1; got != want { -+ t.Fatalf("len(deleteCalls) = %d, want %d", got, want) -+ } -+ if got, want := manager.deleteCalls[0], "sess-a"; got != want { -+ t.Fatalf("deleteCalls[0] = %q, want %q", got, want) -+ } -+ if got := len(manager.stopCalls); got != 0 { -+ t.Fatalf("len(stopCalls) = %d, want 0", got) -+ } -+ if got, want := len(manager.infos), 1; got != want { -+ t.Fatalf("len(infos) = %d, want %d", got, want) -+ } -+ if got, want := manager.infos[0].ID, "sess-b"; got != want { -+ t.Fatalf("infos[0].ID = %q, want %q", got, want) -+ } -+ }) - } -``` -
- -As per coding guidelines, "MUST use t.Run("Should...") pattern for ALL test cases". - - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion -func TestFakeSessionManagerDeleteTracksDeleteIndependently(t *testing.T) { - t.Parallel() - - t.Run("ShouldTrackDeleteIndependentlyFromStop", func(t *testing.T) { - t.Parallel() - - manager := &fakeSessionManager{ - infos: []*session.Info{{ID: "sess-a"}, {ID: "sess-b"}}, - } - - if err := manager.Delete(testutil.Context(t), "sess-a"); err != nil { - t.Fatalf("Delete() error = %v", err) - } - - if got, want := len(manager.deleteCalls), 1; got != want { - t.Fatalf("len(deleteCalls) = %d, want %d", got, want) - } - if got, want := manager.deleteCalls[0], "sess-a"; got != want { - t.Fatalf("deleteCalls[0] = %q, want %q", got, want) - } - if got := len(manager.stopCalls); got != 0 { - t.Fatalf("len(stopCalls) = %d, want 0", got) - } - if got, want := len(manager.infos), 1; got != want { - t.Fatalf("len(infos) = %d, want %d", got, want) - } - if got, want := manager.infos[0].ID, "sess-b"; got != want { - t.Fatalf("infos[0].ID = %q, want %q", got, want) - } - }) -} -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/daemon/daemon_test.go` around lines 2048 - 2075, Wrap the existing -TestFakeSessionManagerDeleteTracksDeleteIndependently body in a t.Run(...) -subtest using the "Should..." naming pattern (e.g. t.Run("Should delete tracks -independently", func(t *testing.T) { ... })), moving t.Parallel() inside that -subtest and keeping the same assertions that exercise fakeSessionManager, its -Delete method, manager.deleteCalls, manager.stopCalls and manager.infos so -behavior is unchanged but the test now follows the required subtest naming -convention. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - - `internal/daemon/daemon_test.go` still has a top-level test body for `TestFakeSessionManagerDeleteTracksDeleteIndependently`, while the repository rule for Go tests is `t.Run("Should...")` subtests. - - This is a style-and-consistency fix only; behavior should remain unchanged. - - Planned fix: wrap the assertions in a named subtest and move the inner `t.Parallel()` there. - -## Resolution - -- Wrapped `TestFakeSessionManagerDeleteTracksDeleteIndependently` in `t.Run("ShouldTrackDeleteIndependentlyFromStop", ...)` and moved the subtest-local `t.Parallel()` inside the named case. -- Preserved the existing assertions and fake-session behavior; this change is structural only. -- Verified with `make verify` (exit `0`). diff --git a/.compozy/tasks/delete-session/reviews-002/issue_002.md b/.compozy/tasks/delete-session/reviews-002/issue_002.md deleted file mode 100644 index 7e39d81a0..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_002.md +++ /dev/null @@ -1,34 +0,0 @@ ---- -status: resolved -file: internal/daemon/daemon_test.go -line: 4034 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167261241,nitpick_hash:5ab3004a8600 -review_hash: 5ab3004a8600 -source_review_id: "4167261241" -source_review_submitted_at: "2026-04-24T01:30:33Z" ---- - -# Issue 002: Add compile-time interface verification for fakeSessionManager. -## Review Comment - -Since this fake now tracks the new delete surface, add an interface assertion to prevent drift as the `SessionManager` contract evolves. - -As per coding guidelines, "Use compile-time interface verification: `var _ Interface = (*Type)(nil)`". - ---- - -## Triage - -- Decision: `valid` -- Notes: - - `fakeSessionManager` in `internal/daemon/daemon_test.go` is used as a transport-facing `SessionManager` fake and now carries the delete surface, but it has no compile-time assertion against the interface it is expected to satisfy. 
- - That leaves the test fake vulnerable to silent interface drift as the session contract evolves. - - Planned fix: add a compile-time interface assertion near the fake type definition. - -## Resolution - -- Added the compile-time assertion `var _ SessionManager = (*fakeSessionManager)(nil)` next to the fake definition in `internal/daemon/daemon_test.go`. -- This now forces the test double to stay aligned with the daemon-facing `SessionManager` contract as the interface evolves. -- Verified with `make verify` (exit `0`). diff --git a/.compozy/tasks/delete-session/reviews-002/issue_003.md b/.compozy/tasks/delete-session/reviews-002/issue_003.md deleted file mode 100644 index e48370b2c..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_003.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: internal/daemon/daemon_test.go -line: 4163 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167261241,nitpick_hash:1d95a04a1c96 -review_hash: 1d95a04a1c96 -source_review_id: "4167261241" -source_review_submitted_at: "2026-04-24T01:30:33Z" ---- - -# Issue 003: Make Delete able to model not-found behavior in tests. -## Review Comment - -This fake currently returns `nil` even when the session ID does not exist, which can mask negative-path behavior in callers. - -## Triage - -- Decision: `valid` -- Notes: - - The `fakeSessionManager.Delete` implementation in `internal/daemon/daemon_test.go` currently returns `nil` even when no matching session exists, unlike the real session manager which reports `session.ErrSessionNotFound`. - - That divergence can hide negative-path behavior in daemon tests that depend on delete semantics. - - Planned fix: make the fake return `session.ErrSessionNotFound` when no in-memory session matches the requested id. - -## Resolution - -- Updated `fakeSessionManager.Delete` to return `session.ErrSessionNotFound` when the requested session ID is absent from the fake's in-memory state. 
-- The fake now matches real delete semantics on the negative path, which prevents daemon tests from silently masking not-found behavior. -- Verified with `make verify` (exit `0`). diff --git a/.compozy/tasks/delete-session/reviews-002/issue_004.md b/.compozy/tasks/delete-session/reviews-002/issue_004.md deleted file mode 100644 index 5b875259f..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_004.md +++ /dev/null @@ -1,88 +0,0 @@ ---- -status: resolved -file: internal/session/manager_delete.go -line: 28 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59RYXr,comment:PRRC_kwDOR5y4QM6622vO ---- - -# Issue 004: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Treat a concurrent stop as benign here.** - -This is a check-then-act race: `m.Get(target)` can succeed, the session can finish and disappear from runtime state, and `StopWithCause` can then return `ErrSessionNotFound`. In that case the persisted session history should still be deleted instead of aborting the whole operation. - -
-Suggested hardening - -```diff - if _, ok := m.Get(target); ok { -- if err := m.StopWithCause(ctx, target, CauseUserRequested, "session deleted"); err != nil { -+ if err := m.StopWithCause(ctx, target, CauseUserRequested, "session deleted"); err != nil && -+ !errors.Is(err, ErrSessionNotFound) { - return fmt.Errorf("session: stop %q before delete: %w", target, err) - } - } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - if _, ok := m.Get(target); ok { - if err := m.StopWithCause(ctx, target, CauseUserRequested, "session deleted"); err != nil && - !errors.Is(err, ErrSessionNotFound) { - return fmt.Errorf("session: stop %q before delete: %w", target, err) - } - } -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/session/manager_delete.go` around lines 25 - 28, The current -check-then-act around m.Get(target) + m.StopWithCause can race: if the session -disappears between calls StopWithCause may return ErrSessionNotFound and abort -delete; treat that outcome as benign. Modify the code that calls -m.StopWithCause(ctx, target, CauseUserRequested, "session deleted") to continue -on ErrSessionNotFound (i.e., ignore that specific error) and only return on -other errors; reference m.Get, m.StopWithCause, and the ErrSessionNotFound -sentinel when implementing the conditional handling so the persisted session -history deletion proceeds even if the runtime session vanished concurrently. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - - `internal/session/manager_delete.go` does a `m.Get(target)` check and then calls `m.StopWithCause(...)`. If another goroutine finalizes and removes the session between those two calls, `StopWithCause` can return `ErrSessionNotFound` even though delete should still proceed against the persisted session directory. - - Aborting the delete on that sentinel error is a real check-then-act race. - - Planned fix: treat `ErrSessionNotFound` from the pre-delete stop path as benign and continue with artifact removal. A focused test helper is likely required outside the scoped file list to exercise the new error policy. - -## Resolution - -- Refactored the pre-delete stop logic in `internal/session/manager_delete.go` into a helper that treats `ErrSessionNotFound` from `StopWithCause` as benign, so persisted session deletion continues after a concurrent stop race. -- Added focused regression coverage in `internal/session/manager_delete_test.go` for the race where runtime state disappears between `Get` and `StopWithCause`. -- Verified with `make verify` (exit `0`). diff --git a/.compozy/tasks/delete-session/reviews-002/issue_005.md b/.compozy/tasks/delete-session/reviews-002/issue_005.md deleted file mode 100644 index 8985df227..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_005.md +++ /dev/null @@ -1,32 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_task_test.go -line: 245 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167261241,nitpick_hash:58c2326585d3 -review_hash: 58c2326585d3 -source_review_id: "4167261241" -source_review_submitted_at: "2026-04-24T01:30:33Z" ---- - -# Issue 005: Adopt the required t.Run("Should...") pattern for this test case. -## Review Comment - -This test is valuable, but it currently skips the repository-required subtest naming/structure convention. 
- -As per coding guidelines, "MUST use t.Run("Should...") pattern for ALL test cases" and "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests". - -## Triage - -- Decision: `valid` -- Notes: - - `TestGlobalDBDeleteTaskMapsChildConstraintToValidationError` in `internal/store/globaldb/global_db_task_test.go` is written as a flat top-level test body instead of the required `t.Run("Should...")` structure used across this repo. - - The underlying assertions are useful and should stay; only the test shape needs to change. - - Planned fix: move the current body into a named subtest and keep the assertions intact. - -## Resolution - -- Wrapped `TestGlobalDBDeleteTaskMapsChildConstraintToValidationError` in a named `t.Run("ShouldMapChildConstraintFailuresToTaskValidationErrors", ...)` subtest. -- Kept the existing assertions and test data unchanged; this is a conformance fix for the repo's Go test structure requirements. -- Verified with `make verify` (exit `0`). diff --git a/.compozy/tasks/delete-session/reviews-002/issue_006.md b/.compozy/tasks/delete-session/reviews-002/issue_006.md deleted file mode 100644 index f1bd774b5..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_006.md +++ /dev/null @@ -1,59 +0,0 @@ ---- -status: resolved -file: internal/task/manager.go -line: 305 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59RYXu,comment:PRRC_kwDOR5y4QM6622vR ---- - -# Issue 006: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**This delete can fail after the task is already gone.** - -Once `m.store.DeleteTask` succeeds, any later `reconcileTaskCascade` error returns a failure even though the resource has already been deleted. That leaves dependent statuses stale and makes client retries observe `404` after a prior `500`. This needs an atomic delete+reconcile path in the store layer, or a guaranteed post-delete repair mechanism. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/task/manager.go` around lines 293 - 305, The current flow calls -m.store.DeleteTask(ctx, trimmedID) then runs reconcileTaskCascade for -dependents, which can return an error after the resource is already deleted; fix -by making delete+reconcile atomic at the store layer or by guaranteeing -post-delete repair: add a new store method (e.g., -DeleteTaskAndReconcile/DeleteTaskWithCascade) that performs the delete and -dependent reconciliation inside the same transactional boundary (or returns -dependentIDs and reconciliation results) and call that from Manager instead of -m.store.DeleteTask, or if transactions are unavailable, modify manager.go to -treat a successful DeleteTask as definitive (do not return failure on -reconcileTaskCascade errors), log the reconcile error and enqueue an -asynchronous retry/repair job (or schedule background goroutine) so the API -returns success while dependents are eventually reconciled; update references to -m.store.DeleteTask and reconcileTaskCascade accordingly. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: - - `internal/task/manager.go` deletes the primary task first and only then reconciles dependent task statuses. If reconciliation fails after the delete succeeds, the method returns an error even though the target task is already gone. - - That creates a real success-reporting bug: callers can observe a failed delete response and then get `ErrTaskNotFound` on retry, while dependent durable statuses may remain unreconciled. - - Planned fix: run the delete plus dependent reconciliation inside one store-backed transaction when the store supports it, so a reconcile failure rolls back the delete. This will require minimal supporting changes outside the scoped file list in task store interfaces and tests, which will be kept tightly limited and documented here. - -## Resolution - -- Added a transactional delete path so `internal/task/manager.go` now performs task deletion and dependent-status reconciliation inside one store-backed unit when the store supports `WithDeleteTaskTransaction(...)`. -- Extended the global task store and the task manager test harness to support that transactional path and to prove rollback when dependent reconciliation fails. -- This removes the false-failure window where the API could return an error after the task had already been permanently deleted. -- Verified with `make verify` (exit `0`). 
diff --git a/.compozy/tasks/delete-session/reviews-002/issue_007.md b/.compozy/tasks/delete-session/reviews-002/issue_007.md deleted file mode 100644 index f77183582..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_007.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: web/src/hooks/routes/use-session-page-controls.test.tsx -line: 137 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167261241,nitpick_hash:095ae13675d7 -review_hash: 095ae13675d7 -source_review_id: "4167261241" -source_review_submitted_at: "2026-04-24T01:30:33Z" ---- - -# Issue 007: Strengthen the idle delete test by executing callback side effects. -## Review Comment - -Right now it only verifies callback presence. Executing captured `onSuccess`/`onError` would directly validate reset + toast + optional `onDeleteSuccess` behavior. - -## Triage - -- Decision: `valid` -- Notes: - - The idle-delete test in `web/src/hooks/routes/use-session-page-controls.test.tsx` only checks that `mutate` receives callbacks, but it never executes `onSuccess` or `onError`. - - That leaves the reset, toast, and optional `onDeleteSuccess` behavior unverified even though the hook owns those effects. - - Planned fix: capture the mutation options, invoke both callbacks, and assert the expected side effects. - -## Resolution - -- Strengthened the idle-delete hook test to execute the captured `onSuccess` and `onError` callbacks instead of only asserting their presence. -- The test now directly verifies reset behavior, success and error toasts, and the optional `onDeleteSuccess` callback contract owned by the hook. -- Verified with `make verify`, `make web-lint`, and `make web-typecheck` (all exit `0`). 
diff --git a/.compozy/tasks/delete-session/reviews-002/issue_008.md b/.compozy/tasks/delete-session/reviews-002/issue_008.md deleted file mode 100644 index 16c6663f1..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_008.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: web/src/hooks/routes/use-session-page-controls.ts -line: 50 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167261241,nitpick_hash:8484cc96e3e3 -review_hash: 8484cc96e3e3 -source_review_id: "4167261241" -source_review_submitted_at: "2026-04-24T01:30:33Z" ---- - -# Issue 008: Consider applying controlsBusy guard to stop/resume for full control serialization. -## Review Comment - -`handleDelete`/`handleClear` are protected, but `handleStop`/`handleResume` can still fire while another control action is pending. Guarding them too keeps mutation concurrency policy consistent. - -## Triage - -- Decision: `valid` -- Notes: - - `web/src/hooks/routes/use-session-page-controls.ts` serializes delete and clear behind `controlsBusy`, but `handleStop` and `handleResume` do not apply the same guard. - - This is a real orchestration gap because the hook can still dispatch stop/resume while other control mutations are pending, and the header component does not know about `clearMutation.isPending`. - - Planned fix: guard stop/resume in the hook with the same busy-state policy and add regression coverage in the hook test file. - -## Resolution - -- Added the same `controlsBusy` guard used by delete and clear to both `handleStop` and `handleResume` in `web/src/hooks/routes/use-session-page-controls.ts`. -- Added regression coverage proving stop and resume are suppressed while another control mutation is pending. -- Verified with `make verify`, `make web-lint`, and `make web-typecheck` (all exit `0`). 
diff --git a/.compozy/tasks/delete-session/reviews-002/issue_009.md b/.compozy/tasks/delete-session/reviews-002/issue_009.md deleted file mode 100644 index 7671e000a..000000000 --- a/.compozy/tasks/delete-session/reviews-002/issue_009.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: web/src/systems/session/hooks/use-session-actions.test.tsx -line: 170 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167261241,nitpick_hash:f40554fb62a7 -review_hash: f40554fb62a7 -source_review_id: "4167261241" -source_review_submitted_at: "2026-04-24T01:30:33Z" ---- - -# Issue 009: Also assert list-query invalidation on delete success. -## Review Comment - -This test validates entity cache removal and draft clearing, but it doesn’t assert the `sessionKeys.lists()` invalidation contract from `useDeleteSession`, which is an important regression guard. - -## Triage - -- Decision: `valid` -- Notes: - - `useDeleteSession` explicitly invalidates `sessionKeys.lists()` on success, but `web/src/systems/session/hooks/use-session-actions.test.tsx` only asserts cache removal and draft clearing. - - Missing the list-invalidation assertion leaves an important contract unguarded. - - Planned fix: spy on `queryClient.invalidateQueries` in the delete-success test and assert the list-key invalidation call. - -## Resolution - -- Extended the delete-success test in `web/src/systems/session/hooks/use-session-actions.test.tsx` to assert the `sessionKeys.lists()` invalidation contract. -- The test now guards cache removal, draft clearing, and list refresh behavior together, which closes the missing regression check. -- Verified with `make verify`, `make web-lint`, and `make web-typecheck` (all exit `0`). 
diff --git a/.compozy/tasks/delete-session/reviews-003/_meta.md b/.compozy/tasks/delete-session/reviews-003/_meta.md deleted file mode 100644 index 464f0bc92..000000000 --- a/.compozy/tasks/delete-session/reviews-003/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "58" -round: 3 -created_at: 2026-04-24T02:08:21.240235Z ---- - -## Summary -- Total: 2 -- Resolved: 2 -- Unresolved: 0 diff --git a/.compozy/tasks/delete-session/reviews-003/issue_001.md b/.compozy/tasks/delete-session/reviews-003/issue_001.md deleted file mode 100644 index c71e57d9f..000000000 --- a/.compozy/tasks/delete-session/reviews-003/issue_001.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -status: resolved -file: internal/session/manager_delete_test.go -line: 13 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167388414,nitpick_hash:1bb5ecf448a0 -review_hash: 1bb5ecf448a0 -source_review_id: "4167388414" -source_review_submitted_at: "2026-04-24T02:02:29Z" ---- - -# Issue 001: LGTM! Consider adding t.Parallel() for faster test execution. -## Review Comment - -The table-driven test structure follows the `t.Run("Should...")` pattern and covers the key scenarios: stopped session removal, active session stop-before-delete, concurrent stop race handling, and error wrapping verification. - -Each subtest creates an isolated harness, so they can run in parallel: - -As per coding guidelines, "Add `t.Parallel()` to independent subtests in Go tests". - -## Triage - -- Decision: `valid` -- Root cause: `TestManagerDelete` uses a table-driven `t.Run(...)` loop but does not mark the independent subtests as parallelizable, so the suite misses the concurrency the workspace Go testing guidelines expect. -- Evidence: each case either creates its own `newHarness(t)` with `t.TempDir()`-backed state or exercises the pure `stopSessionBeforeDelete(...)` helper without shared mutable fixtures, so there is no inter-test coupling that would block `t.Parallel()`. 
-- Fix approach: add `t.Parallel()` to the parent test and each subtest closure so the suite can execute concurrently while preserving isolated setup and assertions. - -## Resolution - -- Added `t.Parallel()` to `TestManagerDelete` and to each table-driven subtest in `internal/session/manager_delete_test.go`. -- Kept the existing harness-per-case structure intact, so the change improves execution concurrency without changing session-delete behavior. -- Verified with `go test ./internal/session ./internal/task` and `make verify` (both exit `0`). diff --git a/.compozy/tasks/delete-session/reviews-003/issue_002.md b/.compozy/tasks/delete-session/reviews-003/issue_002.md deleted file mode 100644 index f963fe387..000000000 --- a/.compozy/tasks/delete-session/reviews-003/issue_002.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -status: resolved -file: internal/task/manager.go -line: 2105 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59Rqy6,comment:PRRC_kwDOR5y4QM663P3P ---- - -# Issue 002: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Keep canonical status calculation side-effect free.** - -`canonicalTaskStatus()` is used by read paths like `GetTask` and `ListTasks`, but this helper now resolves dependencies by calling `reconcileTaskWithStore()`, which can persist via `store.UpdateTask()`. That means a read-only request can mutate dependency records and effectively perform writes under read authority. - - - -Also applies to: 2295-2302 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/task/manager.go` around lines 2094 - 2105, The -canonicalTaskStatusWithStore currently calls reconcileTaskWithStore (which can -call store.UpdateTask) and thus performs writes during read operations; change -it to be side-effect free by removing any calls to reconcileTaskWithStore or -other mutating helpers (and by not calling store.UpdateTask), instead deriving -status purely from read-only helpers like hasUnresolvedDependenciesWithStore, -the provided dependencies and runs, and any non-mutating logic; if -reconciliation logic is needed elsewhere, introduce a separate read-only variant -(e.g., reconcileTaskReadOnly or computeCanonicalStatusNoSideEffects) or ensure -reconcileTaskWithStore has a non-mutating mode and use that here, and update -canonicalTaskStatus and callers to use the new read-only pathway. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Root cause: `canonicalTaskStatusWithStore()` asks `hasUnresolvedDependenciesWithStore()` whether dependencies are resolved, and that helper currently calls `reconcileTaskWithStore()` for each dependency. `reconcileTaskWithStore()` can persist through `store.UpdateTask()`, so read-only paths such as `GetTask`, `ListTasks`, and dependency-reference hydration can write task records while computing display status. -- Evidence: `GetTask`, `ListTasks`, `enrichTaskSummaryFromState`, and `taskReference` all route through `canonicalTaskStatus(...)`; the dependency walk in `hasUnresolvedDependenciesWithStore()` is therefore reachable under read authority and currently crosses a mutating reconciliation boundary. -- Fix approach: replace the dependency-status walk used by canonical reads with a recursive, side-effect-free status calculator that only uses `GetTask`, `ListDependencies`, and `ListTaskRuns`. Keep persistence inside explicit reconciliation paths such as `reconcileTaskWithStore()` and `reconcileTaskCascadeWithStore()`. -- Test plan: add regression coverage in `internal/task/manager_test.go` to prove `GetTask`/`ListTasks` compute dependency-derived statuses correctly without mutating stored dependency records. This requires one additional test file beyond the scoped code file because `internal/task/manager.go` has no co-located test cases for this read-only behavior. - -## Resolution - -- Reworked canonical dependency-status evaluation in `internal/task/manager.go` so read-time status calculation uses a recursive, side-effect-free helper instead of `reconcileTaskWithStore()`. -- Left persistence inside explicit reconciliation flows only; `GetTask`, `ListTasks`, and dependency-reference reads now derive the right status without calling `UpdateTask()`. 
-- Added regression coverage in `internal/task/manager_test.go` that seeds a stale dependency record, exercises both `GetTask` and `ListTasks`, and asserts the returned derived status is correct while the stored dependency status remains unchanged. -- Verified with `go test ./internal/session ./internal/task` and `make verify` (both exit `0`). diff --git a/.compozy/tasks/network-default/reviews-001/_meta.md b/.compozy/tasks/network-default/reviews-001/_meta.md deleted file mode 100644 index 97d05592a..000000000 --- a/.compozy/tasks/network-default/reviews-001/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "57" -round: 1 -created_at: 2026-04-24T00:57:29.033415Z ---- - -## Summary -- Total: 4 -- Resolved: 0 -- Unresolved: 4 diff --git a/.compozy/tasks/network-default/reviews-001/issue_001.md b/.compozy/tasks/network-default/reviews-001/issue_001.md deleted file mode 100644 index 58cd62c3e..000000000 --- a/.compozy/tasks/network-default/reviews-001/issue_001.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -status: resolved -file: internal/api/httpapi/helpers_test.go -line: 421 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151161262,nitpick_hash:50d2a42d9b89 -review_hash: 50d2a42d9b89 -source_review_id: "4151161262" -source_review_submitted_at: "2026-04-21T22:49:44Z" ---- - -# Issue 001: Centralize testConfigWithDisabledNetwork. -## Review Comment - -This helper is now duplicated in `internal/api/httpapi/helpers_test.go`, `internal/api/udsapi/helpers_test.go`, and `internal/api/core/test_helpers_test.go`. Moving it into shared testutil would reduce drift the next time config defaults change. - -As per coding guidelines, `**/*_test.go`: "Check for shared test utilities usage to avoid duplication". 
- -## Triage - -- Decision: `VALID` -- Notes: - - `testConfigWithDisabledNetwork` is currently duplicated in all three cited test helper files, and each copy does the same `aghconfig.DefaultWithHome(homePaths)` plus `cfg.Network.Enabled = false` mutation. - - Root cause: the network-default test additions reused a local helper pattern instead of extracting the shared API-test fixture into `internal/api/testutil`. - - Fix approach: add one shared helper in `internal/api/testutil` and update the three test helper files to call it. This requires minimal out-of-scope edits to `internal/api/udsapi/helpers_test.go`, `internal/api/core/test_helpers_test.go`, and `internal/api/testutil/apitest.go` so the duplication is actually removed rather than moved. - - Implemented: `internal/api/testutil.ConfigWithDisabledNetwork(...)` now owns the config construction and the three package-local helpers delegate to it. - - Verified: focused `go test ./internal/api/httpapi ./internal/api/udsapi ./internal/api/core ./internal/config ./internal/daemon ./internal/testutil/e2e` passed, then `make verify` passed. diff --git a/.compozy/tasks/network-default/reviews-001/issue_002.md b/.compozy/tasks/network-default/reviews-001/issue_002.md deleted file mode 100644 index 24ca01234..000000000 --- a/.compozy/tasks/network-default/reviews-001/issue_002.md +++ /dev/null @@ -1,96 +0,0 @@ ---- -status: resolved -file: internal/config/bootstrap_test.go -line: 192 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58o_sg,comment:PRRC_kwDOR5y4QM66AfBM ---- - -# Issue 002: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Use required `t.Run("Should...")` subtest pattern for these new scenarios** - -These new cases are added as standalone tests; project test guidelines require table-driven subtests with `t.Run("Should...")` for test cases. - -
-♻️ Suggested structure - -```diff --func TestSaveBootstrapConfigFirstRunKeepsNetworkEnabledByDefault(t *testing.T) { -- t.Parallel() -- ... --} -- --func TestSaveBootstrapConfigPreservesExplicitNetworkDisable(t *testing.T) { -- t.Parallel() -- ... --} -+func TestSaveBootstrapConfigNetworkBehavior(t *testing.T) { -+ t.Parallel() -+ -+ tests := []struct { -+ name string -+ seed string -+ }{ -+ { -+ name: "ShouldKeepNetworkEnabledByDefaultOnFirstRun", -+ seed: "", -+ }, -+ { -+ name: "ShouldPreserveExplicitNetworkDisable", -+ seed: "[network]\nenabled = false\ndefault_channel = \"legacy\"\n", -+ }, -+ } -+ -+ for _, tc := range tests { -+ tc := tc -+ t.Run(tc.name, func(t *testing.T) { -+ t.Parallel() -+ // arrange/act/assert... -+ }) -+ } -+} -``` -
- - -As per coding guidelines, `**/*_test.go`: “MUST use t.Run("Should...") pattern for ALL test cases” and “Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests”. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/config/bootstrap_test.go` around lines 110 - 192, The two standalone -tests (TestSaveBootstrapConfigFirstRunKeepsNetworkEnabledByDefault and -TestSaveBootstrapConfigPreservesExplicitNetworkDisable) violate the project -guideline requiring table-driven subtests; refactor them into a single -table-driven test that iterates over cases and calls t.Run("Should ...", func(t -*testing.T){ ... }) for each scenario, referencing the same helpers -(ResolveHomePathsFrom, SaveBootstrapConfig, LoadGlobalConfig, writeFile) inside -each subtest; ensure each subtest calls t.Parallel() as appropriate, uses -descriptive "Should..." names, and asserts the same expectations currently in -the two functions (first-run default network enabled and preserving explicit -network disabled + default_channel) so behavior and file contents checks remain -unchanged. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - The current file still has two standalone tests, `TestSaveBootstrapConfigFirstRunKeepsNetworkEnabledByDefault` and `TestSaveBootstrapConfigPreservesExplicitNetworkDisable`, rather than a shared table-driven subtest. - - Root cause: the new network-default scenarios were added incrementally as top-level tests instead of following the repo's default `t.Run("Should...")` pattern used elsewhere in `internal/config`. - - Fix approach: collapse the two scenarios into one table-driven test with `Should...` subtest names, keep the existing assertions intact, and preserve per-case parallel execution. - - Implemented: the standalone tests were merged into `TestSaveBootstrapConfigNetworkBehavior` with parallel `Should...` subtests and the original expectations preserved. - - Verified: focused `go test ./internal/api/httpapi ./internal/api/udsapi ./internal/api/core ./internal/config ./internal/daemon ./internal/testutil/e2e` passed, then `make verify` passed. diff --git a/.compozy/tasks/network-default/reviews-001/issue_003.md b/.compozy/tasks/network-default/reviews-001/issue_003.md deleted file mode 100644 index 3f0f02573..000000000 --- a/.compozy/tasks/network-default/reviews-001/issue_003.md +++ /dev/null @@ -1,66 +0,0 @@ ---- -status: resolved -file: internal/daemon/daemon_test.go -line: 4350 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58o_sk,comment:PRRC_kwDOR5y4QM66AfBQ ---- - -# Issue 003: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Add compile-time interface assertions for new test doubles** - -These newly added methods/types are interface-contract critical for this test path; explicit compile-time checks will prevent accidental drift. - -
-🧩 Suggested addition - -```diff -+var ( -+ _ networkBindableSessionManager = (*fakeSessionManager)(nil) -+ _ syntheticPrompter = (*fakeSessionManager)(nil) -+ _ syntheticPrompter = nonBindableHarnessSessionManager{} -+) -``` -
- - -As per coding guidelines, `**/*.go`: “Use compile-time interface verification: `var _ Interface = (*Type)(nil)`”. - - -Also applies to: 4476-4491 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/daemon/daemon_test.go` around lines 4333 - 4350, Add compile-time -interface assertions for the test double type to prevent future drift: add lines -like `var _ session.Manager = (*fakeSessionManager)(nil)` (or the concrete -session interface type used in production) next to the fakeSessionManager -definition to assert it implements SetNetworkPeerLifecycle, SetTurnEndNotifier, -PromptNetwork, and IsPrompting; also add the same form of assertion for any -other newly added test doubles mentioned (e.g., the types around lines -4476-4491) so the compiler verifies the interface contract. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - The new daemon test doubles currently rely on implicit interface satisfaction only; there are no compile-time assertions covering `fakeSessionManager`, `fakeNetworkBindableSessionManager`, or the synthetic prompt wrapper types near the cited lines. - - Root cause: new test helpers were added for the network-binding path without the repo-standard interface assertions that would catch future drift at compile time. - - Fix approach: add explicit `var _ ...` assertions adjacent to the test double declarations for the production/test interfaces they are expected to satisfy. - - Implemented: compile-time assertions now pin the session-manager, network-bindable, and synthetic-prompter test doubles to their intended interfaces. - - Verified: focused `go test ./internal/api/httpapi ./internal/api/udsapi ./internal/api/core ./internal/config ./internal/daemon ./internal/testutil/e2e` passed, then `make verify` passed. diff --git a/.compozy/tasks/network-default/reviews-001/issue_004.md b/.compozy/tasks/network-default/reviews-001/issue_004.md deleted file mode 100644 index 456fa7cb4..000000000 --- a/.compozy/tasks/network-default/reviews-001/issue_004.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -status: resolved -file: internal/testutil/e2e/runtime_harness_test.go -line: 35 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151161262,nitpick_hash:c37851268e75 -review_hash: c37851268e75 -source_review_id: "4151161262" -source_review_submitted_at: "2026-04-21T22:49:44Z" ---- - -# Issue 004: Split the three network scenarios into subtests. -## Review Comment - -This packs three distinct behaviors into one test body, so the first failure hides the rest and the cases cannot be parallelized independently. A small table with `t.Run("Should ...")` would fit the suite better. 
- -As per coding guidelines, `**/*_test.go`: "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "Add `t.Parallel()` to independent subtests in Go tests". - -## Triage - -- Decision: `VALID` -- Notes: - - `TestPrepareRuntimeLayoutUsesEnabledNetworkByDefaultAndAllowsExplicitDisable` currently checks three distinct behaviors in one body: default enablement, explicit disablement, and `EnableNetwork` overriding a disabled seed. - - Root cause: the test was written as one linear scenario instead of the repo-default subtest structure, which hides later failures and blocks independent parallelization. - - Fix approach: rewrite the test as a small table-driven suite with `Should...` subtests and keep the current network expectations per scenario. - - Implemented: the three scenarios now run as parallel `Should...` subtests with the same enabled/disabled assertions. - - Verified: focused `go test ./internal/api/httpapi ./internal/api/udsapi ./internal/api/core ./internal/config ./internal/daemon ./internal/testutil/e2e` passed, then `make verify` passed. 
diff --git a/.compozy/tasks/network-redesign/reviews-001/_meta.md b/.compozy/tasks/network-redesign/reviews-001/_meta.md deleted file mode 100644 index 5cb9d2d58..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "59" -round: 1 -created_at: 2026-04-24T00:56:19.332411Z ---- - -## Summary -- Total: 26 -- Resolved: 0 -- Unresolved: 26 diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_001.md b/.compozy/tasks/network-redesign/reviews-001/issue_001.md deleted file mode 100644 index 279513e41..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_001.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: resolved -file: extensions/bridges/whatsapp/provider_test.go -line: 1804 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151559901,nitpick_hash:dc73e086f285 -review_hash: dc73e086f285 -source_review_id: "4151559901" -source_review_submitted_at: "2026-04-22T01:22:21Z" ---- - -# Issue 001: Keep the longer timeout local to the flaky path. -## Review Comment - -`waitForCondition` is shared across this file, so Line 1804 makes every broken condition take up to 5s to fail. If only one scenario needed extra slack, prefer a per-call timeout (or a dedicated helper) so unrelated regressions still fail fast. - -## Triage - -- Decision: `invalid` -- Reasoning: `waitForCondition()` is scoped to this integration-only test file, and every current caller waits on subprocess startup, HTTP ingress, file markers, or batched runtime state. The 5s budget is intentional for this whole suite, not just a single call site, so splitting timeouts per helper call would add churn without addressing a demonstrated regression. -- Resolution: no code change. The shared 5s timeout remains intentional for this integration-only WhatsApp provider suite. 
-- Verification: `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_002.md b/.compozy/tasks/network-redesign/reviews-001/issue_002.md deleted file mode 100644 index 4cd6d8264..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_002.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -status: resolved -file: internal/acp/types.go -line: 396 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeF,comment:PRRC_kwDOR5y4QM66CAkm ---- - -# Issue 002: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Bound deferred tool-result buffering.** - -`pendingToolResults` now grows until a matching tool call arrives or the turn finishes. Because ACP session updates are agent-originated and flow straight into `emitPromptEvent`, a noisy or misordered agent can keep sending unique `ToolCallID`s here and retain them for the full prompt. Please cap or dedupe this buffer by `ToolCallID` so one bad session cannot grow memory without bound. - - - -Also applies to: 587-599 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/acp/types.go` around lines 395 - 396, pendingToolResults can grow -unbounded because unique ToolCallID values are appended until a matching tool -call arrives; update the buffering logic around pendingToolResults and -seenToolCalls (and the code paths that push into pendingToolResults, e.g., -emitPromptEvent and the related handler at the other location referenced) to -dedupe by ToolCallID using seenToolCalls (skip adding if already seen) and -enforce a hard cap (e.g., maxPendingToolResults constant) so when the cap is -reached you drop oldest entries or refuse new ones; ensure the dedupe check uses -the AgentEvent.ToolCallID (or equivalent field) and that seenToolCalls is kept -in sync when removing entries. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `activePromptState.deferToolResultLocked()` appends every unseen deferred tool result and never bounds `pendingToolResults`. A misordered or noisy agent can therefore retain unbounded `ToolCallID` state until the prompt ends. -- Fix plan: dedupe deferred tool results per `ToolCallID`, add a hard cap with oldest-entry eviction, keep the deferred-ID bookkeeping in sync when entries are flushed or dropped, and add ACP tests that cover duplicate buffering and cap behavior. -- Resolution: added deferred `ToolCallID` dedupe, a hard `maxPendingToolResults` cap with oldest-entry eviction, synchronized pending-ID bookkeeping across flush and drop paths, and ACP regression coverage for duplicate and capped buffering. -- Verification: `go test ./internal/acp` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_003.md b/.compozy/tasks/network-redesign/reviews-001/issue_003.md deleted file mode 100644 index a8404f67a..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_003.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -status: resolved -file: internal/acp/types_test.go -line: 138 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeD,comment:PRRC_kwDOR5y4QM66CAki ---- - -# Issue 003: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -**Wrap the new cases in the required `t.Run("Should...")` form.** - -These additions are useful, but they introduce more top-level tests instead of the required subtest style. Please move them under `t.Run("Should defer tool results until the tool call")` / `t.Run("Should flush deferred tool results before done")` so this file stays consistent with the enforced Go test pattern. - - -As per coding guidelines, `Use table-driven tests with subtests (t.Run) as default pattern for Go tests` and `MUST use t.Run("Should...") pattern for ALL test cases`. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/acp/types_test.go` around lines 89 - 138, Wrap each new top-level -test body in a t.Run subtest using the required "Should..." titles: inside -TestEmitPromptEventDefersToolResultUntilToolCall call t.Run("Should defer tool -results until the tool call", func(t *testing.T) { t.Parallel(); /* move the -current body here */ }) and inside -TestEmitPromptEventFlushesDeferredToolResultsBeforeDone call t.Run("Should flush -deferred tool results before done", func(t *testing.T) { t.Parallel(); /* move -the current body here */ }), preserving the use of AgentProcess, beginPrompt, -emitPromptEvent and reads from active.events unchanged; ensure t.Parallel() is -invoked inside the subtest functions (not only at the top) so the tests follow -the project's t.Run("Should...") subtest pattern. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: the two new ACP tests are top-level one-off bodies instead of the repo's required `t.Run("Should...")` structure. Bringing them into named subtests keeps the file aligned with the enforced Go test pattern and makes failures easier to localize. -- Fix plan: wrap each top-level body in a `t.Run("Should...")` subtest with its own `t.Parallel()` while preserving the existing assertions. -- Resolution: wrapped the ACP regression coverage in named `t.Run("Should...")` subtests so failures stay localized and the file matches the repo's Go test pattern. -- Verification: `go test ./internal/acp` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_004.md b/.compozy/tasks/network-redesign/reviews-001/issue_004.md deleted file mode 100644 index d301d9a1e..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_004.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -status: resolved -file: internal/api/core/coverage_helpers_test.go -line: 125 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeH,comment:PRRC_kwDOR5y4QM66CAkp ---- - -# Issue 004: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Add one case for the new persisted-channel input.** - -These assertions only adapt the call sites to the new `networkChannelExists(..., ..., ..., channel)` signature, but they still pass `nil` for the new source every time. That leaves the durable-channel branch introduced in this PR untested. - - -As per coding guidelines, "Focus on critical paths: workflow execution, state management, error handling". - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/coverage_helpers_test.go` around lines 118 - 125, The tests -call networkChannelExists(...) with nil for the new persisted-channel parameter, -leaving the durable/persisted-channel branch untested; update -coverage_helpers_test.go to add at least one assertion that passes a non-nil -persistedChannels value (e.g., a slice/map containing "match" or -"sessionChannel") into networkChannelExists and assert true, and add a -complementary case where persistedChannels does not include the channel and -assert false so the persisted-channel branch in networkChannelExists is covered. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: the current helper coverage only exercises `networkChannelExists()` through session and peer matches plus the missing case. The persisted metadata branch (`metadata != nil`) is reachable in production but currently uncovered. -- Fix plan: add explicit persisted-metadata true/false coverage in `coverage_helpers_test.go` and keep the helper assertions organized as named subtests. -- Resolution: added explicit coverage for the persisted-metadata branch in `networkChannelExists()` and kept the helper assertions organized as named subtests. -- Verification: `go test ./internal/api/core` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_005.md b/.compozy/tasks/network-redesign/reviews-001/issue_005.md deleted file mode 100644 index 5c38a0c59..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_005.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -status: resolved -file: internal/api/core/coverage_helpers_test.go -line: 354 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151559901,nitpick_hash:ddfa247ed8db -review_hash: ddfa247ed8db -source_review_id: "4151559901" -source_review_submitted_at: "2026-04-22T01:22:21Z" ---- - -# Issue 005: Split the new helper assertions into named subtests. -## Review Comment - -The preview and payload cases are independent now, but they are still bundled into one large test body. Breaking them into `t.Run("Should...")` cases will make failures much easier to localize. - -As per coding guidelines, "MUST use t.Run(\"Should...\") pattern for ALL test cases". - -## Triage - -- Decision: `valid` -- Reasoning: `TestStatusForBundleErrorAndChannelHelpers` currently bundles the network helper assertions into one linear body, so a failure does not identify which helper path regressed. Splitting them into named `t.Run("Should...")` cases matches the project test style and pairs naturally with the additional persisted-channel coverage. 
-- Fix plan: split the helper assertions into focused subtests for session visibility, peer visibility, persisted metadata, missing channels, and not-found error detection. -- Resolution: split the bundled network helper assertions into focused named subtests for session visibility, peer visibility, persisted metadata, missing channels, and not-found error detection. -- Verification: `go test ./internal/api/core` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_006.md b/.compozy/tasks/network-redesign/reviews-001/issue_006.md deleted file mode 100644 index 77c9ed77a..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_006.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/api/core/interfaces.go -line: 106 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151559901,nitpick_hash:3aca0e96c0c4 -review_hash: 3aca0e96c0c4 -source_review_id: "4151559901" -source_review_submitted_at: "2026-04-22T01:22:21Z" ---- - -# Issue 006: Refresh the NetworkStore doc comment. -## Review Comment - -It now owns channel metadata CRUD as well, so "audit and timeline queries" undersells the actual contract and makes the interface easier to misuse. - -## Triage - -- Decision: `valid` -- Reasoning: the `NetworkStore` interface now exposes persisted channel CRUD in addition to audit and timeline reads. The existing comment underspecifies the contract and can mislead callers about what the store owns. -- Fix plan: refresh the interface doc comment so it accurately describes audit, channel metadata CRUD, and timeline responsibilities. -- Resolution: refreshed the `NetworkStore` doc comment so it now describes audit access, channel metadata CRUD, and timeline responsibilities. 
-- Verification: `go test ./internal/api/core` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_007.md b/.compozy/tasks/network-redesign/reviews-001/issue_007.md deleted file mode 100644 index 79da3eedc..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_007.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -status: resolved -file: internal/api/core/network_details.go -line: 558 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeI,comment:PRRC_kwDOR5y4QM66CAkr ---- - -# Issue 007: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Last message preview can disappear when metadata wins the timestamp race.** - -`applyNetworkChannelMetadata()` can move `aggregate.lastActivityAt` past the newest message, and `aggregateMessageIsLatest()` then rejects every message preview. A just-created channel can hit this if the metadata row is written after the first envelope, leaving the room list with messages but no preview. Track the latest message timestamp separately from overall activity when deciding `lastMessagePreview`. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/network_details.go` around lines 541 - 558, The issue is -that applyNetworkChannelMetadata can advance aggregate.lastActivityAt past the -newest message timestamp, causing aggregateMessageIsLatest to reject message -previews; fix by tracking the latest message timestamp separately (e.g., add a -field like latestMessageAt or lastMessageTimestamp to networkChannelAggregate), -update ensureNetworkChannelAggregate to initialize that field, update the loop -where messages are processed to set/advance aggregate.latestMessageAt based on -message.Timestamp (independent of lastActivityAt), and change -aggregateMessageIsLatest (or the preview check) to compare message.Timestamp -against aggregate.latestMessageAt instead of aggregate.lastActivityAt so -metadata updates no longer suppress previews. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `applyNetworkChannelMetadata()` advances `aggregate.lastActivityAt`, and `aggregateMessageIsLatest()` compares message timestamps against that same field. If metadata updates after the newest persisted message, preview selection can reject every message even though the channel has traffic. -- Fix plan: track latest message time independently from overall activity, base preview selection on that message-only timestamp, and add coverage proving metadata updates do not erase the preview. -- Resolution: split latest-message tracking from overall activity tracking, based preview selection on the newest message timestamp, and added regression coverage showing newer metadata does not erase message previews. -- Verification: `go test ./internal/api/core` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_008.md b/.compozy/tasks/network-redesign/reviews-001/issue_008.md deleted file mode 100644 index 8ac2de845..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_008.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -status: resolved -file: internal/network/audit_test.go -line: 167 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151559901,nitpick_hash:f76f09563762 -review_hash: f76f09563762 -source_review_id: "4151559901" -source_review_submitted_at: "2026-04-22T01:22:21Z" ---- - -# Issue 008: Run these subtests in parallel. -## Review Comment - -Each case builds its own writer/store and does not share state, so adding `t.Parallel()` inside the subtests would keep this aligned with the repo's default test pattern. - -As per coding guidelines, "Add `t.Parallel()` to independent subtests in Go tests". - -## Triage - -- Decision: `valid` -- Reasoning: the affected subtests each build isolated store/writer fixtures and do not share mutable state. 
Adding `t.Parallel()` inside those subtests is safe and brings the file back in line with the repo's default independent-subtest pattern. -- Fix plan: add `t.Parallel()` to the independent audit-writer subtests only. -- Resolution: added `t.Parallel()` to the independent audit-writer subtests while keeping shared setup unchanged. -- Verification: `go test ./internal/network` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_009.md b/.compozy/tasks/network-redesign/reviews-001/issue_009.md deleted file mode 100644 index 939db3957..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_009.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db.go -line: 142 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeX,comment:PRRC_kwDOR5y4QM66CAk8 ---- - -# Issue 009: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Add workspace referential integrity to `network_channels`.** - -`workspace_id` is free text here, so deleting a workspace can leave orphaned channel rows behind. Since this table is workspace-scoped state, it should reference `workspaces(id)` directly and cascade on delete. - -
-Suggested schema change - -```diff - `CREATE TABLE IF NOT EXISTS network_channels ( - channel TEXT PRIMARY KEY, -- workspace_id TEXT NOT NULL, -+ workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, - purpose TEXT NOT NULL, - created_by TEXT NOT NULL DEFAULT '', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL - );`, -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - `CREATE TABLE IF NOT EXISTS network_channels ( - channel TEXT PRIMARY KEY, - workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, - purpose TEXT NOT NULL, - created_by TEXT NOT NULL DEFAULT '', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL - );`, -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db.go` around lines 135 - 142, The -network_channels table currently defines workspace_id as free text which can -leave orphaned rows; modify the CREATE TABLE for network_channels to make -workspace_id a proper foreign key referencing workspaces(id) with ON DELETE -CASCADE (i.e., add a FOREIGN KEY (workspace_id) REFERENCES workspaces(id) ON -DELETE CASCADE and ensure the column types match the referenced id), and update -any migration/initializer that uses the network_channels schema so the -constraint is applied for new deployments (or provide an ALTER TABLE migration -to add the foreign key for existing DBs). -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `network_channels.workspace_id` is currently free text, so deleting a workspace can leave orphaned channel metadata behind. Because this table is workspace-scoped durable state, the schema should enforce the relationship directly. -- Fix plan: add `REFERENCES workspaces(id) ON DELETE CASCADE` to the table definition, update the existing-schema migration path so older databases are rebuilt with the foreign key, and add schema/migration coverage. This requires a minimal migration change outside the listed file set because `CREATE TABLE` only fixes new databases. -- Resolution: added a workspace foreign key with `ON DELETE CASCADE` to `network_channels`, updated the legacy schema migration path in `migrate_workspace.go` to rebuild existing tables safely, and added schema and migration coverage. -- Verification: `go test ./internal/store/globaldb` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_010.md b/.compozy/tasks/network-redesign/reviews-001/issue_010.md deleted file mode 100644 index 731cf65c6..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_010.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db.go -line: 143 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4151559901,nitpick_hash:b9f10ce38600 -review_hash: b9f10ce38600 -source_review_id: "4151559901" -source_review_submitted_at: "2026-04-22T01:22:21Z" ---- - -# Issue 010: Use a composite index for workspace channel listings. -## Review Comment - -Two single-column indexes do not help much for the common `WHERE workspace_id = ? ORDER BY updated_at DESC` path. A composite index on workspace and activity time will serve the new room-list query much better. 
- -## Triage - -- Decision: `valid` -- Reasoning: the common channel-list query filters by `workspace_id` and sorts by `updated_at DESC, channel ASC`, but the schema only provides single-column indexes. A composite index better matches that access path and avoids extra sorting work for workspace-scoped listings. -- Fix plan: add a composite workspace/activity index to the schema and assert its presence in the channel schema test. -- Resolution: added the composite `network_channels(workspace_id, updated_at DESC, channel ASC)` index and asserted its presence in the schema test. -- Verification: `go test ./internal/store/globaldb` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_011.md b/.compozy/tasks/network-redesign/reviews-001/issue_011.md deleted file mode 100644 index 844f9eb9d..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_011.md +++ /dev/null @@ -1,74 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_network_channels.go -line: 25 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeN,comment:PRRC_kwDOR5y4QM66CAkv ---- - -# Issue 011: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Canonicalize `entry.Channel` before writing it.** - -`GetNetworkChannel()` and `DeleteNetworkChannel()` both trim the lookup key, but `WriteNetworkChannel()` persists `entry.Channel` as-is. A value like `" coord.core "` can therefore be inserted successfully and then become unreachable through the trimmed read/delete paths. Trim the channel before validation/persistence so all CRUD methods use the same canonical key. - -
-🔧 Suggested change - -```diff - func (g *GlobalDB) WriteNetworkChannel(ctx context.Context, entry store.NetworkChannelEntry) error { -+ entry.Channel = strings.TrimSpace(entry.Channel) -+ entry.CreatedBy = strings.TrimSpace(entry.CreatedBy) - if err := g.checkReady(ctx, "write network channel"); err != nil { - return err - } - if err := entry.Validate(); err != nil { - return fmt.Errorf("store: validate network channel entry: %w", err) -@@ - entry.Channel, - entry.WorkspaceID, - entry.Purpose, -- strings.TrimSpace(entry.CreatedBy), -+ entry.CreatedBy, - store.FormatTimestamp(entry.CreatedAt), - store.FormatTimestamp(entry.UpdatedAt), - ); err != nil { -``` -
- - -Also applies to: 28-52, 64-75, 139-149 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_network_channels.go` around lines 14 - 25, -Trim the channel string before validating or persisting it in -WriteNetworkChannel: call strings.TrimSpace on entry.Channel at the top of -WriteNetworkChannel (before entry.Validate() and before setting -CreatedAt/UpdatedAt) so the stored key matches the trimmed lookup key; do the -same canonicalization in the related functions GetNetworkChannel and -DeleteNetworkChannel (and any other write/update handlers in this file -referenced in the diff) to ensure all CRUD operations use the same trimmed -channel key. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `WriteNetworkChannel()` validates and persists `entry.Channel` before trimming it, while `GetNetworkChannel()` and `DeleteNetworkChannel()` normalize lookup keys with `strings.TrimSpace()`. A channel written with leading or trailing whitespace can therefore become unreachable through the read/delete paths. -- Fix plan: canonicalize `entry.Channel` before validation and persistence, keep `CreatedBy` trimming centralized, and add coverage for write/read/delete behavior with padded channel names. -- Resolution: canonicalized channel names before validation and persistence, kept creator trimming centralized, and added coverage for padded channel write/read/delete behavior. -- Verification: `go test ./internal/store/globaldb` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_012.md b/.compozy/tasks/network-redesign/reviews-001/issue_012.md deleted file mode 100644 index abb2756ba..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_012.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_network_channels_test.go -line: 94 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeK,comment:PRRC_kwDOR5y4QM66CAkt ---- - -# Issue 012: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Don't assert a stable order until the store query orders rows.** - -`internal/store/globaldb/global_db_network_channels.go:88-137` currently builds `SELECT ... FROM network_channels` without `ORDER BY` or `LIMIT`, so `entries[0].Channel == "ops.alerts"` depends on SQLite's incidental row order and `Limit: 10` is not actually being exercised. Either sort/limit in the store query and assert that here, or compare the returned channels as a set. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_network_channels_test.go` around lines 81 - -94, The test assumes a stable row order and a working LIMIT, but the store query -in global_db_network_channels.go (the ListNetworkChannels implementation) -currently omits ORDER BY and LIMIT; update the store code to apply a -deterministic ORDER BY (e.g., by Channel or created_at) and honor Limit from -NetworkChannelQuery inside ListNetworkChannels, or alternatively change the test -in global_db_network_channels_test.go to assert the returned channels as an -unordered set (compare entries' Channel values regardless of order) and verify -the length respects Limit; reference ListNetworkChannels, NetworkChannelQuery, -entries, and global_db_network_channels.go when making the change. -``` - -
- - - - - -## Triage - -- Decision: `invalid` -- Reasoning: `ListNetworkChannels()` already applies `ORDER BY updated_at DESC, channel ASC` and routes `query.Limit` through `store.AppendLimit()`. The test is asserting deterministic behavior against the current implementation, so the specific review concern is stale relative to the code under review. -- Resolution: no code change. The current implementation already applies the requested ordering and limit handling. -- Verification: `go test ./internal/store/globaldb` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_013.md b/.compozy/tasks/network-redesign/reviews-001/issue_013.md deleted file mode 100644 index 5852e7567..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_013.md +++ /dev/null @@ -1,24 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_network_messages_test.go -line: 128 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4166737115,nitpick_hash:e9dd9f7a7931 -review_hash: e9dd9f7a7931 -source_review_id: "4166737115" -source_review_submitted_at: "2026-04-23T23:14:00Z" ---- - -# Issue 013: Cover cursor tie-breaks with equal timestamps. -## Review Comment - -The cursor pagination test only exercises strictly increasing timestamp values. To fully validate the `(timestamp, message_id)` ordering contract, add at least one pair of entries with the same timestamp but different message IDs (e.g., `msg-2a` and `msg-2b` at `recordedAt.Add(time.Minute)`). This ensures entries sharing a timestamp aren't skipped or duplicated during pagination. - -## Triage - -- Decision: `valid` -- Reasoning: the cursor pagination test only covers strictly increasing timestamps. Since `ListNetworkMessages()` orders by `(timestamp, message_id)`, the current test misses the tie-break case where multiple rows share a timestamp. 
-- Fix plan: add equal-timestamp entries with distinct `message_id` values and update the before/after assertions to verify the full `(timestamp, message_id)` ordering contract. -- Resolution: expanded the cursor pagination fixtures with equal-timestamp message IDs and updated the assertions to verify deterministic `(timestamp, message_id)` ordering. -- Verification: `go test ./internal/store/globaldb` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_014.md b/.compozy/tasks/network-redesign/reviews-001/issue_014.md deleted file mode 100644 index 2402e7f4d..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_014.md +++ /dev/null @@ -1,47 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_network_messages_test.go -line: 218 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeT,comment:PRRC_kwDOR5y4QM66CAk2 ---- - -# Issue 014: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Cover cursor tie-breaks with equal timestamps.** - -This only exercises strictly increasing `Timestamp` values. If `ListNetworkMessages` compares cursors by timestamp alone, entries that share the same timestamp can still be skipped or duplicated while this test passes. Please add at least one same-timestamp pair with different `MessageID`s so the before/after cases validate the `(timestamp, message_id)` ordering contract. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_network_messages_test.go` around lines 127 -- 206, TestGlobalDBListNetworkMessagesSupportsMessageIDCursors currently only -uses strictly increasing Timestamp values; add at least one pair of entries that -share the exact same Timestamp but have different MessageID values (e.g., -"msg-2a" and "msg-2b") and write them via globalDB.WriteNetworkMessage so the -test covers tie-break ordering; then update the subsequent ListNetworkMessages -queries and assertions (the before and after checks) to account for the -deterministic (timestamp, message_id) ordering expected by ListNetworkMessages -so the test verifies no skips/duplicates when timestamps are equal. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: this is the same gap as issue 013 expressed against the before/after cursor cases. Without equal-timestamp fixtures, the test can pass even if tie-break ordering skips or duplicates rows. -- Fix plan: cover both issues with one test update that inserts same-timestamp messages and verifies the cursor windows remain stable. -- Resolution: the same equal-timestamp fixture update now verifies the before and after cursor windows do not skip or duplicate rows when timestamps match. -- Verification: `go test ./internal/store/globaldb` and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_015.md b/.compozy/tasks/network-redesign/reviews-001/issue_015.md deleted file mode 100644 index 3dea6b929..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_015.md +++ /dev/null @@ -1,77 +0,0 @@ ---- -status: resolved -file: web/src/hooks/routes/use-network-page.ts -line: 475 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIec,comment:PRRC_kwDOR5y4QM66CAlC ---- - -# Issue 015: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Normalize `channel`/`peer` search params to a single target.** - -`validateNetworkSearch()` can return both `channel` and `peer`, but `activeRoomItem` always checks `search.peer` first. If a stale `peer` param is present and does not resolve, it suppresses an otherwise valid `channel` selection and leaves the page without an active room. Either make the params mutually exclusive during validation or explicitly fall back to `channel` when the peer lookup misses. - -
-🔧 One simple fix - -```diff - function validateNetworkSearch(search: Record<string, unknown>): NetworkRouteSearch { -+ const channel = normalizeSearchValue(search.channel); -+ const peer = normalizeSearchValue(search.peer); - const kindValue = normalizeSearchValue(search.kind); - const normalizedKind = - kindValue === "all" || (kindValue && toNetworkKindFilter(kindValue)) - ? (kindValue as NetworkKindFilter) - : undefined; - - return { -- channel: normalizeSearchValue(search.channel), -+ channel: peer ? undefined : channel, - details: search.details === "closed" ? "closed" : undefined, - kind: normalizedKind === "all" ? undefined : normalizedKind, -- peer: normalizeSearchValue(search.peer), -+ peer, - }; - } -``` -
- - -Also applies to: 562-573 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/hooks/routes/use-network-page.ts` around lines 462 - 475, -validateNetworkSearch currently can return both channel and peer which lets a -stale peer param suppress a valid channel selection; update -validateNetworkSearch to make channel and peer mutually exclusive by preferring -channel when both are present: compute normalizedChannel = -normalizeSearchValue(search.channel) and normalizedPeer = -normalizeSearchValue(search.peer), then if normalizedChannel is truthy return -channel=normalizedChannel and peer=undefined, else return peer=normalizedPeer -and channel=undefined (keeping existing handling for kind/details). Apply the -same change to the other identical validation block referenced in the diff so -only one of channel/peer is ever returned. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `validateNetworkSearch()` currently allows both `channel` and `peer`, but `activeRoomItem` resolves `search.peer` first. A stale peer param can therefore suppress a valid channel selection and leave the page with no active room. -- Fix plan: normalize the route search so `channel` and `peer` are mutually exclusive, preferring the explicit channel target when both are present, and add route coverage in the existing network page test file. This requires a minimal test update outside the listed scope because the route behavior is already exercised there. -- Resolution: normalized network route search so `channel` and `peer` are mutually exclusive, preferring a valid channel target when both are present, and added route coverage for the stale-peer case. -- Verification: `bun run test:raw src/routes/_app/-network.test.tsx src/systems/network/components/network-create-channel-dialog.test.tsx`, `make web-lint`, `make web-typecheck`, and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_016.md b/.compozy/tasks/network-redesign/reviews-001/issue_016.md deleted file mode 100644 index 220f00386..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_016.md +++ /dev/null @@ -1,54 +0,0 @@ ---- -status: resolved -file: web/src/hooks/routes/use-network-page.ts -line: 573 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIeg,comment:PRRC_kwDOR5y4QM66CAlH ---- - -# Issue 016: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Keep sidebar filtering separate from active-room selection.** - -`activeRoomItem` is currently resolved from `filteredChannels` / `filteredPeers`, and the effect below immediately syncs that result back into the URL. That means typing a sidebar query can change the selected room (or clear it) just because the current room no longer matches the filter. 
Active-room resolution should come from the full room set, with filtering applied only to what the sidebar renders. - - - -Also applies to: 575-593 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/hooks/routes/use-network-page.ts` around lines 522 - 573, -activeRoomItem is being derived from filteredChannels/filteredPeers so typing a -sidebar query can change/clear the selected room; instead compute the active -room from the full room set (allChannels/allPeers) and only use -filteredChannels/filteredPeers for what the sidebar renders. Concretely, build -unfiltered room arrays by mapping allChannels and allPeers with -makeChannelRoomItem/makePeerRoomItem (respecting starredChannels, readMarkers, -selectedRoomKey) and use those unfiltered arrays to compute activeRoomItem, -while leaving starredChannelRooms/channelRooms/directRooms (used for rendering) -to be created from filteredChannels/filteredPeers; update the activeRoomItem -useMemo dependencies to reference the new unfiltered arrays (and apply the same -fix around the similar block at lines 575-593). -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `activeRoomItem` is derived from `filteredChannels` and `filteredPeers`, and the sync effect writes that derived room back into the URL. Typing a sidebar filter can therefore clear or change the active selection even though the user did not choose a new room. -- Fix plan: derive the active room from the full unfiltered room sets while keeping sidebar rendering filtered, then add route coverage proving the current room remains selected during sidebar filtering. This requires a minimal test update outside the listed scope because the route behavior is already exercised there. -- Resolution: derived active-room selection from the full unfiltered room sets while keeping the sidebar display filtered, and added route coverage proving the selected room survives sidebar filtering. -- Verification: `bun run test:raw src/routes/_app/-network.test.tsx src/systems/network/components/network-create-channel-dialog.test.tsx`, `make web-lint`, `make web-typecheck`, and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_017.md b/.compozy/tasks/network-redesign/reviews-001/issue_017.md deleted file mode 100644 index d7030e403..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_017.md +++ /dev/null @@ -1,45 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/components/network-create-channel-dialog.tsx -line: 102 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIej,comment:PRRC_kwDOR5y4QM66CAlJ ---- - -# Issue 017: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Mark the purpose input as required on the control itself.** - -Right now the field is only "required" by surrounding state, so screen readers will not announce that requirement and native form validation will not help if this dialog gets reused elsewhere. Adding `required` (and ideally `aria-required`) fixes that. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-create-channel-dialog.tsx` around -lines 90 - 102, Add the required attribute (and aria-required="true") to the -Textarea control so assistive tech and native form validation recognize the -field as mandatory; update the Textarea with id "network-channel-purpose" (the -control using value={draft.purpose} and onChange={event => -onPurposeChange(event.target.value)}) to include required and -aria-required="true" attributes. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: the purpose textarea is required by surrounding submit logic, but the control itself is not marked `required`. That prevents assistive technology and native form validation from announcing the field correctly. -- Fix plan: add `required` and `aria-required="true"` to the purpose textarea and extend the existing component test file to assert the attributes. The test lives next to the component outside the listed scope, so that update will be kept minimal and documented here. -- Resolution: added `required` and `aria-required="true"` to the purpose textarea and extended the dialog test to assert both attributes. -- Verification: `bun run test:raw src/routes/_app/-network.test.tsx src/systems/network/components/network-create-channel-dialog.test.tsx`, `make web-lint`, `make web-typecheck`, and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_018.md b/.compozy/tasks/network-redesign/reviews-001/issue_018.md deleted file mode 100644 index 8eee0f09e..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_018.md +++ /dev/null @@ -1,155 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/components/network-workspace-shell.tsx -line: 244 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIen,comment:PRRC_kwDOR5y4QM66CAlP ---- - -# Issue 018: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Stop keyboard events from the star button from reaching the row handler.** - -Line 182 makes the whole row react to `Enter`/`Space`, but the nested star button only stops click propagation. Keyboard activation on the star control will also select the room, and `Space` can even suppress the button action. - -
-🔧 Minimal fix - -```diff -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion -
{ - if (event.key === "Enter" || event.key === " ") { - event.preventDefault(); - onSelect(item); - } - }} - onClick={() => onSelect(item)} - role="button" - tabIndex={0} - > -
- {isChannel ? ( - - ) : ( - - )} -
-
-
- 0 - ? "text-[color:var(--color-text-primary)]" - : "text-[color:var(--color-text-secondary)]" - )} - > - {item.title} - - {item.unreadCount > 0 ? ( - - {item.unreadCount} - - ) : null} -
-
- - {item.preview} - -
-
- - {item.meta} - - · - - {item.subtitle} - -
-
- {isChannel ? ( - - ) : null} -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-workspace-shell.tsx` around lines -174 - 246, The row's onKeyDown handler (the anonymous handler that checks -event.key === "Enter" || " ") currently catches keyboard events for the whole -item; add an onKeyDown on the star button (the button that renders Sparkles and -calls onToggleStar) that stops propagation so keyboard presses on the star don't -bubble to the row. Concretely, in the star button element add an onKeyDown -handler that calls event.stopPropagation() (and for safety you can check keys -"Enter" and " " before stopping) so keyboard activation still triggers the -button but won't trigger the row's onSelect. -``` - -
- - - - - -## Triage - -- Decision: `invalid` -- Reasoning: the current `NetworkSidebarRow` implementation no longer attaches keyboard selection handling to a shared row container. Room selection happens on the main room button, while the star control is a sibling button, so keyboard activation on the star button does not bubble into a row-level `onSelect` handler in the current code. -- Resolution: no code change. The current room button and sibling star button structure already prevents the reported keyboard-bubbling path. -- Verification: `bun run test:raw src/routes/_app/-network.test.tsx src/systems/network/components/network-create-channel-dialog.test.tsx`, `make web-lint`, `make web-typecheck`, and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_019.md b/.compozy/tasks/network-redesign/reviews-001/issue_019.md deleted file mode 100644 index 92c5226cc..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_019.md +++ /dev/null @@ -1,86 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/components/network-workspace-shell.tsx -line: 291 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59QGuv,comment:PRRC_kwDOR5y4QM661FLl ---- - -# Issue 019: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Using `step` as the React key may cause issues with duplicate execution outline steps.** - -If two steps have identical text, React will warn about duplicate keys and may incorrectly reuse DOM nodes. - - -
-🔧 Use index-based key for list items - -```diff -- {readStringList(capability, "execution_outline").map(step => ( -+ {readStringList(capability, "execution_outline").map((step, stepIndex) => ( -

- {step} -

- ))} -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - {readStringList(capability, "execution_outline").map((step, stepIndex) => ( -

- {step} -

- ))} -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-workspace-shell.tsx` around lines -284 - 291, The list rendering in network-workspace-shell.tsx uses the step -string as the React key (inside the map over readStringList(capability, -"execution_outline")), which can produce duplicate keys when steps repeat; -update the map to use a stable per-item index-based key (e.g., -key={`${index}-${step}` or just key={index}) to guarantee uniqueness and avoid -React warnings and DOM reuse issues for identical step text. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Reasoning: `execution_outline` comes from dynamic payload data and can legitimately contain duplicate step text. Using the raw step string as the React key can therefore emit duplicate-key warnings and confuse reconciliation. -- Fix plan: use a stable per-item key that remains unique for repeated step text and add a route-level rendering test with duplicate outline steps to prove the warning disappears. That test lives in the existing network route test file outside the listed scope, so the expansion will be minimal and documented here. -- Resolution: changed execution outline keys to include the item index and added route-level coverage that renders duplicate step text without React duplicate-key warnings. -- Verification: `bun run test:raw src/routes/_app/-network.test.tsx src/systems/network/components/network-create-channel-dialog.test.tsx`, `make web-lint`, `make web-typecheck`, and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_020.md b/.compozy/tasks/network-redesign/reviews-001/issue_020.md deleted file mode 100644 index 5cadc3053..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_020.md +++ /dev/null @@ -1,75 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/components/network-workspace-shell.tsx -line: 495 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59QGvA,comment:PRRC_kwDOR5y4QM661FL- ---- - -# Issue 020: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Composite key may collide if label-value pairs are duplicated.** - -Using `${field.label}-${field.value}` as a key could produce duplicates if two fields share the same label and value. - - -
-🔧 Use index to guarantee uniqueness - -```diff -- {fields.map(field => ( --
-+ {fields.map((field, index) => ( -+
-``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion -
- {fields.map((field, index) => ( -
-``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-workspace-shell.tsx` around lines -493 - 495, The key generation in the fields.map callback uses a composite -`${field.label}-${field.value}` which can collide; update the JSX in -network-workspace-shell.tsx where fields.map is used (the map callback creating -the
) to use a stable unique identifier instead—preferably a -dedicated property like field.id if available, otherwise fall back to the -iteration index (e.g., use field.id or index) so keys are guaranteed unique and -React list reconciliation remains correct. -``` - -
- - - - - -## Triage - -- Decision: `invalid` -- Reasoning: `NetworkDetailFieldList` is fed by `summarizeChannelWireFields()` and `summarizePeerWireFields()`, and the current field producers generate unique labels for every rendered entry in a given list. With the present data model, `${field.label}-${field.value}` remains unique for reachable payloads in this component. -- Resolution: no code change. The current field producers already keep reachable detail entries unique for this component. -- Verification: `bun run test:raw src/routes/_app/-network.test.tsx src/systems/network/components/network-create-channel-dialog.test.tsx`, `make web-lint`, `make web-typecheck`, and `make verify` diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_021.md b/.compozy/tasks/network-redesign/reviews-001/issue_021.md deleted file mode 100644 index 26a02d911..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_021.md +++ /dev/null @@ -1,51 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/components/network-workspace-shell.tsx -line: 567 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIer,comment:PRRC_kwDOR5y4QM66CAlW ---- - -# Issue 021: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Don’t hard-code the header status dot to success.** - -The dot always renders with `tone="success"`, so `"degraded"`/`"stopped"` states still look healthy even though `getNetworkStatusTone()` exists for this mapping. That makes the workspace header report the wrong runtime health. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/components/network-workspace-shell.tsx` around lines -565 - 569, The StatusDot in network-workspace-shell.tsx is hard-coded to -tone="success"; change it to use the mapping function instead (call -getNetworkStatusTone with the current status value, e.g. -getNetworkStatusTone(status.status)) so the dot reflects -"running"/"online"/"degraded"/"stopped" correctly; ensure getNetworkStatusTone -is imported/available in the file and remove the literal "success" prop. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: -- `web/src/systems/network/components/network-workspace-shell.tsx:564-567` hard-codes the header `StatusDot` to `tone="success"` while the same module already imports status-formatting helpers. -- The rendered header therefore reports degraded or stopped runtimes as healthy, which is a real UI-state mismatch rather than a stylistic preference. -- Fix approach: derive the tone from `status.status` via `getNetworkStatusTone(...)` and add regression coverage that exercises a non-healthy status through the rendered network page/component surface. - -## Resolution - -- Updated `network-workspace-shell.tsx` to derive the workspace header dot tone from `getNetworkStatusTone(status.status)` instead of a hard-coded success state. -- Added minimal out-of-scope regression coverage in `web/src/systems/network/components/network-workspace-shell.test.tsx` because the batch scope did not include an existing component test surface for this header behavior. -- Verified with `bun x vitest run src/systems/network/components/network-workspace-shell.test.tsx`, `make web-test`, and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_022.md b/.compozy/tasks/network-redesign/reviews-001/issue_022.md deleted file mode 100644 index 68ab5eaf3..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_022.md +++ /dev/null @@ -1,31 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/lib/network-formatters.test.ts -line: 99 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4166737115,nitpick_hash:0b044330b8ff -review_hash: 0b044330b8ff -source_review_id: "4166737115" -source_review_submitted_at: "2026-04-23T23:14:00Z" ---- - -# Issue 022: Consider expanding test coverage for formatNetworkKindLabel. -## Review Comment - -The current test only validates one kind (`"capability"`). Consider adding tests for: -- Other valid kinds (`say`, `direct`, `trace`, etc.) 
-- Unknown/unrecognized kind strings (should return the original string per implementation) - -## Triage - -- Decision: `valid` -- Notes: -- `web/src/systems/network/lib/network-formatters.test.ts` only covers `formatNetworkKindLabel("capability")`, leaving the rest of the supported network kinds and the passthrough fallback behavior unprotected. -- The implementation in `network-formatters.ts` intentionally preserves unknown strings, so missing coverage here creates an easy regression gap for future label-map edits. -- Fix approach: extend the existing formatter test file with representative known kinds and an unknown-kind passthrough assertion. - -## Resolution - -- Expanded `network-formatters.test.ts` to cover every supported network kind label and the unknown-kind passthrough behavior. -- Verified with `bun x vitest run src/systems/network/lib/network-formatters.test.ts`, `make web-test`, and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_023.md b/.compozy/tasks/network-redesign/reviews-001/issue_023.md deleted file mode 100644 index e64d01d5e..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_023.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/lib/network-formatters.ts -line: 286 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4166737115,nitpick_hash:7fd22c6b024f -review_hash: 7fd22c6b024f -source_review_id: "4166737115" -source_review_submitted_at: "2026-04-23T23:14:00Z" ---- - -# Issue 023: Redundant === true comparisons. -## Review Comment - -The `.includes()` method already returns a boolean, so the explicit `=== true` is unnecessary. However, this is a minor stylistic preference. - -## Triage - -- Decision: `invalid` -- Notes: -- `web/src/systems/network/lib/network-formatters.ts:286-289` does contain redundant `=== true` comparisons, but the code is already correct and unambiguous at runtime. 
-- This batch is scoped to review remediation, not opportunistic style churn, and no repository lint rule or behavioral bug requires rewriting these expressions. -- Leaving the code unchanged avoids mixing a preference-only edit into a batch that otherwise contains functional fixes. - -## Resolution - -- No code change was necessary. The report was closed as a style-only preference after confirming the current implementation is behaviorally correct and passes the repo lint/type/test gates unchanged. diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_024.md b/.compozy/tasks/network-redesign/reviews-001/issue_024.md deleted file mode 100644 index e04e775a4..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_024.md +++ /dev/null @@ -1,102 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/lib/query-options.ts -line: 57 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIet,comment:PRRC_kwDOR5y4QM66CAlY ---- - -# Issue 024: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Normalize the timeline query once and reuse it for both the key and the fetch.** - -Right now `{}` and `undefined` hit the same endpoint payload (`limit` becomes `120` in `queryFn`) but produce different query keys because the raw `query` is passed into `networkKeys.*Messages(...)`. That splits the cache for identical requests and makes invalidation/refetch behavior inconsistent. - - -
-Suggested fix - -```diff - export function networkChannelMessagesOptions( - channel: string, -- query: NetworkChannelMessagesQuery = { limit: DEFAULT_TIMELINE_LIMIT }, -+ query: NetworkChannelMessagesQuery = {}, - enabled = true - ) { -+ const normalizedQuery = { limit: DEFAULT_TIMELINE_LIMIT, ...query }; -+ - return queryOptions({ -- queryKey: networkKeys.channelMessages(channel, query), -- queryFn: ({ signal }) => -- listNetworkChannelMessages(channel, { limit: DEFAULT_TIMELINE_LIMIT, ...query }, signal), -+ queryKey: networkKeys.channelMessages(channel, normalizedQuery), -+ queryFn: ({ signal }) => listNetworkChannelMessages(channel, normalizedQuery, signal), - staleTime: 2_000, - refetchInterval: MESSAGES_REFETCH_INTERVAL, - enabled: Boolean(channel) && enabled, - }); - } -@@ - export function networkPeerMessagesOptions( - peerId: string, -- query: NetworkPeerMessagesQuery = { limit: DEFAULT_TIMELINE_LIMIT }, -+ query: NetworkPeerMessagesQuery = {}, - enabled = true - ) { -+ const normalizedQuery = { limit: DEFAULT_TIMELINE_LIMIT, ...query }; -+ - return queryOptions({ -- queryKey: networkKeys.peerMessages(peerId, query), -- queryFn: ({ signal }) => -- listNetworkPeerMessages(peerId, { limit: DEFAULT_TIMELINE_LIMIT, ...query }, signal), -+ queryKey: networkKeys.peerMessages(peerId, normalizedQuery), -+ queryFn: ({ signal }) => listNetworkPeerMessages(peerId, normalizedQuery, signal), - staleTime: 2_000, - refetchInterval: MESSAGES_REFETCH_INTERVAL, - enabled: Boolean(peerId) && enabled, - }); - } -``` -
- - -Also applies to: 84-92 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/lib/query-options.ts` around lines 49 - 57, Normalize -the timeline query object once inside networkChannelMessagesOptions: create a -single normalizedQuery that applies the default (DEFAULT_TIMELINE_LIMIT) to -undefined/missing fields, then use that normalizedQuery both when building the -cache key via networkKeys.channelMessages(channel, normalizedQuery) and when -calling listNetworkChannelMessages(channel, normalizedQuery, signal). Do the -same refactor for the analogous function around the later block (the other -*MessagesOptions function) so keys and fetch payloads match exactly. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: -- `web/src/systems/network/lib/query-options.ts:49-57` and `:84-92` build cache keys from the raw `query` argument but call the API with `{ limit: DEFAULT_TIMELINE_LIMIT, ...query }`. -- That means `undefined` and `{}` produce the same request payload while generating different query keys, which splits the cache and weakens invalidation/refetch consistency for identical timeline requests. -- Fix approach: normalize the timeline query once per function, then reuse the same normalized object for both `queryKey` and `queryFn`; add regression coverage for the normalized key shape. - -## Resolution - -- Refactored both timeline option factories to compute one normalized query object with `limit: query.limit ?? DEFAULT_TIMELINE_LIMIT`, then reuse it for both the cache key and the fetch payload. -- Added minimal out-of-scope regression coverage in `web/src/systems/network/lib/query-options.test.ts` because the batch scope did not include any existing query-options test file. -- Verified with `bun x vitest run src/systems/network/lib/query-options.test.ts`, `make web-typecheck`, `make web-test`, and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_025.md b/.compozy/tasks/network-redesign/reviews-001/issue_025.md deleted file mode 100644 index b9f94d35a..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_025.md +++ /dev/null @@ -1,58 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/mocks/fixtures.ts -line: 243 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIev,comment:PRRC_kwDOR5y4QM66CAla ---- - -# Issue 025: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Keep the mock message payloads aligned with the server mapper.** - -`NetworkChannelMessagePayloadFromEntry` only exposes `session_id` for local/sent messages, and it prefers the session name for local `display_name`s when a session is known. 
These fixtures give received messages remote `session_id`s and keep local `display_name`s as peer-card names, so Storybook/tests can pass against payloads the API never returns. - - - -Also applies to: 245-276 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/mocks/fixtures.ts` around lines 191 - 243, The -fixture networkChannelMessagesFixture contains entries whose fields don't match -the server mapper: update each message so only local/sent messages include -session_id and their display_name uses the session name (not the peer card name) -when a session is known, and ensure received messages (direction: "received") do -NOT carry session_id; adjust the three entries (message_id "msg_storybook_1", -"msg_storybook_2", "msg_storybook_3") accordingly and apply the same corrections -to the later fixtures in the 245-276 range so the NetworkChannelMessage shape -matches NetworkChannelMessagePayloadFromEntry and the server-mapped payloads the -app expects. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: -- The mock payloads in `web/src/systems/network/mocks/fixtures.ts:191-276` diverge from `internal/api/core/network_details.go:986-1033`, which only exposes `session_id` for local/sent messages and rewrites local display names from the owning session when available. -- The current fixtures assign remote `session_id` values and keep local display names at the peer-card label, so Storybook/tests can pass against payloads the API never emits. -- Fix approach: align both channel and peer message fixtures with the server mapper semantics and add regression coverage that asserts the fixture contract directly. - -## Resolution - -- Updated the network message fixtures so local/sent entries use the session name (`Storybook rollout`) and only local/sent entries retain `session_id`. -- Added minimal out-of-scope regression coverage in `web/src/systems/network/mocks/network-mocks.test.ts` because the batch scope did not include an existing mock-contract test file. -- Verified with `bun x vitest run src/systems/network/mocks/network-mocks.test.ts`, `make web-test`, and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-001/issue_026.md b/.compozy/tasks/network-redesign/reviews-001/issue_026.md deleted file mode 100644 index b3473a96f..000000000 --- a/.compozy/tasks/network-redesign/reviews-001/issue_026.md +++ /dev/null @@ -1,68 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/mocks/handlers.ts -line: 82 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM58qIew,comment:PRRC_kwDOR5y4QM66CAlb ---- - -# Issue 026: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Rewrite both conversation endpoints when swapping fixture peer IDs.** - -Only `peer_from` is remapped here. 
If `peerId !== networkPeerFixture.peer_id`, sent messages can become self-addressed or still reference the old fixture peer on `peer_to`, which makes the mock DM timeline inconsistent. - - -
-Suggested fix - -```diff - return HttpResponse.json({ - messages: networkPeerMessagesFixture.map(message => ({ - ...message, - peer_from: message.peer_from === networkPeerFixture.peer_id ? peerId : message.peer_from, -+ peer_to: message.peer_to === networkPeerFixture.peer_id ? peerId : message.peer_to, - })), - }); -``` -
- -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/src/systems/network/mocks/handlers.ts` around lines 70 - 82, The handler -for GET "/api/network/peers/:peer_id/messages" only remaps peer_from causing -inconsistent DM timelines; update the mapping to remap both peer_from and -peer_to when swapping fixture peer IDs so neither side keeps the original -networkPeerFixture.peer_id. In the http.get handler (callback using params) -change the networkPeerMessagesFixture.map callback to replace occurrences of -networkPeerFixture.peer_id with the runtime peerId for both message.peer_from -and message.peer_to (leave other fields unchanged), and apply the same change to -the other conversation endpoint handler that performs the same fixture-id swap. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Notes: -- `web/src/systems/network/mocks/handlers.ts:77-81` rewrites participant ids when serving `/api/network/peers/:peer_id/messages`, and that rewrite is what makes the DM timeline inconsistent for non-default peer ids. -- The base fixtures already use real peer ids (`peer_storybook_local` / `peer_storybook_remote`), so mutating either side of those messages at handler time is unnecessary and can manufacture self-addressed messages. -- The file only has one peer-conversation endpoint with this behavior. Fix approach: stop rewriting participant ids in that handler and add regression coverage that proves the remote-peer timeline preserves the original local/remote participants. - -## Resolution - -- Removed the peer-id rewrite from the peer-conversation handler so it now returns the fixture timeline with its original local/remote participants intact. -- Reused the minimal out-of-scope regression coverage in `web/src/systems/network/mocks/network-mocks.test.ts` to verify the remote-peer handler response matches the fixture contract exactly. -- Verified with `bun x vitest run src/systems/network/mocks/network-mocks.test.ts`, `make web-test`, and `make verify`. 
diff --git a/.compozy/tasks/network-redesign/reviews-002/_meta.md b/.compozy/tasks/network-redesign/reviews-002/_meta.md deleted file mode 100644 index aa9c4ce0f..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "59" -round: 2 -created_at: 2026-04-24T01:40:36.45144Z ---- - -## Summary -- Total: 8 -- Resolved: 8 -- Unresolved: 0 diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_001.md b/.compozy/tasks/network-redesign/reviews-002/issue_001.md deleted file mode 100644 index 7f8e1eb17..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_001.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/acp/types_test.go -line: 206 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167301360,nitpick_hash:ae87e4cae610 -review_hash: ae87e4cae610 -source_review_id: "4167301360" -source_review_submitted_at: "2026-04-24T01:39:58Z" ---- - -# Issue 001: Assert the flushed ToolCallID, not just the event type. -## Review Comment - -This case still passes if the buffer releases the wrong deferred result as long as it is *some* `EventTypeToolResult`. Checking the emitted `ToolCallID` makes the cap test prove the newest retained entry actually wins. - -As per coding guidelines, `Ensure tests verify behavior outcomes, not just function calls`. - -## Triage - -- Decision: `valid` -- Root cause: The bounded-buffer branch in `TestEmitPromptEventDeferredToolResultsStayBounded` proves that the oldest deferred result is evicted, but the final assertion only checks that some `tool_result` event flushes after the newest tool call. It does not assert that the flushed deferred result is specifically bound to `tool-128`. -- Fix plan: Extend the final assertion in `internal/acp/types_test.go` to verify the emitted deferred result keeps `ToolCallID == "tool-128"`, which makes the cap test prove the newest retained result wins. 
-- Outcome: Updated the ACP cap test to assert the flushed deferred result carries `ToolCallID == "tool-128"`. Verified with `go test ./internal/acp -count=1` and the full `make verify` gate. diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_002.md b/.compozy/tasks/network-redesign/reviews-002/issue_002.md deleted file mode 100644 index 82cb395dd..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_002.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: resolved -file: internal/network/audit_test.go -line: 166 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167301360,nitpick_hash:6b688d9e463b -review_hash: 6b688d9e463b -source_review_id: "4167301360" -source_review_submitted_at: "2026-04-24T01:39:58Z" ---- - -# Issue 002: Refactor to a table-driven test pattern to match repository standards. -## Review Comment - -The three subtests repeat identical setup: `storeSink`, `NewAuditWriter()`, `writer.now`, and assertions on `storeSink.messages`. Consolidate into a table-driven structure with `t.Run()` for each case. This aligns with the coding guideline: "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests." - -## Triage - -- Decision: `valid` -- Root cause: `TestAuditWriterPersistsTimelineMessagesForRenderableEnvelopes` has three sibling subtests that repeat the same `recordingAuditStore`, `NewAuditWriter`, and `writer.now` setup with case-specific assertions layered on afterward. The behavior under test is data-driven, but the test shape is still copy-pasted. -- Fix plan: Collapse the renderable-envelope cases in `internal/network/audit_test.go` into a table-driven subtest loop so setup is shared and each case only declares the record operation plus expected timeline fields. -- Outcome: Reworked the renderable-envelope coverage into one table-driven suite with shared setup. Verified with `go test ./internal/network -count=1` and `make verify`. 
diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_003.md b/.compozy/tasks/network-redesign/reviews-002/issue_003.md deleted file mode 100644 index 342ece3c0..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_003.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/network/audit_test.go -line: 224 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167301360,nitpick_hash:f527ef25af56 -review_hash: f527ef25af56 -source_review_id: "4167301360" -source_review_submitted_at: "2026-04-24T01:39:58Z" ---- - -# Issue 003: Add a mirrored received-direct case for direction coverage. -## Review Comment - -This subtest now validates only the sent direct path. Adding a received direct variant would better protect direction-specific timeline mapping. - ---- - -## Triage - -- Decision: `valid` -- Root cause: The current timeline coverage only exercises the sent direct path. Direct-message normalization is direction-sensitive, so leaving out the received variant means the `peer_from`/`peer_to` mapping for inbound direct envelopes is not protected by tests. -- Fix plan: Add a mirrored received-direct case to the same table-driven suite in `internal/network/audit_test.go` and assert the expected direct-message fields for the inbound path. -- Outcome: Added a received-direct case to the shared renderable-envelope suite and asserted inbound addressing metadata. Verified with `go test ./internal/network -count=1` and `make verify`. 
diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_004.md b/.compozy/tasks/network-redesign/reviews-002/issue_004.md deleted file mode 100644 index 40be95299..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_004.md +++ /dev/null @@ -1,81 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_network_channels_test.go -line: 223 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59ReBH,comment:PRRC_kwDOR5y4QM662-g5 ---- - -# Issue 004: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Handle `db.Close()` errors in cleanup.** - -Discarding the close error with `_` breaks the repo’s Go error-handling rule. Please report it from the cleanup closure instead. - - - -
-Suggested fix - -```diff -- t.Cleanup(func() { _ = db.Close() }) -+ t.Cleanup(func() { -+ if err := db.Close(); err != nil { -+ t.Errorf("db.Close() error = %v", err) -+ } -+ }) -``` -
- -As per coding guidelines, `**/*.go`: Never ignore errors with `_` — every error must be handled or have a written justification. - - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - t.Cleanup(func() { - if err := db.Close(); err != nil { - t.Errorf("db.Close() error = %v", err) - } - }) -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_network_channels_test.go` at line 223, The -cleanup currently discards the error from db.Close() with `_`, violating the -no-ignored-errors rule; update the t.Cleanup closure to call db.Close(), capture -its error, and report it via the testing.T instance (e.g., t.Errorf or t.Fatalf) -so failures are surfaced — locate the t.Cleanup(func() { _ = db.Close() }) line -and replace it with a closure that checks err := db.Close() and reports it using -t (preserving test context). -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Root cause: `TestMigrateGlobalSchemaRebuildsNetworkChannelsWithWorkspaceForeignKey` currently discards `db.Close()` with `_`, which violates the repository rule against ignored errors in Go code. -- Fix plan: Replace the cleanup closure in `internal/store/globaldb/global_db_network_channels_test.go` with one that checks `db.Close()` and reports failures through `t.Errorf`. -- Outcome: The migration test cleanup now reports `db.Close()` failures via `t.Errorf`. Verified with `go test ./internal/store/globaldb -count=1` and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_005.md b/.compozy/tasks/network-redesign/reviews-002/issue_005.md deleted file mode 100644 index 455af55fb..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_005.md +++ /dev/null @@ -1,57 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_network_channels_test.go -line: 300 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59ReBE,comment:PRRC_kwDOR5y4QM662-g3 ---- - -# Issue 005: _🛠️ Refactor suggestion_ | _🟠 Major_ -## Review Comment - -_🛠️ Refactor suggestion_ | _🟠 Major_ - -**Use `t.Run("Should ...")` subtests for this new suite.** - -These scenarios are all top-level tests right now, but this repo’s Go test convention requires subtests as the default pattern. Please group them under parent tests with `t.Run("Should ...")`, and keep `t.Parallel()` on the independent cases after the split. - - - -As per coding guidelines, `**/*_test.go`: MUST use `t.Run("Should...")` pattern for ALL test cases and use table-driven tests with subtests (`t.Run`) as default pattern for Go tests. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_network_channels_test.go` around lines 15 - -300, You need to convert the top-level Test* functions for network channel -behavior into subtests using the t.Run("Should ...") pattern: create a parent -test (e.g. TestNetworkChannelsSuite) and move each existing test body (from -TestOpenGlobalDBCreatesNetworkChannelsSchema, -TestGlobalDBWriteAndListNetworkChannels, TestGlobalDBGetNetworkChannelNotFound, -TestGlobalDBDeleteNetworkChannel, -TestGlobalDBDeleteWorkspaceCascadesNetworkChannels, -TestGlobalDBListNetworkChannelsWrapsTimestampParseFailures, -TestMigrateGlobalSchemaRebuildsNetworkChannelsWithWorkspaceForeignKey) into its -own t.Run("Should ") { ... } block, keeping each subtest's -t.Parallel() where present and preserving all calls to helpers like -openTestGlobalDB, registerWorkspaceForGlobalTests, globalDB.WriteNetworkChannel, -globalDB.GetNetworkChannel, ListNetworkChannels, migrateGlobalSchema, -tableHasForeignKey, etc.; leave helper names and assertions unchanged, only -refactor test entry points into subtests under a single parent test function. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Root cause: The network-channel database coverage in `internal/store/globaldb/global_db_network_channels_test.go` is split across several top-level `Test*` functions even though the scenarios belong to one cohesive suite. That diverges from the repository preference for `t.Run("Should ...")`-style subtests as the default test shape. -- Fix plan: Group the network-channel scenarios under a single parent suite test, keep each scenario as a descriptive `t.Run("Should ...")`, and preserve `t.Parallel()` for independent cases. -- Outcome: Grouped the network-channel database scenarios under a single `TestNetworkChannels` suite with `Should ...` subtests. Verified with `go test ./internal/store/globaldb -count=1` and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_006.md b/.compozy/tasks/network-redesign/reviews-002/issue_006.md deleted file mode 100644 index 8f9412c16..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_006.md +++ /dev/null @@ -1,175 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/migrate_workspace.go -line: 806 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59ReBO,comment:PRRC_kwDOR5y4QM662-hC ---- - -# Issue 006: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -
-🧩 Analysis chain - -🏁 Script executed: - -```shell -#!/bin/bash -python - <<'PY' -import sqlite3 - -db = sqlite3.connect(":memory:") -cur = db.cursor() - -cur.executescript(""" -PRAGMA foreign_keys = ON; - -CREATE TABLE workspaces ( - id TEXT PRIMARY KEY -); -INSERT INTO workspaces (id) VALUES ('ws-alpha'); - -CREATE TABLE network_channels ( - channel TEXT PRIMARY KEY, - workspace_id TEXT NOT NULL, - purpose TEXT NOT NULL, - created_by TEXT NOT NULL DEFAULT '', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL -); -INSERT INTO network_channels ( - channel, workspace_id, purpose, created_by, created_at, updated_at -) VALUES ( - 'coord.core', ' ws-alpha ', 'Coordination', 'codex', - '2026-04-11T12:00:00Z', '2026-04-11T12:00:00Z' -); - -PRAGMA foreign_keys = OFF; -CREATE TABLE network_channels_new ( - channel TEXT PRIMARY KEY, - workspace_id TEXT NOT NULL REFERENCES workspaces(id) ON DELETE CASCADE, - purpose TEXT NOT NULL, - created_by TEXT NOT NULL DEFAULT '', - created_at TEXT NOT NULL, - updated_at TEXT NOT NULL -); -INSERT INTO network_channels_new ( - channel, workspace_id, purpose, created_by, created_at, updated_at -) SELECT - channel, workspace_id, purpose, created_by, created_at, updated_at -FROM network_channels -WHERE TRIM(workspace_id) IN (SELECT id FROM workspaces); -DROP TABLE network_channels; -ALTER TABLE network_channels_new RENAME TO network_channels; -PRAGMA foreign_keys = ON; -""") - -print("migrated rows:", cur.execute( - "SELECT channel, quote(workspace_id) FROM network_channels" -).fetchall()) - -cur.execute("DELETE FROM workspaces WHERE id = 'ws-alpha'") -print("remaining rows after workspace delete:", cur.execute( - "SELECT channel, quote(workspace_id) FROM network_channels" -).fetchall()) -PY -``` - -Repository: compozy/agh - -Length of output: 174 - ---- - -🏁 Script executed: - -```shell -# Check file exists and examine the migration code at lines 801-806 -if [ -f internal/store/globaldb/migrate_workspace.go ]; then - echo "File 
exists. Showing lines 801-810:" - sed -n '801,810p' internal/store/globaldb/migrate_workspace.go -else - echo "File not found. Searching for migration files..." - find . -name "*migrate*workspace*" -type f -fi -``` - -Repository: compozy/agh - -Length of output: 499 - ---- - - - -
- -**Trim `workspace_id` in the copied value, not only in the filter.** - -A row like `" ws-alpha "` passes the `WHERE TRIM(workspace_id) IN (...)` predicate, but this migration still inserts the untrimmed `workspace_id`. That leaves a non-matching FK value in the rebuilt table, so the migrated row will not participate in the new `ON DELETE CASCADE` relationship. - -
-Suggested fix - -```diff - `INSERT INTO network_channels_new ( - channel, workspace_id, purpose, created_by, created_at, updated_at - ) SELECT -- channel, workspace_id, purpose, created_by, created_at, updated_at -+ channel, TRIM(workspace_id), purpose, created_by, created_at, updated_at - FROM network_channels - WHERE TRIM(workspace_id) IN (SELECT id FROM workspaces)`, -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - `INSERT INTO network_channels_new ( - channel, workspace_id, purpose, created_by, created_at, updated_at - ) SELECT - channel, TRIM(workspace_id), purpose, created_by, created_at, updated_at - FROM network_channels - WHERE TRIM(workspace_id) IN (SELECT id FROM workspaces)`, -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/migrate_workspace.go` around lines 801 - 806, The -INSERT selects untrimmed workspace_id but only trims it in the WHERE clause, -causing inserted FK values to include whitespace; update the SELECT to insert -TRIM(workspace_id) (e.g., SELECT channel, TRIM(workspace_id) AS workspace_id, -purpose, created_by, created_at, updated_at FROM network_channels WHERE -TRIM(workspace_id) IN (SELECT id FROM workspaces)) so the stored workspace_id -matches workspace ids and the new FK/ON DELETE CASCADE will work correctly. -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Root cause: The migration rebuild filters rows with `WHERE TRIM(workspace_id) IN (...)` but still copies the raw `workspace_id` into `network_channels_new`. A legacy row like `" ws-alpha "` therefore survives the filter yet keeps a foreign-key value that no longer matches `workspaces(id)`, which breaks the intended `ON DELETE CASCADE` relationship after migration. -- Fix plan: Update the `INSERT ... SELECT` in `internal/store/globaldb/migrate_workspace.go` to store `TRIM(workspace_id)` and extend the migration test to cover a whitespace-padded `workspace_id`. -- Outcome: The migration now writes `TRIM(workspace_id)`, and the migration test now proves whitespace-padded rows are preserved with a normalized FK and still cascade on workspace delete. Verified with `go test ./internal/store/globaldb -count=1` and `make verify`. diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_007.md b/.compozy/tasks/network-redesign/reviews-002/issue_007.md deleted file mode 100644 index 615ba30a2..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_007.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/components/network-workspace-shell.tsx -line: 493 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167301360,nitpick_hash:3970a05f444f -review_hash: 3970a05f444f -source_review_id: "4167301360" -source_review_submitted_at: "2026-04-24T01:39:58Z" ---- - -# Issue 007: Composite key may collide if multiple fields share the same label-value pair. -## Review Comment - -Using `${field.label}-${field.value}` could produce duplicate keys. Consider using the array index for uniqueness. - -## Triage - -- Decision: `invalid` -- Reasoning: `NetworkDetailFieldList` is not rendering arbitrary user-provided field arrays. 
The only callers build `aboutFields` and `wireFields` from fixed, labeled arrays in `web/src/hooks/routes/use-network-page.ts`, and those labels are unique by construction (`Purpose`, `Created`, `Peer ID`, `Channel`, `Sent`, `Received`, etc.). Within the current model there is no duplicate-sibling key collision to fix. -- Why no code change: Replacing the current key with the array index would make the list less stable without addressing an observed defect. The existing key remains deterministic for the controlled field sets this component renders. -- Outcome: Analysis complete; no code change required. diff --git a/.compozy/tasks/network-redesign/reviews-002/issue_008.md b/.compozy/tasks/network-redesign/reviews-002/issue_008.md deleted file mode 100644 index 70da2c44f..000000000 --- a/.compozy/tasks/network-redesign/reviews-002/issue_008.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: web/src/systems/network/mocks/handlers.ts -line: 70 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4167301360,nitpick_hash:915d76105af6 -review_hash: 915d76105af6 -source_review_id: "4167301360" -source_review_submitted_at: "2026-04-24T01:39:58Z" ---- - -# Issue 008: Mock handler returns fixture directly without peer ID remapping. -## Review Comment - -The handler validates that `peerId` exists in `networkPeersFixture` before returning, which limits it to the two hardcoded fixture peers. This is acceptable for current mock/test purposes, but if tests need to query arbitrary peer IDs, the fixture messages would have inconsistent `peer_from`/`peer_to` values. - -For improved mock fidelity, consider remapping both `peer_from` and `peer_to` when the requested peer differs from the fixture's primary peer: - -## Triage - -- Decision: `invalid` -- Reasoning: The handler at line 70 intentionally only serves peer IDs that already exist in `networkPeersFixture`. 
For those two supported peers, returning the same two-party direct-history fixture is internally consistent: the conversation is still between the local and remote fixture peers, and `web/src/systems/network/mocks/network-mocks.test.ts` already locks that contract in by asserting the remote-peer request returns the fixture unchanged. -- Why no code change: The review comment describes a hypothetical expansion to arbitrary peer IDs, but this mock surface does not claim to support arbitrary IDs. Adding remapping logic here would widen the mock semantics rather than fix a concrete defect in the current test/storybook contract. -- Outcome: Analysis complete; no code change required. diff --git a/.compozy/tasks/qa-rounds/reviews-001/_meta.md b/.compozy/tasks/qa-rounds/reviews-001/_meta.md deleted file mode 100644 index b976fdd87..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "82" -round: 1 -created_at: 2026-04-28T12:25:40.990284Z ---- - -## Summary -- Total: 12 -- Resolved: 0 -- Unresolved: 12 diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_001.md b/.compozy/tasks/qa-rounds/reviews-001/issue_001.md deleted file mode 100644 index d2b749157..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_001.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: internal/api/core/coverage_helpers_test.go -line: 194 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:0cc472a0a94d -review_hash: 0cc472a0a94d -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 001: Add t.Parallel() to the new top-level sort tests. -## Review Comment - -Both new top-level tests are independent and read-only, but unlike the rest of this file they run serially. That drifts from the package’s normal test shape for no clear benefit. 
- -As per coding guidelines, "`**/*_test.go`: Use `t.Run('Should ...')` subtests with `t.Parallel` as default (opt-out only with `t.Setenv`) in Go test files". - -Also applies to: 290-354 - -## Triage - -- Decision: `VALID` -- Notes: `TestSortedNetworkChannelPayloads` and `TestSortedNetworkPeerPayloads` already use independent `Should ...` subtests with `t.Parallel()`, but the top-level test functions do not call `t.Parallel()`. This is inconsistent with the file's surrounding shape and can be fixed safely by marking both top-level tests parallel before their subtests are registered. - -## Resolution - -- Added `t.Parallel()` to both top-level sort tests in `internal/api/core/coverage_helpers_test.go`. -- Verified through targeted Go tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_002.md b/.compozy/tasks/qa-rounds/reviews-001/issue_002.md deleted file mode 100644 index 5dea797c0..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_002.md +++ /dev/null @@ -1,123 +0,0 @@ ---- -status: resolved -file: internal/api/core/handlers.go -line: 550 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-IGMH,comment:PRRC_kwDOR5y4QM67_zc2 ---- - -# Issue 002: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -
-🧩 Analysis chain - -🏁 Script executed: - -```shell -#!/bin/bash -set -euo pipefail - -rg -n -C3 'func StatusForWorkspaceError|StatusForWorkspaceError\(' --type go -``` - -Repository: compozy/agh - -Length of output: 8499 - ---- - -🏁 Script executed: - -```shell -cat -n internal/api/core/errors.go | sed -n '40,100p' -``` - -Repository: compozy/agh - -Length of output: 2509 - ---- - -🏁 Script executed: - -```shell -rg -n 'func statusForWorkspaceError' internal/api/core/errors.go -A 20 -``` - -Repository: compozy/agh - -Length of output: 37 - ---- - -🏁 Script executed: - -```shell -rg -n 'func statusForWorkspaceError' --type go -``` - -Repository: compozy/agh - -Length of output: 229 - ---- - -🏁 Script executed: - -```shell -sed -n '105,150p' internal/api/core/session_workspace.go -``` - -Repository: compozy/agh - -Length of output: 1763 - ---- - - - -
- -**Return a stable dependency-unavailable response when workspace resolver is missing.** - -When `h.Workspaces` is nil, `workspaceAgentDefs` returns a plain error that isn't a workspace package sentinel. The default case of `statusForWorkspaceError` maps this to `http.StatusInternalServerError` (500), which doesn't reflect a missing injected dependency. `GET /api/agents?workspace=...` and `GET /api/agents/:name?workspace=...` should return a deterministic, capability-aware status (e.g., 503) rather than a generic internal error. - -Consider defining a dedicated sentinel (e.g., `ErrWorkspaceResolverUnavailable`) in the workspace package and mapping it explicitly in `statusForWorkspaceError`, or use a different status code path for nil-check failures that indicates unavailability rather than server error. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/handlers.go` around lines 538 - 550, The nil-check in -BaseHandlers.workspaceAgentDefs currently returns a generic fmt.Errorf when -h.Workspaces is nil; instead introduce and return a dedicated sentinel error -(e.g., workspace.ErrWorkspaceResolverUnavailable) from this function (or a -well-named package-level sentinel in the workspace package) so callers can -detect a missing injected dependency, and update statusForWorkspaceError to map -that sentinel to a 503 (or other capability-unavailable status) so GET -/api/agents... yields a deterministic dependency-unavailable response; locate -the check in workspaceAgentDefs (h.Workspaces) and the error-mapping in -statusForWorkspaceError to implement the sentinel and mapping. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: `BaseHandlers.workspaceAgentDefs` returns a generic formatted error when `h.Workspaces` is nil. `statusForAgentWorkspaceError` delegates generic errors to `StatusForWorkspaceError`, which currently maps them to HTTP 500. The root cause is the missing domain sentinel for a resolver dependency that is unavailable. Fix by adding a workspace sentinel, returning it from the nil dependency path, and mapping it to HTTP 503 with regression coverage. Minimal support edits outside the issue's primary file are required in `internal/workspace/workspace.go` for the sentinel and `internal/api/core/session_workspace.go` plus tests for the HTTP status mapping. - -## Resolution - -- Added `workspace.ErrWorkspaceResolverUnavailable`, returned it from the nil resolver path, and mapped it to HTTP 503. -- Added regression coverage for `/agents?workspace=...` and the workspace status mapping. -- Verified through targeted Go tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_003.md b/.compozy/tasks/qa-rounds/reviews-001/issue_003.md deleted file mode 100644 index 13cc354ac..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_003.md +++ /dev/null @@ -1,52 +0,0 @@ ---- -status: resolved -file: internal/api/core/network_details.go -line: 1242 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-IGMO,comment:PRRC_kwDOR5y4QM67_zc_ ---- - -# Issue 003: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**This turns cursor pagination into full-history reads.** - -Lines 1238-1241 clear `BeforeMessageID`, `AfterMessageID`, and `Limit` before calling `ListNetworkMessages`, so every page request now loads the entire channel/peer history and trims it in memory. On active channels this becomes O(total_messages) per request and will regress latency and memory usage quickly. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/network_details.go` around lines 1233 - 1242, The helper -listTimelineRawMessages currently wipes out pagination by resetting -query.BeforeMessageID, query.AfterMessageID, and query.Limit before calling -networkStore.ListNetworkMessages, causing full-history reads; restore and pass -through the original cursor fields instead of clearing them (i.e., stop -modifying the incoming query in listTimelineRawMessages) or introduce an -explicit parameter or a new store method (e.g., ListNetworkMessagesFullHistory) -if a true full-scan is required; ensure calls that expect paginated results -continue to use the unmodified query and only perform full reads when explicitly -requested. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: `listTimelineRawMessages` clears `BeforeMessageID`, `AfterMessageID`, and `Limit` before calling the store, so cursor requests scan the full raw channel or peer lane. The root cause is conflating raw fetch shape with handler-side visible timeline pagination. The fix should preserve cursor fields in the store query so cursor pages do not read the opposite side of history, while keeping `Limit` at the handler layer because public timelines filter directed traffic and coalesce presence before pagination. Existing assertions in `internal/api/core/network_test.go` explicitly expect cleared raw cursors; those tests need a minimal out-of-scope update to validate cursor pass-through while preserving handler-side limit semantics. - -## Resolution - -- Preserved raw `before`/`after` cursor fields when loading timeline messages while keeping handler-side `Limit` behavior for filtered/coalesced timelines. -- Updated network timeline tests to assert cursor pass-through for channel and peer pagination. -- Removed the now-dead pagination error return flagged by lint. -- Verified through targeted network tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_004.md b/.compozy/tasks/qa-rounds/reviews-001/issue_004.md deleted file mode 100644 index b267ac16d..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_004.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: internal/api/core/settings_test.go -line: 1009 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:157dafdb6b61 -review_hash: 157dafdb6b61 -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 004: Assert the /sandboxes/:name response body too. -## Review Comment - -This case currently proves only that the handler delegated to `CollectionSandboxes`. It would still pass if `:name` selection broke and the endpoint returned the wrong item. 
Please decode the response and assert the returned sandbox `name`/`profile` for this renamed route. - -As per coding guidelines, "`**/*_test.go`: Always assert both HTTP status code AND response body (never status-code-only) in Go tests". - -Also applies to: 1097-1208 - -## Triage - -- Decision: `VALID` -- Notes: The collection handler table only checks HTTP 200 and service delegation for `/api/settings/sandboxes/local`; it does not decode the body, so a route-name lookup regression could return the wrong sandbox and still pass. Fix by adding response-body assertions, including the sandbox `name` and `profile.backend`. - -## Resolution - -- Added response-body assertions for settings collection handlers, including sandbox name and backend. -- Verified through targeted Go tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_005.md b/.compozy/tasks/qa-rounds/reviews-001/issue_005.md deleted file mode 100644 index 77e546b59..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_005.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -status: resolved -file: internal/api/core/tasks_terminal_integration_test.go -line: 112 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-IGMQ,comment:PRRC_kwDOR5y4QM67_zdD ---- - -# Issue 005: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Compare JSON payloads semantically, not as raw strings.** - -These assertions lock the test to a specific marshal order for `metadata`/`result`. A harmless re-encode of the same JSON object will start failing this suite even when the handler behavior is still correct. - - - -Also applies to: 157-158, 210-215 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/tasks_terminal_integration_test.go` around lines 111 - 112, -The test is comparing JSON payloads as raw strings which is brittle; instead, -parse capture.failure.Metadata into a generic JSON structure (e.g., -map[string]interface{} or a struct) and compare it semantically to the expected -JSON (e.g., expected := -map[string]interface{}{"step":"claim","mode":"historical-http"}) using deep -equality (reflect.DeepEqual or cmp.Equal) and only call t.Fatalf with the -marshaled/pretty-printed values when they differ; apply the same change for the -other raw-string comparisons mentioned around the capture variables at the other -locations (lines referenced: the blocks containing capture.failure.Metadata -checks and the similar comparisons at 157-158 and 210-215). -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: The integration test compares JSON metadata and result payloads via raw string equality. That makes the test sensitive to object key order rather than behavior. Fix by decoding `json.RawMessage` values and expected JSON strings into generic values and comparing them semantically. - -## Resolution - -- Replaced raw JSON string equality with semantic JSON decoding/comparison in the task terminal integration test. -- Verified with the integration test target and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_006.md b/.compozy/tasks/qa-rounds/reviews-001/issue_006.md deleted file mode 100644 index 6146a7dfa..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_006.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -status: resolved -file: internal/api/core/workspaces.go -line: 105 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:8434b2db588a -review_hash: 8434b2db588a -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 006: Consider a defensive nil guard in workspaceDetailAgents. -## Review Comment - -This helper currently assumes `resolved` is always non-nil; adding a guard would make future reuse safer. - -## Triage - -- Decision: `VALID` -- Notes: `workspaceDetailAgents` dereferences `resolved.Agents` without a nil guard. Current callers pass a non-nil value, but the helper has no local contract enforcement and future reuse would panic instead of returning an API error. Fix by returning an explicit error when `resolved` is nil. - -## Resolution - -- Added an explicit nil guard in `workspaceDetailAgents` that returns a normal error instead of panicking. -- Verified through targeted Go tests and `make verify`. 
diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_007.md b/.compozy/tasks/qa-rounds/reviews-001/issue_007.md deleted file mode 100644 index 650619f39..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_007.md +++ /dev/null @@ -1,26 +0,0 @@ ---- -status: resolved -file: internal/cli/agent.go -line: 89 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:fc0a36b91f40 -review_hash: fc0a36b91f40 -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 007: Consolidate duplicated workspace-flag parsing helper. -## Review Comment - -`agentWorkspaceFlag` duplicates `skillWorkspaceFlag` behavior in the same package. Reusing one helper avoids divergence in future validation/error-message changes. - -## Triage - -- Decision: `VALID` -- Notes: `agentWorkspaceFlag` and `skillWorkspaceFlag` contain identical flag parsing, trimming, and empty explicit flag validation. This duplication can diverge. Fix by consolidating both call sites on one shared workspace flag helper. - -## Resolution - -- Consolidated agent and skill workspace flag parsing on `commandWorkspaceFlag`. -- Verified through targeted CLI tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_008.md b/.compozy/tasks/qa-rounds/reviews-001/issue_008.md deleted file mode 100644 index a723d39c0..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_008.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -status: resolved -file: internal/cli/config.go -line: 97 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:519534b735ab -review_hash: 519534b735ab -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 008: Add coverage for the new defaults.sandbox mutation key. 
-## Review Comment - -Line 97 adds new mutable surface (`defaults.sandbox`), but this change should be pinned with a direct CLI regression test (accept `defaults.sandbox`, reject legacy `defaults.environment`) to avoid rename drift. - -As per coding guidelines, "`**/*.go`: Maintain 80% code coverage per Go package". - -## Triage - -- Decision: `VALID` -- Notes: `defaults.sandbox` is now accepted by `config set`, but `config_test.go` only covers `defaults.provider` and sandbox profile paths. Add a direct CLI regression that sets `defaults.sandbox`, verifies JSON output, and verifies the persisted config value. - -## Resolution - -- Added CLI coverage that sets `defaults.sandbox`, checks JSON output, and verifies persisted config state. -- Verified through targeted CLI tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_009.md b/.compozy/tasks/qa-rounds/reviews-001/issue_009.md deleted file mode 100644 index 3648cab80..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_009.md +++ /dev/null @@ -1,28 +0,0 @@ ---- -status: resolved -file: internal/cli/config_test.go -line: 242 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:bd52edee9423 -review_hash: bd52edee9423 -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 009: Add a negative regression for legacy environments.* mutation paths. -## Review Comment - -After the rename to `sandboxes.*`, add an explicit assertion that `config set environments.dev.backend local` fails. This prevents accidental alias/fallback reintroduction. - -Based on learnings, "Renames must update code, storage, APIs, CLI, extensions, specs, RFCs, and .compozy/tasks/* artifacts all in a single change. Do not create aliases, dual fields, or schema fallback paths." - -## Triage - -- Decision: `VALID` -- Notes: The hard rename from environments to sandboxes needs a negative guard against alias reintroduction. 
`config set environments.dev.backend local` is currently unsupported through the classifier, but no regression pins that behavior. Add an explicit CLI failure assertion for the legacy path. - -## Resolution - -- Added negative CLI regressions for legacy `defaults.environment` and `environments.*` mutation paths. -- Verified through targeted CLI tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_010.md b/.compozy/tasks/qa-rounds/reviews-001/issue_010.md deleted file mode 100644 index 77760b4c4..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_010.md +++ /dev/null @@ -1,27 +0,0 @@ ---- -status: resolved -file: internal/cli/install_test.go -line: 249 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4188693296,nitpick_hash:95f5b525c802 -review_hash: 95f5b525c802 -source_review_id: "4188693296" -source_review_submitted_at: "2026-04-28T12:24:35Z" ---- - -# Issue 010: Consider using the returned model from Update() for idiomatic Bubble Tea usage. -## Review Comment - -While the current code works because `installWizardModel` uses pointer receivers and mutates in place, idiomatic Bubble Tea code typically uses the returned model value. This makes the test more resilient to future refactoring. - -## Triage - -- Decision: `VALID` -- Notes: `TestInstallWizardModelTransitions` calls `Update` and discards the returned Bubble Tea model in several places. It passes today because the model mutates in place, but the test would mask a future value-style refactor. Fix by assigning the returned model through a typed test helper. - -## Resolution - -- Added a typed `updateInstallWizardModel` helper and assigned the returned model for each wizard transition. -- Removed underscore-discarded command returns and asserted command behavior where relevant. -- Verified through targeted CLI tests and `make verify`. 
diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_011.md b/.compozy/tasks/qa-rounds/reviews-001/issue_011.md deleted file mode 100644 index 940e7dd0a..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_011.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -status: resolved -file: internal/cli/skill_commands.go -line: 130 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-IGMV,comment:PRRC_kwDOR5y4QM67_zdM ---- - -# Issue 011: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Normalize skill name before daemon/local dispatch.** - -In workspace mode, raw `name` is passed through; in local mode it gets trimmed by `findSkillByName`. This creates inconsistent behavior for inputs with surrounding whitespace. - - -
-Suggested fix - -```diff - func runSkillViewCommand(cmd *cobra.Command, deps commandDeps, name string, filePath string) error { -+ name = strings.TrimSpace(name) -+ if name == "" { -+ return errors.New("skill name is required") -+ } - workspaceRef, err := skillWorkspaceFlag(cmd) - if err != nil { - return err - } - if workspaceRef != "" { - return runDaemonSkillViewCommand(cmd, deps, name, filePath, workspaceRef) - } - return runLocalSkillViewCommand(cmd, deps, name, filePath) - } -``` -
- -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/cli/skill_commands.go` around lines 121 - 130, Trim/normalize the -incoming skill name before deciding daemon vs local handling so both paths see -the same value: in runSkillViewCommand call the same normalization used by -findSkillByName (e.g., strings.TrimSpace(name) or the project’s canonical -normalize function) and then pass the normalized name into -runDaemonSkillViewCommand and runLocalSkillViewCommand; update references to the -local variable (name) in runSkillViewCommand so both branches use the normalized -skill name. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: `runSkillViewCommand` passes the raw CLI argument to daemon mode, while local mode trims inside `findSkillByName`. A padded skill name therefore behaves differently depending on `--workspace`. Fix by trimming and validating the name once before choosing daemon or local execution. - -## Resolution - -- Normalized and validated the skill name once before dispatching local or daemon skill view mode. -- Added daemon CLI coverage for padded skill names. -- Verified through targeted CLI tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-001/issue_012.md b/.compozy/tasks/qa-rounds/reviews-001/issue_012.md deleted file mode 100644 index 44c5740c7..000000000 --- a/.compozy/tasks/qa-rounds/reviews-001/issue_012.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -status: resolved -file: internal/cli/skill_workspace.go -line: 269 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-IGMX,comment:PRRC_kwDOR5y4QM67_zdR ---- - -# Issue 012: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Normalize `Source` in `skillInfoItemFromRecord`.** - -`skillListItemsFromRecords` trims `record.Source`, but `skillInfoItemFromRecord` returns it raw. This can produce inconsistent `skill list` vs `skill info` output for the same daemon record. - - -
-Suggested fix - -```diff - func skillInfoItemFromRecord(record SkillRecord) skillInfoItem { - return skillInfoItem{ - Name: record.Name, - Description: record.Description, - Version: record.Version, -- Source: record.Source, -+ Source: strings.TrimSpace(record.Source), - Path: record.Dir, - Enabled: record.Enabled, - Metadata: cloneMetadata(record.Metadata), - } - } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion -func skillInfoItemFromRecord(record SkillRecord) skillInfoItem { - return skillInfoItem{ - Name: record.Name, - Description: record.Description, - Version: record.Version, - Source: strings.TrimSpace(record.Source), - Path: record.Dir, - Enabled: record.Enabled, - Metadata: cloneMetadata(record.Metadata), - } -} -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/cli/skill_workspace.go` around lines 263 - 269, The Source field is -returned raw in skillInfoItemFromRecord causing inconsistent output versus -skillListItemsFromRecords which trims record.Source; update -skillInfoItemFromRecord to normalize Source the same way (e.g., use -strings.TrimSpace on record.Source) before assigning to skillInfoItem.Source so -both skillInfoItemFromRecord and skillListItemsFromRecords produce consistent -Source values. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: `skillListItemsFromRecords` normalizes `record.Source`, but `skillInfoItemFromRecord` returns it raw. This can make `skill list --workspace` and `skill info --workspace` disagree for the same daemon payload. Fix by trimming the source in the info conversion path and pin it with daemon CLI coverage. - -## Resolution - -- Trimmed daemon skill source values in the `skill info` conversion path. -- Updated daemon CLI coverage to prove list/info source normalization stays consistent. -- Verified through targeted CLI tests and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-002/_meta.md b/.compozy/tasks/qa-rounds/reviews-002/_meta.md deleted file mode 100644 index b13265745..000000000 --- a/.compozy/tasks/qa-rounds/reviews-002/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "82" -round: 2 -created_at: 2026-04-28T13:43:49.223864Z ---- - -## Summary -- Total: 4 -- Resolved: 0 -- Unresolved: 4 diff --git a/.compozy/tasks/qa-rounds/reviews-002/issue_001.md b/.compozy/tasks/qa-rounds/reviews-002/issue_001.md deleted file mode 100644 index 0ff7035b0..000000000 --- a/.compozy/tasks/qa-rounds/reviews-002/issue_001.md +++ /dev/null @@ -1,30 +0,0 @@ ---- -status: resolved -file: internal/api/core/error_paths_test.go -line: 236 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4189206979,nitpick_hash:a7454981cf39 -review_hash: a7454981cf39 -source_review_id: "4189206979" -source_review_submitted_at: "2026-04-28T13:29:54Z" ---- - -# Issue 001: Wrap this case in t.Run("Should ...") to match test conventions. -## Review Comment - -The test does not use the required subtest pattern. Per coding guidelines, `**/*_test.go` tests must use `t.Run("Should ...")` subtests with `t.Parallel()` by default. - -## Triage - -- Decision: `VALID` -- Notes: - - `TestListAgentsWorkspaceResolverUnavailable` is a standalone test body with no `t.Run("Should ...")` subtest. 
- - This violates the AGH test-shape rule that each case must live inside a `Should ...` subtest with `t.Parallel()` by default. - - Fix approach: wrap the current assertions in a `t.Run("Should return service unavailable when workspace resolver is missing", ...)` subtest and keep the subtest parallel because it does not use `t.Setenv` or shared mutable state. - -## Resolution - -- Wrapped the workspace-resolver-unavailable assertions in a `Should ...` subtest and kept both the parent test and subtest parallel. -- Verified with targeted `go test -race ./internal/api/core -run 'TestListAgentsWorkspaceResolverUnavailable|TestBaseHandlersNetworkChannelMessagesPreserveRemoteAuthors' -count=1`. -- Verified the repository gate with `make verify` after code changes. diff --git a/.compozy/tasks/qa-rounds/reviews-002/issue_002.md b/.compozy/tasks/qa-rounds/reviews-002/issue_002.md deleted file mode 100644 index 86a96977a..000000000 --- a/.compozy/tasks/qa-rounds/reviews-002/issue_002.md +++ /dev/null @@ -1,94 +0,0 @@ ---- -status: resolved -file: internal/api/core/network_details.go -line: 217 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-JRP8,comment:PRRC_kwDOR5y4QM68BYWu ---- - -# Issue 002: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Cursor pages can incorrectly 404 history-only channels.** - -An empty `rawMessages` slice does not mean the channel is missing once `before`/`after` is in play. For history-only rooms with no live sessions/peers/metadata, paging past the last visible item will hit this branch and return 404 instead of `200` with an empty page. - -
-Suggested fix - -```diff -- if len(rawMessages) == 0 && !networkChannelExists(sessions, peers, metadata, channel) { -+ if len(rawMessages) == 0 && -+ strings.TrimSpace(query.BeforeMessageID) == "" && -+ strings.TrimSpace(query.AfterMessageID) == "" && -+ !networkChannelExists(sessions, peers, metadata, channel) { - notFoundErr := fmt.Errorf("%w: %s", errNetworkChannelNotFound, channel) - h.respondError(c, http.StatusNotFound, notFoundErr) - return - } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - if len(rawMessages) == 0 && - strings.TrimSpace(query.BeforeMessageID) == "" && - strings.TrimSpace(query.AfterMessageID) == "" && - !networkChannelExists(sessions, peers, metadata, channel) { - notFoundErr := fmt.Errorf("%w: %s", errNetworkChannelNotFound, channel) - h.respondError(c, http.StatusNotFound, notFoundErr) - return - } -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/network_details.go` around lines 213 - 217, The current -check treats an empty rawMessages slice as "not found" and calls h.respondError, -which incorrectly 404s history-only channels when paging (using before/after); -instead, first call networkChannelExists(sessions, peers, metadata, channel) and -only return the 404 via h.respondError if the channel truly does not exist; if -the channel exists but rawMessages is empty (especially when before/after is -set), return a 200 empty page response (use the same success response path that -would return an empty list) so paging past the end yields a 200 with no messages -rather than a 404. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `NetworkChannelMessages` currently treats `len(rawMessages) == 0` plus no live session/peer/metadata as a missing channel. - - With cursor pagination, the store can legitimately return an empty slice for a history-only channel after the caller pages past the last visible item; in that case the handler should return `200` with an empty `messages` array, not `404`. - - Fix approach: only run the missing-channel 404 check for non-cursor requests. Cursor requests with an empty page will continue through the normal success response path. - - Additional regression coverage in `internal/api/core/network_test.go` is required to prove the behavior because the scoped production file has no local test cases. - -## Resolution - -- Updated `NetworkChannelMessages` so the missing-channel 404 branch applies only to non-cursor requests. -- Added regression coverage that proves an empty cursor page on a history-only channel returns `200` with no messages. -- Verified with targeted `go test -race ./internal/api/core -run 'TestListAgentsWorkspaceResolverUnavailable|TestBaseHandlersNetworkChannelMessagesPreserveRemoteAuthors' -count=1`. -- Verified the repository gate with `make verify` after code changes. diff --git a/.compozy/tasks/qa-rounds/reviews-002/issue_003.md b/.compozy/tasks/qa-rounds/reviews-002/issue_003.md deleted file mode 100644 index 4ddec4de4..000000000 --- a/.compozy/tasks/qa-rounds/reviews-002/issue_003.md +++ /dev/null @@ -1,95 +0,0 @@ ---- -status: resolved -file: internal/api/core/tasks_terminal_integration_test.go -line: 219 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-JRQZ,comment:PRRC_kwDOR5y4QM68BYXZ ---- - -# Issue 003: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Assert empty `payload.Run.Result` on fail/cancel paths.** - -When `wantResultJSON` is empty, this block skips any result assertion. 
A regression that leaks stale result data in non-complete responses would still pass. - - - -
-Patch suggestion - -```diff -- if tc.wantResultJSON != "" { -- assertRawJSONEqual(t, "payload.Run.Result", payload.Run.Result, tc.wantResultJSON) -- } -+ if tc.wantResultJSON == "" { -+ if len(payload.Run.Result) != 0 { -+ t.Fatalf("payload.Run.Result = %s, want empty result", string(payload.Run.Result)) -+ } -+ } else { -+ assertRawJSONEqual(t, "payload.Run.Result", payload.Run.Result, tc.wantResultJSON) -+ } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - if tc.wantResultJSON == "" { - if len(payload.Run.Result) != 0 { - t.Fatalf("payload.Run.Result = %s, want empty result", string(payload.Run.Result)) - } - } else { - assertRawJSONEqual(t, "payload.Run.Result", payload.Run.Result, tc.wantResultJSON) - } -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/tasks_terminal_integration_test.go` around lines 217 - 219, -The test currently only asserts payload.Run.Result when tc.wantResultJSON != "", -which lets stale results slip through on fail/cancel paths; modify the block -around tc.wantResultJSON to add an explicit assertion for the empty-case so that -when tc.wantResultJSON == "" the test asserts payload.Run.Result is empty (e.g., -assert.Empty or assert.Equal(t, "", payload.Run.Result) / assert.Nil as -appropriate) instead of skipping; keep using the same variables -(tc.wantResultJSON, payload.Run.Result) and the existing assert helpers -(assertRawJSONEqual) for the non-empty branch. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `TestTaskRunTerminalHandlersPreserveHistoricalChannelBindingsIntegration` asserts `payload.Run.Result` only when `wantResultJSON` is non-empty. - - Fail/cancel cases use an empty expected result, so a stale non-empty result payload could leak through without test coverage. - - Fix approach: add an explicit empty-result assertion when `wantResultJSON == ""` and keep the existing `assertRawJSONEqual` path for non-empty results. - -## Resolution - -- Added an explicit empty-result assertion for cases where `wantResultJSON == ""`. -- Preserved `assertRawJSONEqual` for non-empty result payloads. -- Verified with targeted `go test -race -tags integration ./internal/api/core -run TestTaskRunTerminalHandlersPreserveHistoricalChannelBindingsIntegration -count=1`. -- Verified the repository gate with `make verify` after code changes. diff --git a/.compozy/tasks/qa-rounds/reviews-002/issue_004.md b/.compozy/tasks/qa-rounds/reviews-002/issue_004.md deleted file mode 100644 index 822f8e0ed..000000000 --- a/.compozy/tasks/qa-rounds/reviews-002/issue_004.md +++ /dev/null @@ -1,33 +0,0 @@ ---- -status: resolved -file: internal/cli/agent.go -line: 39 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4189206979,nitpick_hash:e5724ed5c1bb -review_hash: e5724ed5c1bb -source_review_id: "4189206979" -source_review_submitted_at: "2026-04-28T13:29:54Z" ---- - -# Issue 004: Optional: deduplicate workspace query construction between commands. -## Review Comment - -The same `commandWorkspaceFlag` + `AgentQuery` block appears twice. A tiny helper would keep command handlers slimmer and reduce drift later. - -Also applies to: 73-77 - -## Triage - -- Decision: `VALID` -- Notes: - - Both `agent list` and `agent info` repeat the same `commandWorkspaceFlag` and `AgentQuery{Workspace: workspace}` construction. 
- - The duplication is small but real; centralizing it reduces drift at the CLI boundary where workspace flag validation must stay consistent. - - Fix approach: add a small helper that returns `AgentQuery` from the command workspace flag and use it in both handlers without changing command behavior. - -## Resolution - -- Added `agentQueryFromCommand` to centralize workspace flag parsing and `AgentQuery` construction. -- Updated `agent list` and `agent info` to use the shared helper without changing behavior. -- Verified with targeted `go test -race ./internal/cli -run 'TestAgentListAndInfoCommands|TestAgentCommandsPassWorkspaceQuery|TestAgentWorkspaceFlagRejectsEmptyExplicitValue' -count=1`. -- Verified the repository gate with `make verify` after code changes. diff --git a/.compozy/tasks/qa-rounds/reviews-003/_meta.md b/.compozy/tasks/qa-rounds/reviews-003/_meta.md deleted file mode 100644 index e65caff2a..000000000 --- a/.compozy/tasks/qa-rounds/reviews-003/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "82" -round: 3 -created_at: 2026-04-28T14:17:15.419963Z ---- - -## Summary -- Total: 3 -- Resolved: 0 -- Unresolved: 3 diff --git a/.compozy/tasks/qa-rounds/reviews-003/issue_001.md b/.compozy/tasks/qa-rounds/reviews-003/issue_001.md deleted file mode 100644 index 20fdad2b1..000000000 --- a/.compozy/tasks/qa-rounds/reviews-003/issue_001.md +++ /dev/null @@ -1,84 +0,0 @@ ---- -status: resolved -file: internal/api/core/network_details.go -line: 216 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-KLdi,comment:PRRC_kwDOR5y4QM68CnK_ ---- - -# Issue 001: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Paginated empty pages now mask unknown channels.** - -With any `before`/`after` cursor, this branch skips the 404 even when there is no metadata, no live peer/session, and no stored history. That makes `/network/channels/{channel}/messages?after=...` indistinguishable from “past end of history”. 
- - -
-Suggested direction - -```diff -- if len(rawMessages) == 0 && -- strings.TrimSpace(query.BeforeMessageID) == "" && -- strings.TrimSpace(query.AfterMessageID) == "" && -- !networkChannelExists(sessions, peers, metadata, channel) { -- notFoundErr := fmt.Errorf("%w: %s", errNetworkChannelNotFound, channel) -- h.respondError(c, http.StatusNotFound, notFoundErr) -- return -- } -+ if len(rawMessages) == 0 && !networkChannelExists(sessions, peers, metadata, channel) { -+ history, err := networkStore.ListNetworkMessages(c.Request.Context(), store.NetworkMessageQuery{ -+ Channel: channel, -+ Limit: 1, -+ }) -+ if err != nil { -+ h.respondNetworkMessageError(c, err) -+ return -+ } -+ if len(history) == 0 { -+ notFoundErr := fmt.Errorf("%w: %s", errNetworkChannelNotFound, channel) -+ h.respondError(c, http.StatusNotFound, notFoundErr) -+ return -+ } -+ } -``` -
- -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/network_details.go` around lines 213 - 216, The current -conditional lets requests with before/after cursors fall through and mask -unknown channels; ensure unknown channels yield a 404 regardless of pagination -cursors by checking channel existence first: call networkChannelExists(sessions, -peers, metadata, channel) and if it returns false and len(rawMessages) == 0 then -return the 404 error immediately (i.e., move or add the existence check out of -the combined if that includes query.BeforeMessageID/query.AfterMessageID so that -channel non-existence is handled separately from “past end of history” -pagination). -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `NetworkChannelMessages` currently suppresses the unknown-channel 404 whenever `before` or `after` is present. - - Once raw timeline loading is corrected to fetch the complete unpaginated channel history, the handler can distinguish an unknown channel from an empty page by checking `len(rawMessages) == 0 && !networkChannelExists(...)` independently of cursor inputs. - - Fix: remove the cursor guards from the not-found branch and add regression coverage for a cursor request against a channel with no metadata, peers, sessions, or stored history. - -## Resolution - -- Removed the cursor guards from the channel not-found branch in `NetworkChannelMessages`. -- Added regression coverage for a paginated request against a channel with no metadata, peers, sessions, or stored history returning 404 instead of an empty 200 page. -- Verified with `go test -race ./internal/api/core -count=1` and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-003/issue_002.md b/.compozy/tasks/qa-rounds/reviews-003/issue_002.md deleted file mode 100644 index d8a08f7bc..000000000 --- a/.compozy/tasks/qa-rounds/reviews-003/issue_002.md +++ /dev/null @@ -1,149 +0,0 @@ ---- -status: resolved -file: internal/api/core/network_details.go -line: 1235 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-KLdo,comment:PRRC_kwDOR5y4QM68CnLG ---- - -# Issue 002: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -
-🧩 Analysis chain - -🏁 Script executed: - -```shell -#!/bin/bash -set -euo pipefail - -echo "ListNetworkMessages declarations/implementations:" -rg -n -C4 --type=go '\bListNetworkMessages\s*\(' - -echo -echo "NetworkMessageQuery cursor handling:" -rg -n -C4 --type=go 'BeforeMessageID|AfterMessageID|Limit' -``` - -Repository: compozy/agh - -Length of output: 50368 - ---- - -🏁 Script executed: - -```shell -# First, examine the store implementation to see how it handles cursors with Limit == 0 -sed -n '75,150p' internal/store/globaldb/global_db_network_messages.go - -# Then check the callers of listTimelineRawMessages in network_details.go -rg -n 'listTimelineRawMessages' internal/api/core/network_details.go -``` - -Repository: compozy/agh - -Length of output: 2121 - ---- - -🏁 Script executed: - -```shell -# Get the full buildNetworkMessageListQuery implementation -sed -n '125,220p' internal/store/globaldb/global_db_network_messages.go - -# Get the calling context and complete listTimelineRawMessages -sed -n '1200,1240p' internal/api/core/network_details.go -``` - -Repository: compozy/agh - -Length of output: 4330 - ---- - -🏁 Script executed: - -```shell -# Verify the store applies cursors regardless of Limit value -sed -n '145,165p' internal/store/globaldb/global_db_network_messages.go - -# Check if there are any other places where cursors are cleared before raw fetches -rg -n 'BeforeMessageID\s*=|AfterMessageID\s*=' internal/api/core/network_details.go -``` - -Repository: compozy/agh - -Length of output: 943 - ---- - -🏁 Script executed: - -```shell -# Look at the full switch statement in buildNetworkMessageListQuery -sed -n '145,175p' internal/store/globaldb/global_db_network_messages.go - -# Verify that the tests DO check for empty cursors in raw fetches -grep -A5 "want empty raw" internal/api/core/network_test.go | head -20 -``` - -Repository: compozy/agh - -Length of output: 1817 - ---- - - - -
- -**Strip cursors in `listTimelineRawMessages` to prevent store-side filtering before visibility coalescing.** - -The `listTimelineRawMessages` function sets `Limit = 0` but does not clear `BeforeMessageID` / `AfterMessageID`. The store's `buildNetworkMessageListQuery` applies cursor filters regardless of Limit value, which can split episodes or drop cursor anchors before the handler applies visibility filtering and coalescing. Add: - -``` -rawQuery.BeforeMessageID = "" -rawQuery.AfterMessageID = "" -``` - -This ensures raw fetches retrieve the complete unfiltered dataset for handler-side pagination after visibility operations. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/network_details.go` around lines 1228 - 1235, The function -listTimelineRawMessages sets rawQuery.Limit = 0 but leaves cursor fields set, -which lets the store apply BeforeMessageID/AfterMessageID filtering prematurely; -modify listTimelineRawMessages to also clear rawQuery.BeforeMessageID and -rawQuery.AfterMessageID (set them to empty strings) before calling -networkStore.ListNetworkMessages so the store returns the full unfiltered -dataset for handler-side visibility coalescing and pagination. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `listTimelineRawMessages` clears `Limit` but leaves `BeforeMessageID` and `AfterMessageID` intact. - - The global store applies cursor filters before returning rows regardless of `Limit`, which can remove the cursor anchor or split raw presence episodes before handler-side visibility filtering and coalescing. - - Fix: clear both cursor fields on the raw query before calling `ListNetworkMessages`, while keeping the original query for handler-side pagination. - -## Resolution - -- Updated `listTimelineRawMessages` to clear `BeforeMessageID` and `AfterMessageID` before calling `ListNetworkMessages`. -- Updated channel and peer timeline tests so raw store fetches must receive empty cursors while handler-side pagination still uses the original request query. -- Verified with `go test -race ./internal/api/core -count=1` and `make verify`. diff --git a/.compozy/tasks/qa-rounds/reviews-003/issue_003.md b/.compozy/tasks/qa-rounds/reviews-003/issue_003.md deleted file mode 100644 index 3133f51a6..000000000 --- a/.compozy/tasks/qa-rounds/reviews-003/issue_003.md +++ /dev/null @@ -1,106 +0,0 @@ ---- -status: resolved -file: internal/api/core/network_details.go -line: 1445 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM5-KLds,comment:PRRC_kwDOR5y4QM68CnLM ---- - -# Issue 003: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Reject cursors that are not present in the visible timeline.** - -When a `before`/`after` cursor points to a row that was filtered out or coalesced away, `indexNetworkTimelineViewByMessageID` returns `-1` and pagination silently falls back to the first/last page. That returns duplicated/incorrect pages instead of the existing `"message cursor not found"` validation error. - - -
-Suggested fix - -```diff --func paginateNetworkTimelineViews( -+func paginateNetworkTimelineViews( - views []networkTimelineMessageView, - query store.NetworkMessageQuery, --) []networkTimelineMessageView { -+) ([]networkTimelineMessageView, error) { - paginated := views - if before := strings.TrimSpace(query.BeforeMessageID); before != "" { - index := indexNetworkTimelineViewByMessageID(paginated, before) -- if index >= 0 { -- paginated = paginated[:index] -+ if index < 0 { -+ return nil, sql.ErrNoRows - } -+ paginated = paginated[:index] - } - if after := strings.TrimSpace(query.AfterMessageID); after != "" { - index := indexNetworkTimelineViewByMessageID(paginated, after) -- if index >= 0 { -- paginated = paginated[index+1:] -+ if index < 0 { -+ return nil, sql.ErrNoRows - } -+ paginated = paginated[index+1:] - } - if query.Limit <= 0 || len(paginated) <= query.Limit { -- return paginated -+ return paginated, nil - } - if strings.TrimSpace(query.BeforeMessageID) != "" { -- return paginated[len(paginated)-query.Limit:] -+ return paginated[len(paginated)-query.Limit:], nil - } -- return paginated[:query.Limit] -+ return paginated[:query.Limit], nil - } -``` - -`networkTimelinePayloads` should then return an error and let `respondNetworkMessageError(...)` keep the existing 400 mapping. -
- - -Also applies to: 1456-1464 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/core/network_details.go` around lines 1435 - 1445, The -pagination code currently silences missing cursors by treating -indexNetworkTimelineViewByMessageID(...)= -1 as "not found" and continues; -instead, when indexNetworkTimelineViewByMessageID returns -1 for either -query.BeforeMessageID or query.AfterMessageID you should return a validation -error from networkTimelinePayloads and propagate it so -respondNetworkMessageError can map it to a 400. Concretely, in the blocks that -handle BeforeMessageID and AfterMessageID, check the index result and if index -== -1 return a descriptive error (e.g., "message cursor not found") from -networkTimelinePayloads rather than trimming/ slicing paginated; ensure the -caller uses respondNetworkMessageError to convert that error to the existing 400 -response. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `paginateNetworkTimelineViews` treats a missing cursor index as a no-op and returns the first or last page instead of rejecting the request. - - After public/direct visibility filtering and presence coalescing, a raw cursor can point at a row that is not present in the visible timeline; silently ignoring that cursor duplicates pages and masks invalid client state. - - Fix: make timeline pagination return an error when `before` or `after` is absent from the visible/coalesced views, propagate it through the channel and peer handlers, and rely on `respondNetworkMessageError` for the existing 400 "message cursor not found" response. - -## Resolution - -- Changed visible timeline pagination to return a cursor-not-found error when `before` or `after` is absent from the visible/coalesced views. -- Propagated that error through both channel and peer message handlers so `respondNetworkMessageError` preserves the existing 400 response. -- Added regression coverage for hidden directed-message cursors and coalesced-away presence cursors returning `message cursor not found`. -- Verified with `go test -race ./internal/api/core -count=1` and `make verify`. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/_meta.md b/.compozy/tasks/release-adjustments/reviews-001/_meta.md deleted file mode 100644 index 2c5c3120f..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/_meta.md +++ /dev/null @@ -1,11 +0,0 @@ ---- -provider: coderabbit -pr: "66" -round: 1 -created_at: 2026-04-24T17:37:31.308161Z ---- - -## Summary -- Total: 23 -- Resolved: 0 -- Unresolved: 23 diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_001.md b/.compozy/tasks/release-adjustments/reviews-001/issue_001.md deleted file mode 100644 index b7ea40326..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_001.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: go.mod -line: 25 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:ebda09433582 -review_hash: ebda09433582 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 001: Remove the unused github.com/pelletier/go-toml/v2 dependency. -## Review Comment - -The v2.2.4 version is not imported anywhere in the codebase, making it an orphaned direct dependency that bloats go.mod. Only the v1 version is actively used (in `persistence.go` and `transport_parity_integration_test.go`). Remove `github.com/pelletier/go-toml/v2 v2.2.4` from go.mod. - -As a secondary improvement, consider whether you can consolidate the two TOML libraries (`github.com/BurntSushi/toml` and `github.com/pelletier/go-toml` v1) to reduce the overall dependency footprint. - -## Triage - -- Decision: `INVALID` -- Notes: - - Current code imports `github.com/pelletier/go-toml/v2/unstable` in `internal/config/persistence.go`, so `github.com/pelletier/go-toml/v2 v2.2.4` is not an orphaned dependency. - - Removing the v2 requirement would break the config persistence code that relies on the v2 unstable AST package. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_002.md b/.compozy/tasks/release-adjustments/reviews-001/issue_002.md deleted file mode 100644 index 4ea08375e..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_002.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/acp/client_test.go -line: 360 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:9d13db1d0d63 -review_hash: 9d13db1d0d63 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 002: Wrap this in the repo’s required Should... subtest pattern. -## Review Comment - -The assertions look good, but this new test skips both `t.Run("Should...")` and `t.Parallel()` even though it is independent. - -As per coding guidelines, "MUST use t.Run("Should...") pattern for ALL test cases", "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests", and "Add `t.Parallel()` for independent subtests in Go tests". - -## Triage - -- Decision: `VALID` -- Notes: - - `TestPromptActivityReporterReportsWhilePromptIsInFlight` currently runs assertions directly in the top-level test body and does not use the required `t.Run("Should...")` subtest wrapper. - - The scenario is independent, so the fix is to move the existing assertions into a named `Should...` subtest with `t.Parallel()`. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_003.md b/.compozy/tasks/release-adjustments/reviews-001/issue_003.md deleted file mode 100644 index a129de70d..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_003.md +++ /dev/null @@ -1,78 +0,0 @@ ---- -status: resolved -file: internal/acp/handlers_test.go -line: 1147 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk02,comment:PRRC_kwDOR5y4QM67HMV3 ---- - -# Issue 003: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Make the new negative-heartbeat case a named subtest with a specific error assertion.** - -This currently only checks non-nil error, so an unrelated validation failure would still pass. - - -
-✅ Suggested test tightening - -```diff -- if err := (PromptRequest{ -- TurnID: "turn-negative-heartbeat", -- Message: "hello", -- ActivityHeartbeatInterval: -time.Second, -- }).Validate(); err == nil { -- t.Fatal("PromptRequest.Validate(negative heartbeat) error = nil, want validation error") -- } -+ t.Run("ShouldRejectNegativeHeartbeatInterval", func(t *testing.T) { -+ t.Parallel() -+ -+ err := (PromptRequest{ -+ TurnID: "turn-negative-heartbeat", -+ Message: "hello", -+ ActivityHeartbeatInterval: -time.Second, -+ }).Validate() -+ if err == nil { -+ t.Fatal("PromptRequest.Validate(negative heartbeat) error = nil, want validation error") -+ } -+ if !strings.Contains(err.Error(), "heartbeat") { -+ t.Fatalf("PromptRequest.Validate(negative heartbeat) error = %v, want heartbeat-specific validation", err) -+ } -+ }) -``` -
- -As per coding guidelines, `**/*_test.go`: `MUST use t.Run("Should...") pattern for ALL test cases` and `MUST have specific error assertions (ErrorContains, ErrorAs)`. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/acp/handlers_test.go` around lines 1141 - 1147, Convert the -anonymous negative-heartbeat check into a named subtest (e.g., -t.Run("ShouldRejectNegativeHeartbeat", func(t *testing.T) { ... })) and call -PromptRequest{TurnID:"turn-negative-heartbeat", Message:"hello", -ActivityHeartbeatInterval:-time.Second}.Validate() inside it; then assert the -returned error specifically using ErrorContains/ErrorAs (for example -require.ErrorContains(t, err, "heartbeat") or errors.Is/As as appropriate) -instead of only checking err != nil so the test ensures the validation failed -for the negative ActivityHeartbeatInterval via PromptRequest.Validate. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `TestAccessorsAndValidationHelpers` validates the negative heartbeat case inline and only checks for a non-nil error. - - A different validation failure could satisfy the current assertion, so the fix is a named `Should...` subtest plus a heartbeat-specific error assertion. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_004.md b/.compozy/tasks/release-adjustments/reviews-001/issue_004.md deleted file mode 100644 index fbc9d4442..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_004.md +++ /dev/null @@ -1,50 +0,0 @@ ---- -status: resolved -file: internal/api/contract/contract.go -line: 67 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk08,comment:PRRC_kwDOR5y4QM67HMV- ---- - -# Issue 004: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Keep zero-valued runtime metrics in the JSON payload.** - -`omitempty` on `idle_seconds`, `elapsed_seconds`, and the iteration counters drops legitimate `0` values. Clients then cannot distinguish “0 seconds / 0 iterations” from “field not populated”. Either remove `omitempty` for those metrics or make them pointers if absence is intentional. - - - -Also applies to: 188-205 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/contract/contract.go` around lines 53 - 67, The runtime metric -fields in RuntimeActivityPayload (IdleSeconds, ElapsedSeconds, IterationCurrent, -IterationMax) are using `omitempty`, which drops legitimate zero values; update -the struct so clients can distinguish 0 from absent by either removing -`omitempty` from the json tags for those fields or changing their types to -pointers (e.g., *int, *int64) and ensuring construction code sets nil vs zero -appropriately; apply the same change to the corresponding fields in the other -related struct referenced around lines 188-205 so both payloads preserve -zero-valued metrics. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `RuntimeActivityPayload` and `SessionActivityHealthPayload` use `omitempty` on zero-valued runtime metrics, which drops meaningful `0` values from JSON. - - The fix is to remove `omitempty` from `iteration_current`, `iteration_max`, `idle_seconds`, and `elapsed_seconds`, and add JSON shape coverage for zero metrics. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_005.md b/.compozy/tasks/release-adjustments/reviews-001/issue_005.md deleted file mode 100644 index 9371593e1..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_005.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/api/core/coverage_helpers_test.go -line: 471 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:17d6b9529329 -review_hash: 17d6b9529329 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 005: Wrap this new test case in t.Run("Should...") to match test conventions. -## Review Comment - -The assertions are good, but this new test path should follow the repo’s required subtest naming pattern. - -As per coding guidelines "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "MUST use t.Run(\"Should...\") pattern for ALL test cases". - -## Triage - -- Decision: `VALID` -- Notes: - - `TestObserveHealthPayloadIncludesRuntimeActivity` currently runs directly in the top-level test body. - - The fix is to wrap the scenario in a `Should...` subtest and keep the independent subtest parallel. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_006.md b/.compozy/tasks/release-adjustments/reviews-001/issue_006.md deleted file mode 100644 index d1a581f4d..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_006.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/api/httpapi/handlers_test.go -line: 1339 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:8478231e8896 -review_hash: 8478231e8896 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 006: Use a t.Run("Should...") scenario wrapper for this test case. -## Review Comment - -The behavioral coverage is strong; please wrap this single scenario in the required subtest style for consistency. - -As per coding guidelines "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "MUST use t.Run(\"Should...\") pattern for ALL test cases". - -## Triage - -- Decision: `VALID` -- Notes: - - `TestPromptSessionHandlerDrainsPromptAfterRequestCancellation` in `internal/api/httpapi/handlers_test.go` has a single scenario without the required `Should...` subtest wrapper. - - The fix is to wrap the existing behavior in a named subtest. The test uses request cancellation and explicit goroutine synchronization, so the logic will remain unchanged. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_007.md b/.compozy/tasks/release-adjustments/reviews-001/issue_007.md deleted file mode 100644 index 8192ff407..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_007.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -status: resolved -file: internal/api/udsapi/prompt.go -line: 78 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk0_,comment:PRRC_kwDOR5y4QM67HMWD ---- - -# Issue 007: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Give the detached drain goroutine an explicit shutdown bound.** - -This is now a fire-and-forget goroutine, and `cancelPrompt()` is deferred until `events` closes. If the prompt stream wedges, the goroutine and detached prompt can survive indefinitely after the client disconnects. Tie the drain to a bounded context/timeout and track it with handler shutdown. - -As per coding guidelines, "Every goroutine must have explicit ownership and shutdown via `context.Context` cancellation" and "No fire-and-forget goroutines — track with `sync.WaitGroup` or equivalent". - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/api/udsapi/prompt.go` around lines 73 - 78, The detached goroutine -in Handlers.drainPromptEventsAsync currently fires-and-forgets and only defers -cancelPrompt(), which can hang if events never close; change it to create a -bounded child context (e.g., context.WithTimeout or context.WithCancel derived -from the handler's lifecycle context) and register the goroutine with the -handler's shutdown tracking (e.g., h.wg.Add(1) / h.wg.Done() or equivalent) so -the goroutine is owned and can be cancelled on handler shutdown; inside the -goroutine select on the child ctx.Done() and the events channel and ensure -cancelPrompt() is called and wg.Done() on exit, and remove any untracked -anonymous spawn so drainPromptEvents is not left running indefinitely. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `internal/api/udsapi/prompt.go` starts a detached goroutine in `drainPromptEventsAsync` without local ownership tracking and without a timeout bound if the events channel wedges. - - The fix is to make the drain goroutine owned by the UDS handlers with a wait group, tie it to stream shutdown, and add a bounded drain context that cancels the prompt on timeout or shutdown. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_008.md b/.compozy/tasks/release-adjustments/reviews-001/issue_008.md deleted file mode 100644 index ea6ba8aec..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_008.md +++ /dev/null @@ -1,139 +0,0 @@ ---- -status: resolved -file: internal/config/config.go -line: 809 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1C,comment:PRRC_kwDOR5y4QM67HMWG ---- - -# Issue 008: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Reject warning thresholds that exceed the timeout.** - -When both `InactivityWarningAfter` and `InactivityTimeout` are set, `warning_after > timeout` is unreachable configuration. The validator currently accepts it, so operators can configure “timeout with no possible prior warning” without feedback. - - -
-Suggested validation - -```diff - func (c SessionSupervisionConfig) Validate() error { - switch { - case c.ActivityHeartbeatInterval <= 0: - return fmt.Errorf( - "session.supervision.activity_heartbeat_interval must be positive: %s", - c.ActivityHeartbeatInterval, - ) - case c.ProgressNotifyInterval < 0: - return fmt.Errorf( - "session.supervision.progress_notify_interval "+ - "must be zero or positive: %s", - c.ProgressNotifyInterval, - ) - case c.InactivityWarningAfter < 0: - return fmt.Errorf( - "session.supervision.inactivity_warning_after "+ - "must be zero or positive: %s", - c.InactivityWarningAfter, - ) - case c.InactivityTimeout < 0: - return fmt.Errorf("session.supervision.inactivity_timeout must be zero or positive: %s", c.InactivityTimeout) -+ case c.InactivityTimeout > 0 && -+ c.InactivityWarningAfter > 0 && -+ c.InactivityWarningAfter > c.InactivityTimeout: -+ return fmt.Errorf( -+ "session.supervision.inactivity_warning_after must be <= session.supervision.inactivity_timeout: %s > %s", -+ c.InactivityWarningAfter, -+ c.InactivityTimeout, -+ ) - case c.TimeoutCancelGrace <= 0: - return fmt.Errorf("session.supervision.timeout_cancel_grace must be positive: %s", c.TimeoutCancelGrace) - default: - return nil - } - } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion -func (c SessionSupervisionConfig) Validate() error { - switch { - case c.ActivityHeartbeatInterval <= 0: - return fmt.Errorf( - "session.supervision.activity_heartbeat_interval must be positive: %s", - c.ActivityHeartbeatInterval, - ) - case c.ProgressNotifyInterval < 0: - return fmt.Errorf( - "session.supervision.progress_notify_interval "+ - "must be zero or positive: %s", - c.ProgressNotifyInterval, - ) - case c.InactivityWarningAfter < 0: - return fmt.Errorf( - "session.supervision.inactivity_warning_after "+ - "must be zero or positive: %s", - c.InactivityWarningAfter, - ) - case c.InactivityTimeout < 0: - return fmt.Errorf("session.supervision.inactivity_timeout must be zero or positive: %s", c.InactivityTimeout) - case c.InactivityTimeout > 0 && - c.InactivityWarningAfter > 0 && - c.InactivityWarningAfter > c.InactivityTimeout: - return fmt.Errorf( - "session.supervision.inactivity_warning_after must be <= session.supervision.inactivity_timeout: %s > %s", - c.InactivityWarningAfter, - c.InactivityTimeout, - ) - case c.TimeoutCancelGrace <= 0: - return fmt.Errorf("session.supervision.timeout_cancel_grace must be positive: %s", c.TimeoutCancelGrace) - default: - return nil - } -} -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/config/config.go` around lines 783 - 809, The validator in -SessionSupervisionConfig.Validate currently allows InactivityWarningAfter to -exceed InactivityTimeout; update Validate (method -SessionSupervisionConfig.Validate) to check when both c.InactivityWarningAfter > -0 and c.InactivityTimeout > 0 that c.InactivityWarningAfter <= -c.InactivityTimeout and return a clear fmt.Errorf indicating -"session.supervision.inactivity_warning_after must be <= inactivity_timeout" -(include the offending values) if the check fails; keep this check alongside the -existing range checks for c.InactivityWarningAfter and c.InactivityTimeout. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `SessionSupervisionConfig.Validate` rejects negative values but currently accepts `InactivityWarningAfter > InactivityTimeout` when both are enabled. - - That configuration cannot emit a warning before timeout, so the fix is to reject it with a contextual validation error and add coverage. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_009.md b/.compozy/tasks/release-adjustments/reviews-001/issue_009.md deleted file mode 100644 index d294ecbe2..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_009.md +++ /dev/null @@ -1,99 +0,0 @@ ---- -status: resolved -file: internal/daemon/restart.go -line: 819 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1X,comment:PRRC_kwDOR5y4QM67HMWg ---- - -# Issue 009: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Preserve cancellation cause instead of reporting timeout on canceled readiness waits.** - -When `waitCtx.Done()` is triggered by cancellation, this path still performs timeout-style drain handling and can persist a timeout-oriented failure reason after an unnecessary grace delay. - - -
-🔧 Proposed fix - -```diff - case <-waitCtx.Done(): -+ if errors.Is(waitCtx.Err(), context.Canceled) { -+ return h.fail( -+ store, -+ operationID, -+ fmt.Errorf("daemon: replacement daemon readiness canceled: %w", waitCtx.Err()), -+ ) -+ } - if exited, err := waitForProcessExitAfterReadyTimeout( -+ ctx, - processErrCh, - h.cfg.ExitDrainWait, - ); exited { - if err != nil { - return h.fail( -@@ --func waitForProcessExitAfterReadyTimeout(processErrCh <-chan error, grace time.Duration) (bool, error) { -+func waitForProcessExitAfterReadyTimeout( -+ ctx context.Context, -+ processErrCh <-chan error, -+ grace time.Duration, -+) (bool, error) { - select { - case err := <-processErrCh: - return true, err - default: - } -@@ - select { - case err := <-processErrCh: - return true, err - case <-timer.C: - return false, nil -+ case <-ctx.Done(): -+ return false, nil - } - } -``` -
- -As per coding guidelines, `Use select with ctx.Done() in all long-running goroutine loops` and `context.Context as first argument to functions crossing runtime boundaries`. - - -Also applies to: 844-863 - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/daemon/restart.go` around lines 803 - 819, The readiness-wait path -in restart.go is converting context cancellations into timeout-style replacement -failures; update the logic around waitForProcessExitAfterReadyTimeout and the -surrounding readiness wait to detect when waitCtx.Done() is triggered and -preserve/return that cancellation error (or propagate the original -context.Err()) instead of treating it as a timeout and calling h.fail with -errReplacementDaemonExitedBeforeReady; specifically, change the post-wait -handling in the block that calls waitForProcessExitAfterReadyTimeout (and the -similar block around the 844-863 range) to check for context cancellation -(ctx.Err() or the cancellation cause) and return/propagate that immediately, and -ensure any long-running loops use select { case <-waitCtx.Done(): ... } so the -cancellation cause is not lost when invoking h.fail(store, operationID, ...). -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `waitForReady` treats `waitCtx.Done()` as a readiness timeout even when the parent context was canceled. - - The fix is to detect cancellation separately, preserve the cancellation cause, and make the post-timeout process-exit drain context-aware. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_010.md b/.compozy/tasks/release-adjustments/reviews-001/issue_010.md deleted file mode 100644 index 3009d16f1..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_010.md +++ /dev/null @@ -1,160 +0,0 @@ ---- -status: resolved -file: internal/extension/teams_provider_integration_test.go -line: 73 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1b,comment:PRRC_kwDOR5y4QM67HMWm ---- - -# Issue 010: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -
-🧩 Analysis chain - -🏁 Script executed: - -```shell -#!/bin/bash -# Verify whether this integration test file is using "Should..." subtests for test cases. -rg -n --type go 'func Test' internal/extension/teams_provider_integration_test.go -rg -n --type go '\bt\.Run\("Should' internal/extension/teams_provider_integration_test.go -``` - -Repository: compozy/agh - -Length of output: 280 - ---- - -🏁 Script executed: - -```shell -#!/bin/bash -# Check for integration test build tag -head -5 internal/extension/teams_provider_integration_test.go - -# Get line count to understand file size -wc -l internal/extension/teams_provider_integration_test.go - -# Show the context around lines 65-73 with the full test function -sed -n '40,75p' internal/extension/teams_provider_integration_test.go -``` - -Repository: compozy/agh - -Length of output: 1795 - ---- - -🏁 Script executed: - -```shell -#!/bin/bash -# Search for ALL t.Run usages (not just "Should" pattern) -rg -n 't\.Run\(' internal/extension/teams_provider_integration_test.go | head -20 - -# Get context of the second test function to compare structure -sed -n '113,180p' internal/extension/teams_provider_integration_test.go -``` - -Repository: compozy/agh - -Length of output: 3209 - ---- - - - -
- -**Wrap per-instance assertions in `t.Run()` subtests for failure isolation.** - -Lines 65-73 validate each instance sequentially in a plain loop. Per test guidelines, this should use subtests to isolate failures and improve test output clarity. - -
-♻️ Suggested refactor - -```diff -- for _, instanceID := range expectedInstanceIDs { -- state, ok := teamsProviderLastStateForInstance(states, instanceID) -- if !ok { -- t.Fatalf("adapter state for %q missing after wait: %#v", instanceID, states) -- } -- if got, want := state.Status.Normalize(), bridgepkg.BridgeStatusReady; got != want { -- t.Fatalf("adapter state for %q = %q (error=%q), want %q", instanceID, got, state.Error, want) -- } -- } -+ for _, instanceID := range expectedInstanceIDs { -+ instanceID := instanceID -+ t.Run("ShouldReportReadyStateFor_"+instanceID, func(t *testing.T) { -+ state, ok := teamsProviderLastStateForInstance(states, instanceID) -+ if !ok { -+ t.Fatalf("adapter state for %q missing after wait: %#v", instanceID, states) -+ } -+ if got, want := state.Status.Normalize(), bridgepkg.BridgeStatusReady; got != want { -+ t.Fatalf("adapter state for %q = %q (error=%q), want %q", instanceID, got, state.Error, want) -+ } -+ }) -+ } -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - for _, instanceID := range expectedInstanceIDs { - instanceID := instanceID - t.Run("ShouldReportReadyStateFor_"+instanceID, func(t *testing.T) { - state, ok := teamsProviderLastStateForInstance(states, instanceID) - if !ok { - t.Fatalf("adapter state for %q missing after wait: %#v", instanceID, states) - } - if got, want := state.Status.Normalize(), bridgepkg.BridgeStatusReady; got != want { - t.Fatalf("adapter state for %q = %q (error=%q), want %q", instanceID, got, state.Error, want) - } - }) - } -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/extension/teams_provider_integration_test.go` around lines 65 - 73, -Wrap each per-instance assertion into a t.Run subtest to isolate failures: -iterate expectedInstanceIDs and for each call -t.Run("ShouldReportReadyStateFor_"+instanceID, func(t *testing.T) { ... }) and -move the existing checks (calling -teamsProviderLastStateForInstance(states, instanceID), verifying ok, and -comparing state.Status.Normalize() to bridgepkg.BridgeStatusReady and -state.Error) inside that subtest; ensure you use the loop variable correctly -(capture instanceID) so t.Fatalf remains inside the subtest to report only that -instance's failure. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `TestTeamsProviderLaunchNegotiatesBridgeRuntime` loops over expected instance IDs and fails from the parent test body, which loses per-instance failure isolation. - - The fix is to wrap each instance assertion in a named `Should...` subtest while preserving the existing readiness wait and conformance checks. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_011.md b/.compozy/tasks/release-adjustments/reviews-001/issue_011.md deleted file mode 100644 index 4487ffb07..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_011.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -status: resolved -file: internal/network/delivery_test.go -line: 525 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1k,comment:PRRC_kwDOR5y4QM67HMWx ---- - -# Issue 011: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Run the retry-delay cases as named subtests.** - -This block is already table-driven, so wrapping each case in `t.Run("Should...")` with `t.Parallel()` will align it with the repo’s test contract and make the failing attempt obvious when it regresses. - -As per coding guidelines, "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests", "Add `t.Parallel()` to independent subtests in Go tests", and "MUST use t.Run("Should...") pattern for ALL test cases". - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/network/delivery_test.go` around lines 502 - 525, -TestDeliveryCoordinatorRetryDelayUsesExponentialCap should run each table-driven -case as a named subtest: replace the direct loop over cases with per-case t.Run -calls (use the "Should ..." naming pattern) and call t.Parallel() inside each -subtest to run them concurrently; keep using the same -coordinator.retryDelayFor(...) and the same expected tc.want assertions but move -the comparison and t.Fatalf into the subtest body so failures show the specific -attempt name. Ensure you capture tc in the closure to avoid loop variable -capture and preserve the existing test logic and assertions. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `TestDeliveryCoordinatorRetryDelayUsesExponentialCap` is table-driven but evaluates every case in a plain loop. - - The fix is to run each case as an independent `Should...` subtest with `t.Parallel()` and retain the same retry delay assertions. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_012.md b/.compozy/tasks/release-adjustments/reviews-001/issue_012.md deleted file mode 100644 index be78c0e2a..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_012.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -status: resolved -file: internal/network/manager_test.go -line: 563 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1m,comment:PRRC_kwDOR5y4QM67HMWz ---- - -# Issue 012: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Wrap this scenario in a `Should...` subtest.** - -The coverage looks good, but this new case skips the repo’s required `t.Run("Should...")` pattern that the rest of this file already uses. - -As per coding guidelines, "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "MUST use t.Run("Should...") pattern for ALL test cases". - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/network/manager_test.go` around lines 489 - 563, Wrap the body of -TestManagerAuditsBusyQueueOverflowAsRejected in a t.Run subtest whose name -begins with "Should" (e.g. t.Run("Should audit busy queue overflow as rejected", -func(t *testing.T) { ... })). Move the existing test logic (context setup, -cfg/MAXQueueDepth, newFakeDeliveryPrompter, recordingAuditWriter, NewManager, -JoinChannel, Send calls, waitForCondition, auditor.rejectedForMessage and -manager.Status assertions) into that subtest, call t.Parallel() inside the -subtest (not just at the top-level), and keep existing cleanup/Shutdown logic -intact so behavior of NewManager, Send, JoinChannel, waitForCondition and -manager.Status is unchanged. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `TestManagerAuditsBusyQueueOverflowAsRejected` runs directly in the top-level body while surrounding tests use `Should...` scenario wrappers. - - The fix is to move the existing setup, send calls, audit assertions, and shutdown cleanup into a `Should...` subtest with `t.Parallel()`. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_013.md b/.compozy/tasks/release-adjustments/reviews-001/issue_013.md deleted file mode 100644 index c444b8080..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_013.md +++ /dev/null @@ -1,48 +0,0 @@ ---- -status: resolved -file: internal/session/manager.go -line: 341 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1o,comment:PRRC_kwDOR5y4QM67HMW2 ---- - -# Issue 013: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Wrap supervision validation failures with constructor context.** - -`applyRuntimeDefaults()` returns `m.supervision.Validate()` verbatim, so boot failures lose the `session:` context that the rest of `NewManager()` adds. Wrap that error before returning it. - -As per coding guidelines, "Use explicit error returns with wrapped context: `fmt.Errorf("context: %w", err)`". - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/session/manager.go` around lines 338 - 341, applyRuntimeDefaults() -currently returns m.supervision.Validate() directly, which loses the "session:" -context from NewManager on failures; change applyRuntimeDefaults so after -setting defaults it captures the error from m.supervision.Validate() and returns -a wrapped error (e.g. fmt.Errorf("session: %w", err)) so validation failures -include the session constructor context; reference m.supervision, -aghconfig.SessionSupervisionConfig{}, -aghconfig.DefaultSessionSupervisionConfig(), applyRuntimeDefaults and Validate() -when making the change. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `applyRuntimeDefaults` returns `m.supervision.Validate()` directly, so invalid supervision configuration lacks the `session:` constructor context. - - The fix is to wrap validation errors with `fmt.Errorf("session: %w", err)` and add constructor error coverage. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_014.md b/.compozy/tasks/release-adjustments/reviews-001/issue_014.md deleted file mode 100644 index 956fe7ba2..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_014.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/session/prompt_activity.go -line: 50 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:b43c36abf652 -review_hash: b43c36abf652 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 014: Reconsider context.Background() fallback. -## Review Comment - -The coding guidelines specify avoiding `context.Background()` outside `main` and focused tests. While this is defensive, consider requiring a non-nil context from callers instead. - -As per coding guidelines, "Include `context.Context` as first argument to functions crossing runtime boundaries — avoid `context.Background()` outside `main` and focused tests". - -## Triage - -- Decision: `VALID` -- Notes: - - `newPromptActivitySupervisor` defensively replaces a nil context with `context.Background()`, even though the public prompt path already rejects nil contexts before calling it. - - The fix is to remove the fallback and rely on the validated caller context instead of hiding a bad internal call. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_015.md b/.compozy/tasks/release-adjustments/reviews-001/issue_015.md deleted file mode 100644 index 816bbfe34..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_015.md +++ /dev/null @@ -1,49 +0,0 @@ ---- -status: resolved -file: internal/store/globaldb/global_db_session.go -line: 642 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1r,comment:PRRC_kwDOR5y4QM67HMW6 ---- - -# Issue 015: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Don’t silently clear `activity_json` on serialization failure.** - -Returning `""` here makes the write path look successful while dropping the activity payload. Bubble this up as an error from `RegisterSession` / `UpdateSessionState` instead of coercing it to an empty column. - - -As per coding guidelines, `**/*.go`: Use explicit error returns with wrapped context: `fmt.Errorf("context: %w", err)`. - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@internal/store/globaldb/global_db_session.go` around lines 634 - 642, The -helper sessionLivenessActivityJSON must stop swallowing JSON marshal errors; -change its signature to return (string, error), have it return wrapped errors -using fmt.Errorf("session liveness activity marshal: %w", err) when json.Marshal -fails (keep the nil checks and CloneSessionActivityMeta), and update all callers -such as RegisterSession and UpdateSessionState to propagate and handle the error -(returning it up the write path so the caller can fail instead of writing an -empty activity_json). Ensure all propagated errors are wrapped with contextual -messages per guidelines. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `sessionLivenessActivityJSON` currently returns an empty string on JSON marshal failure, making the write path look successful while dropping runtime activity. - - The fix is to return `(string, error)`, propagate wrapped errors through `RegisterSession` and `UpdateSessionState`, and add failing-path tests using an unmarshalable `time.Time`. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_016.md b/.compozy/tasks/release-adjustments/reviews-001/issue_016.md deleted file mode 100644 index 40b6ef82c..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_016.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: internal/testutil/e2e/config_seed_test.go -line: 85 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:61160659c827 -review_hash: 61160659c827 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 016: Please convert this new test to t.Run("Should...") style. -## Review Comment - -Coverage is useful; only the test-case structure is out of line with the suite’s required pattern. - -As per coding guidelines "Use table-driven tests with subtests (`t.Run`) as default pattern for Go tests" and "MUST use t.Run(\"Should...\") pattern for ALL test cases". - -## Triage - -- Decision: `VALID` -- Notes: - - `TestSeedConfigPersistsSessionSupervisionOverlay` runs the scenario directly in the top-level test body. - - The fix is to wrap the scenario in a `Should...` subtest and keep the independent subtest parallel. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_017.md b/.compozy/tasks/release-adjustments/reviews-001/issue_017.md deleted file mode 100644 index 72ac02ec2..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_017.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: resolved -file: internal/testutil/e2e/runtime_harness.go -line: 419 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:9a33cc5c3935 -review_hash: 9a33cc5c3935 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 017: Consider consolidating the "address already in use" detection. -## Review Comment - -The HTTP port retry check uses `"address already in use"` which could match both TCP and UDS errors on some systems. The UDS check is more specific with `"listen unix"` + `"bind: file exists"`. This works but the overlap could cause both flags to be true for certain edge cases. - -## Triage - -- Decision: `VALID` -- Notes: - - `readinessFailureRetryReasons` treats any process log containing `address already in use` as an HTTP port conflict, even when the log could describe a Unix socket bind failure. - - The fix is to split HTTP and UDS conflict detection into protocol-specific predicates so TCP and Unix listener conflicts cannot both be inferred from the same generic text. 
diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_018.md b/.compozy/tasks/release-adjustments/reviews-001/issue_018.md deleted file mode 100644 index b2a46e282..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_018.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: web/e2e/automation.spec.ts -line: 38 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:1558f2a4e64b -review_hash: 1558f2a4e64b -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 018: Align the test name with current behavior. -## Review Comment - -The scenario no longer edits/saves automation; it validates prefilled form data and closes. Rename the test (or restore edit-save assertions) to keep intent explicit. - -Also applies to: 93-98 - -## Triage - -- Decision: `VALID` -- Notes: - - The Playwright scenario opens the edit form to inspect prefilled values, closes it, and then triggers a run; it no longer edits and saves automation. - - The fix is to rename the test so the name matches the actual inspect-and-trigger behavior. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_019.md b/.compozy/tasks/release-adjustments/reviews-001/issue_019.md deleted file mode 100644 index 8b411ff89..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_019.md +++ /dev/null @@ -1,71 +0,0 @@ ---- -status: resolved -file: web/e2e/automation.spec.ts -line: 68 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1s,comment:PRRC_kwDOR5y4QM67HMW8 ---- - -# Issue 019: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -**Scope the completion assertion to the seeded run row.** - -Line 68 currently validates completion at history-panel level, so it can pass for the wrong run. - - -
-Suggested fix - -```diff -- await expect(automationUI.runHistory).toContainText(/completed/i); -+ await expect(automationUI.run(seeded.baselineRun.id)).toContainText(/completed/i); -``` -
- - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - await expect(automationUI.run(seeded.baselineRun.id)).toContainText(/completed/i); -``` - -
- - - -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/e2e/automation.spec.ts` at line 68, The assertion currently checks -automationUI.runHistory at the panel level and can match the wrong run; instead -scope the check to the specific seeded run row by locating the seeded row -element (e.g., a helper or selector like automationUI.getSeededRunRow(seedId) or -find the row by the seeded run name/id) and assert that that single row contains -/completed/i (replace the top-level automationUI.runHistory reference with the -scoped seeded-run element such as automationUI.getSeededRunRow(seedId) and call -toContainText(/completed/i) on it). -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - The current completion assertion checks the entire run history panel, so a different completed run could satisfy it. - - The fix is to assert `/completed/i` against `automationUI.run(seeded.baselineRun.id)` after confirming the seeded row is visible. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_020.md b/.compozy/tasks/release-adjustments/reviews-001/issue_020.md deleted file mode 100644 index afb626986..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_020.md +++ /dev/null @@ -1,55 +0,0 @@ ---- -status: resolved -file: web/e2e/automation.spec.ts -line: 91 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1x,comment:PRRC_kwDOR5y4QM67HMXC ---- - -# Issue 020: _⚠️ Potential issue_ | _🟠 Major_ -## Review Comment - -_⚠️ Potential issue_ | _🟠 Major_ - -**Avoid `force: true` here; it can mask broken edit UX.** - -Line 91 bypasses actionability checks, so this test may pass when the user cannot actually click Edit. - - -
-Suggested fix - -```diff -- await automationUI.editAutomationButton.click({ force: true }); -+ await expect(automationUI.editAutomationButton).toBeEnabled(); -+ await automationUI.editAutomationButton.click(); -``` -
- -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/e2e/automation.spec.ts` at line 91, Remove the use of force:true on -automationUI.editAutomationButton.click so the test doesn't bypass actionability -checks; instead wait/assert the button is actionable (e.g., -await expect(automationUI.editAutomationButton).toBeEnabled() or -equivalent) and then call automationUI.editAutomationButton.click() normally, -ensuring any overlays/modals are closed before clicking and keeping the selector -automationUI.editAutomationButton as the target. -``` - -
- - - - - -## Triage - -- Decision: `VALID` -- Notes: - - `automationUI.editAutomationButton.click({ force: true })` bypasses Playwright actionability checks. - - The fix is to assert the edit button is enabled and click it normally. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_021.md b/.compozy/tasks/release-adjustments/reviews-001/issue_021.md deleted file mode 100644 index 9c4c9a1c4..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_021.md +++ /dev/null @@ -1,340 +0,0 @@ ---- -status: resolved -file: web/e2e/tasks.spec.ts -line: 158 -author: coderabbitai[bot] -provider_ref: thread:PRRT_kwDOR5y4QM59dk1z,comment:PRRC_kwDOR5y4QM67HMXE ---- - -# Issue 021: _⚠️ Potential issue_ | _🟡 Minor_ -## Review Comment - -_⚠️ Potential issue_ | _🟡 Minor_ - -
-🧩 Analysis chain - -🏁 Script executed: - -```shell -#!/bin/bash -set -euo pipefail - -# Verify whether fixture IDs contain regex metacharacters that could affect dynamic RegExp assertions. -fixture="$(fd -a 'browser_session_lifecycle_fixture.json' | head -n1)" -if [[ -z "${fixture:-}" ]]; then - echo "browser_session_lifecycle_fixture.json not found" - exit 1 -fi - -python - "$fixture" <<'PY' -import json, re, sys - -path = sys.argv[1] -with open(path, "r", encoding="utf-8") as f: - data = json.load(f) - -regex_meta = re.compile(r'[.^$*+?()[\]{}|\\]') -ids = [] - -def walk(node): - if isinstance(node, dict): - for k, v in node.items(): - if k == "id" and isinstance(v, str): - ids.append(v) - walk(v) - elif isinstance(node, list): - for item in node: - walk(item) - -walk(data) - -print(f"Found {len(ids)} ids in {path}") -for value in ids: - tag = "META_CHAR_PRESENT" if regex_meta.search(value) else "ok" - print(f"{tag}\t{value}") -PY -``` - -Repository: compozy/agh - -Length of output: 163 - ---- - -🏁 Script executed: - -```shell -cat -n web/e2e/tasks.spec.ts | head -n 170 | tail -n 30 -``` - -Repository: compozy/agh - -Length of output: 1719 - ---- - -🏁 Script executed: - -```shell -# Search for where 'seeded' is defined or populated in the test file -rg -n "seeded\s*=" web/e2e/tasks.spec.ts | head -n 20 -``` - -Repository: compozy/agh - -Length of output: 121 - ---- - -🏁 Script executed: - -```shell -# Find fixture files that might be used in this test -fd -e json web/e2e/fixtures | head -n 20 -``` - -Repository: compozy/agh - -Length of output: 425 - ---- - -🏁 Script executed: - -```shell -# Find where seedBrowserTasksOperatorFlow is defined -rg -n "seedBrowserTasksOperatorFlow" web/e2e --type ts -``` - -Repository: compozy/agh - -Length of output: 556 - ---- - -🏁 Script executed: - -```shell -# Search for fixture files -fd . 
web/e2e/fixtures -``` - -Repository: compozy/agh - -Length of output: 473 - ---- - -🏁 Script executed: - -```shell -# Check the actual fixture data to see ID formats -find web/e2e -name "*.json" -type f -``` - -Repository: compozy/agh - -Length of output: 37 - ---- - -🏁 Script executed: - -```shell -# Examine the seedBrowserTasksOperatorFlow function -sed -n '735,850p' web/e2e/fixtures/runtime-seed.ts -``` - -Repository: compozy/agh - -Length of output: 3804 - ---- - -🏁 Script executed: - -```shell -# Check what types of IDs are being generated - look for ID creation patterns -rg -n "id\s*:" web/e2e/fixtures/runtime-seed.ts | head -n 30 -``` - -Repository: compozy/agh - -Length of output: 1559 - ---- - -🏁 Script executed: - -```shell -# Look for task and run creation in the seeding function -rg -B5 -A10 "runningTask\|runningRun" web/e2e/fixtures/runtime-seed.ts | head -n 80 -``` - -Repository: compozy/agh - -Length of output: 37 - ---- - -🏁 Script executed: - -```shell -# Check the createBrowserTask function to see ID format -rg -B5 -A15 "async function createBrowserTask" web/e2e/fixtures/runtime-seed.ts -``` - -Repository: compozy/agh - -Length of output: 632 - ---- - -🏁 Script executed: - -```shell -# Look for task creation endpoint and response type -rg -B3 -A3 "TaskPayload\|Task\b" web/e2e/fixtures/runtime-seed.ts | head -n 50 -``` - -Repository: compozy/agh - -Length of output: 37 - ---- - -🏁 Script executed: - -```shell -# Check what TaskRun type looks like - find its definition -rg -n "interface TaskRun\|type TaskRun" --type ts | head -n 5 -``` - -Repository: compozy/agh - -Length of output: 37 - ---- - -🏁 Script executed: - -```shell -# Find TaskRecord import in runtime-seed.ts -rg "import.*Task" web/e2e/fixtures/runtime-seed.ts | head -n 20 -``` - -Repository: compozy/agh - -Length of output: 37 - ---- - -🏁 Script executed: - -```shell -# Search for TaskRecord type definition across the codebase -fd . 
--type f \( -name "*.ts" -o -name "*.tsx" \) | xargs rg "interface TaskRecord|type TaskRecord" | head -n 5 -``` - -Repository: compozy/agh - -Length of output: 446 - ---- - -🏁 Script executed: - -```shell -# Check Playwright documentation pattern - look for other uses of toHaveURL with RegExp -rg "toHaveURL\(" web/e2e --type ts | head -n 10 -``` - -Repository: compozy/agh - -Length of output: 733 - ---- - - - -
- -**Make URL assertion consistent with line 163—use `expect.poll()` for pathname comparison instead of dynamic RegExp.** - -Line 158 constructs a dynamic RegExp from variable content, which is brittle if IDs ever contain regex metacharacters (`.`, `+`, `*`, etc.). Line 163 in the same test already demonstrates the safer pattern. Apply the same approach here for consistency and robustness. - -```diff -- await expect(appPage).toHaveURL(new RegExp(`${activeRunPath}$`)); -+ await expect.poll(() => new URL(appPage.url()).pathname).toBe(activeRunPath); -``` - - - -
-📝 Committable suggestion - -> ‼️ **IMPORTANT** -> Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements. - -```suggestion - const activeRunPath = `/tasks/${seeded.runningTask.id}/runs/${seeded.runningRun.id}`; - const activeRunLink = tasksUI.dashboardActiveRunLink(seeded.runningRun.id); - await expect(activeRunLink).toBeVisible(); - await expect(activeRunLink).toHaveAttribute("href", activeRunPath); - await appPage.goto(runtime.url(activeRunPath), { - waitUntil: "domcontentloaded", - }); - await expect(tasksUI.runDetailContent).toBeVisible(); - await expect.poll(() => new URL(appPage.url()).pathname).toBe(activeRunPath); -``` - -
- - - -
-🧰 Tools - -
-🪛 ast-grep (0.42.1) - -[warning] 157-157: Regular expression constructed from variable input detected. This can lead to Regular Expression Denial of Service (ReDoS) attacks if the variable contains malicious patterns. Use libraries like 'recheck' to validate regex safety or use static patterns. -Context: new RegExp(`${activeRunPath}$`) -Note: [CWE-1333] Inefficient Regular Expression Complexity [REFERENCES] - - https://owasp.org/www-community/attacks/Regular_expression_Denial_of_Service_-_ReDoS - - https://cwe.mitre.org/data/definitions/1333.html - -(regexp-from-variable) - -
- -
- -
-🤖 Prompt for AI Agents - -``` -Verify each finding against the current code and only fix it if needed. - -In `@web/e2e/tasks.spec.ts` around lines 150 - 158, The URL assertion using a -dynamic RegExp is brittle; replace the final expect(appPage).toHaveURL(new -RegExp(`${activeRunPath}$`)) with the same pathname-poll pattern used on line -163: use expect.poll to repeatedly read appPage.url(), parse the URL's pathname, -and assert it equals activeRunPath so the test compares pathnames (referencing -activeRunPath, appPage, and tasksUI.runDetailContent for locating the -assertion). -``` - -
- - - - - -## Triage - -- Decision: `valid` -- Root cause: `web/e2e/tasks.spec.ts` builds `new RegExp(`${activeRunPath}$`)` from a URL path containing runtime-generated task/run IDs. Those IDs are opaque values, so regex metacharacters in an ID would change the assertion semantics instead of matching the literal route. -- Fix approach: compare the current page `pathname` to the literal `activeRunPath` value instead of turning the path into a regex. -- Resolution: replaced the dynamic URL regex assertion with the same literal pathname polling pattern used later in the spec. Targeted Tasks E2E and full `make verify` passed after the code change. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_022.md b/.compozy/tasks/release-adjustments/reviews-001/issue_022.md deleted file mode 100644 index 7161a0639..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_022.md +++ /dev/null @@ -1,23 +0,0 @@ ---- -status: resolved -file: web/src/systems/bridges/components/bridge-detail-panel.test.tsx -line: 73 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:02027608129f -review_hash: 02027608129f -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 022: Consider unique defaults in makeRoute to avoid duplicate-key noise in tests. -## Review Comment - -If multiple default routes are generated in one test, repeated `session_id`/`routing_key_hash` can create duplicate row keys and flaky assertions. - -## Triage - -- Decision: `valid` -- Root cause: `makeRoute` returns the same default `session_id` and `routing_key_hash` for every route. `BridgeEventStreamSection` keys rows as `${route.session_id}:${route.routing_key_hash}`, so tests that render multiple default routes can emit duplicate React keys and make row assertions noisy or flaky. 
-- Fix approach: make the route helper defaults deterministic but unique per call, while preserving explicit overrides for tests that need specific IDs. -- Resolution: `makeRoute` now generates deterministic unique default route IDs and hashes, and the component test asserts multiple default routes have distinct row identities. Targeted bridge tests and full `make verify` passed after the code change. diff --git a/.compozy/tasks/release-adjustments/reviews-001/issue_023.md b/.compozy/tasks/release-adjustments/reviews-001/issue_023.md deleted file mode 100644 index 5dbeca1bd..000000000 --- a/.compozy/tasks/release-adjustments/reviews-001/issue_023.md +++ /dev/null @@ -1,25 +0,0 @@ ---- -status: resolved -file: web/src/systems/bridges/components/bridge-detail-panel.tsx -line: 375 -severity: nitpick -author: coderabbitai[bot] -provider_ref: review:4172207861,nitpick_hash:3281cce86b36 -review_hash: 3281cce86b36 -source_review_id: "4172207861" -source_review_submitted_at: "2026-04-24T17:07:23Z" ---- - -# Issue 023: Align the session metadata line with the metadata type rule. -## Review Comment - -The new `session` line is metadata; style the label with uppercase + tracking to match the design system. - -As per coding guidelines, `web/src/**/*.{tsx,css}` requires **JetBrains Mono for metadata with uppercase and tracking 0.06em+**. - -## Triage - -- Decision: `valid` -- Root cause: the route session metadata line renders a lowercase `session` label with no tracking. AGH metadata labels must use JetBrains Mono, uppercase text, and letter spacing of at least `0.06em`. -- Fix approach: split the metadata label from the session ID, style only the label as uppercase tracked mono text, and preserve the session ID casing exactly. -- Resolution: the route metadata line now renders a tracked uppercase `SESSION` label while keeping the session ID unchanged. The bridge component test asserts the label styling, and full `make verify` passed after the code change. 
diff --git a/.compozy/tasks/tools-registry/_tasks.md b/.compozy/tasks/tools-registry/_tasks.md new file mode 100644 index 000000000..024909160 --- /dev/null +++ b/.compozy/tasks/tools-registry/_tasks.md @@ -0,0 +1,30 @@ +# Tool Registry - Task List + +**GREENFIELD (alpha):** implement the final executable tool-registry model directly. Do not add compatibility aliases, descriptor-only backends, dotted tool IDs, or fallback execution paths for old state. + +Source artifacts: `_techspec.md`, ADR-001 through ADR-010, `analysis/synthesis.md`, and the approved task decomposition from 2026-04-28. + +## Tasks + +| # | Title | Status | Complexity | Dependencies | +|---|-------|--------|------------|--------------| +| 01 | Core Tool Contracts and Canonical ToolID | pending | high | - | +| 02 | Tools Config Lifecycle and Agent Grammar | pending | high | task_01 | +| 03 | Registry Indexing, Toolsets, Policy, and Projections | pending | critical | task_01, task_02 | +| 04 | Dispatch Pipeline, Hooks, Budgets, and Observability | pending | critical | task_03 | +| 05 | Native Go Built-In Providers | pending | high | task_04 | +| 06 | Extension Manifest Tool Metadata and Reconciliation | pending | high | task_03 | +| 07 | Extension Runtime Protocol and TypeScript SDK Tools | pending | critical | task_04, task_06 | +| 08 | Public Go Extension SDK | pending | critical | task_07 | +| 09 | Daemon-Owned MCP Call-Through and Auth Diagnostics | pending | critical | task_03, task_04 | +| 10 | Hosted AGH MCP Session Exposure and Approval Bridge | pending | critical | task_05, task_09 | +| 11 | API Contracts, HTTP/UDS Routes, and Codegen | pending | critical | task_05, task_07, task_09, task_10 | +| 12 | CLI Operator Commands | pending | high | task_11 | +| 13 | Web Operator Tool Diagnostics Surface | pending | high | task_11, task_12 | +| 14 | Site Documentation and Generated References | pending | high | task_13 | +| 15 | QA Plan and Test Coverage | pending | high | task_14 | +| 16 | 
Real-Scenario QA Execution | pending | critical | task_15 | + +## MVP Boundary + +Tasks 01-14 implement the Tool Registry MVP. Tasks 15-16 prepare and execute release-grade QA. diff --git a/.compozy/tasks/tools-registry/_techspec.md b/.compozy/tasks/tools-registry/_techspec.md new file mode 100644 index 000000000..8d632d8a4 --- /dev/null +++ b/.compozy/tasks/tools-registry/_techspec.md @@ -0,0 +1,1361 @@ +# Tool Registry Foundation TechSpec + +## Executive Summary + +This TechSpec designs AGH's Tool Registry as a daemon-owned runtime service, not as a static list of built-in commands. The registry will unify tool identity, discovery, availability, policy, execution, hooks, telemetry, extension descriptors, MCP adapters, and session-visible exposure through one central dispatch pipeline. + +There is no existing `_prd.md` for this task. The scope is based on the user request, competitor research under `.compozy/tasks/tools-registry/analysis/`, accepted ADRs under `.compozy/tasks/tools-registry/adrs/`, current AGH code exploration, and the prior autonomous skills/tools registry gap analysis. + +The primary architectural trade-off is to make AGH-native tools visible to sessions through an AGH-hosted local MCP server in the MVP instead of trying to invent an ACP-specific registry. ACP does not define a callable tool registry; it defines session lifecycle, MCP bootstrap, permission callbacks, and tool-call observations. MCP provides the programmatic `Tool.name`, so AGH will expose its canonical `ToolID` directly as the hosted MCP tool name. + +The foundation will support executable native/bundled tools, executable extension-host tools, and executable MCP-backed tools. Built-in AGH tools execute in-process through `native_go` handles compiled into the daemon. Third-party TypeScript and Go extension tools execute out-of-process through the existing extension runtime, a new `tool.provider` capability, `provide_tools` reconciliation, and `tools/call` RPC. 
MCP-backed tools execute through daemon-owned MCP clients that consume the existing MCP config/auth subsystem. Descriptor-only is an unavailable/error state, not the MVP contract for extension or MCP tools. + +## MVP Boundary Statement + +MVP boundary: implementation steps 1-16 build the Tool Registry foundation, AGH-hosted MCP session exposure, native bootstrap tools, executable TypeScript/Go extension-host tools, executable daemon-owned MCP call-through, shared CLI/HTTP/UDS surfaces, policy/availability enforcement, hooks, observability, docs, SDK updates, and verification. This MVP proves the registry as an executable daemon primitive without replacing every ACP provider-native tool. + +Post-MVP work deferred to later TechSpecs: + +- direct driver-specific tool injection outside hosted MCP; +- full shell/browser/file tool replacement for ACP runtimes; +- remote peer tool execution over AGH Network; +- provider-specific deferred schema loading such as Anthropic `tool_reference`; +- broad marketplace signing/trust overhaul; +- skill install/remove/update tools; +- bridge SDK executable tool adapters; +- direct in-process plugin loading for third-party Go or TypeScript code; +- client-supplied ACP `mcpServers` as session-scoped registry sources. + +Explicitly out of scope for this TechSpec: + +- in-process third-party extension handlers; +- silent compatibility aliases for dotted tool IDs; +- policy bypasses for `approve-all`; +- partial surface delivery where CLI/HTTP ships without UDS, docs, codegen, and hosted MCP parity; +- storing matchable ownership or authorization state inside opaque JSON metadata blobs. 
+ +Backend delivery boundary: + +| Backend kind | MVP delivery | Invocation behavior | +|---|---|---| +| `native_go` | Descriptor, availability, policy, and full dispatch through `Registry.Call` | Executable in-process only for daemon-compiled AGH built-ins | +| `extension_host` | Manifest-authoritative descriptor, runtime reconciliation, extension health, source policy, and full dispatch through `Registry.Call` | Executable out-of-process through existing extension subprocess runtime, `tool.provider`, and `tools/call`; TypeScript and Go SDKs wrap handlers as functions | +| `mcp` | Descriptor discovery, source provenance, health/auth diagnostics, collision handling, source policy, and full dispatch through `Registry.Call` | Executable through daemon-owned MCP clients using existing MCP config and `internal/mcp/auth` status/token interfaces | +| `subprocess` | Not a public registry backend kind in MVP | Rejected by manifest validation; third-party Go/TypeScript tools use `extension_host`, whose implementation is subprocess-isolated | +| `bridge` | Reserved post-MVP backend kind | Rejected by MVP validation unless a later bridge TechSpec enables it | + +AGH-hosted MCP is different from `mcp` backend tools. Hosted MCP is the session exposure transport for AGH registry tools; `mcp` backend tools are external tools contributed by MCP servers and are executable only through daemon-owned MCP client adapters after the same registry policy, source, approval, hook, and session-lineage gates pass. + +## Architectural Boundaries + +`internal/daemon` remains the only composition root. It may import and compose `internal/tools`, `internal/skills`, `internal/mcp`, `internal/extension`, `internal/hooks`, `internal/session`, `internal/network`, `internal/task`, `internal/api/*`, and config/resource stores. No package may import `internal/daemon`. 
+ +Package import boundaries: + +- `internal/tools` owns `ToolID`, descriptors, backend kinds, providers, handles, registry, policy interfaces, availability, dispatch contracts, and result normalization. It must not import `internal/daemon`, `internal/api/*`, `internal/cli`, `internal/extension`, `internal/session`, `internal/network`, or `internal/task`. +- `internal/catalog`, if added, is a thin composition-facing facade over `internal/tools` and `internal/skills`. It must not own tool dispatch or policy. +- `internal/extension` may publish manifest-authoritative tool descriptors and expose live out-of-process extension tool invokers through public registry contracts. It must not execute third-party tool handlers in-process and must not import registry internals beyond public `internal/tools` descriptor/provider contracts. +- `internal/mcp` may adapt external MCP tools, call external MCP servers through daemon-owned clients, and host the AGH MCP stdio proxy. All AGH-owned calls must enter `internal/tools.Registry.Call` through UDS or an injected interface; MCP code must not duplicate dispatch policy. +- `internal/mcp/auth` already owns remote MCP OAuth 2.1 + PKCE, redacted status, token refresh/logout, and durable token storage through `internal/store/globaldb`. The Tool Registry may consume redacted auth status through a daemon-injected interface, but it must not reimplement OAuth flows, open the MCP auth token store directly, or persist remote MCP token material. +- `internal/api/core` owns transport-independent handlers. `internal/api/httpapi` and `internal/api/udsapi` only register routes and transport concerns. +- `internal/cli` calls UDS/HTTP client methods and does not import runtime registry implementations. +- `internal/hooks` owns typed hook payloads and execution. Hooks dispatch at the registry call site; no code may tail event tables to trigger tool hooks. +- `internal/store` may persist session lineage permission atoms and events. 
It must not decide tool policy; it validates and stores normalized atoms. +- `internal/session` can receive hosted MCP server config and session projections through interfaces. It must not implement a parallel tool registry. + +Boundaries to update in implementation: + +- If `internal/catalog` or a new `internal/mcp` subpackage is added, update `magefile.go` package boundary checks in the same change. +- Any OpenAPI/contract change must co-ship generated `openapi/agh.json` and `web/src/generated/agh-openapi.d.ts`. +- Any CLI surface must co-ship HTTP/UDS parity unless the spec explicitly marks the surface CLI-only. This TechSpec has no CLI-only tool surfaces. + +## System Architecture + +### Component Overview + +| Component | Responsibility | Boundary | +|---|---|---| +| `internal/tools` runtime registry | Owns canonical `ToolID`, descriptors, backend kinds, providers, handles, availability projection, policy evaluation, dispatch, result normalization, and telemetry hooks | Does not import `daemon`, `api`, or `cli`; composed by `internal/daemon` | +| Cold `tool` resources | Persist desired-state tool metadata from extensions, bundles, and future dynamic producers | Metadata only; no function pointers or execution handles | +| Built-in `native_go` provider | Registers AGH-native tools such as tool search, skill view, network peers/send, and bounded task tools | In-process only because it ships inside the daemon binary | +| Extension-host provider | Converts extension-published tool resources into runtime descriptors, reconciles them with `provide_tools`, and invokes handlers over extension subprocess RPC | Does not execute extension code in-process; fails closed on manifest/runtime mismatch | +| Extension SDKs | TypeScript and Go helper APIs let extension authors define tools with functions while the runtime remains out-of-process | SDKs generate/reconcile manifest descriptors and register `tools/call` handlers | +| MCP adapter provider | Normalizes MCP-backed 
descriptors, health, auth status, source provenance, and executes calls through daemon-owned MCP clients | Fails closed on health, auth, policy, approval, hook, or name collision problems | +| Existing MCP auth runtime | Supplies redacted remote MCP OAuth 2.1 + PKCE status for external MCP diagnostics | Owned by `internal/mcp/auth`; registry must not duplicate OAuth flow, token storage, or token refresh/logout | +| `internal/catalog` facade | Thin cross-domain list/search/view facade over tools and skills | Optional coordination layer; runtime tool dispatch remains in `internal/tools` | +| Policy engine | Combines ACP approval mode, session lineage, agent policy, source/risk defaults, registry allow/deny, toolsets, availability, and hooks | Produces structured effective decisions, never a single ambiguous boolean | +| AGH-hosted MCP proxy | Exposes session-callable AGH tools as MCP tools using canonical `ToolID` names | Runs through daemon-provided `agh tool mcp --session SESSION_ID --bind-nonce NONCE` and proxies to daemon over UDS | +| API/CLI surfaces | Expose machine-readable list/search/info/invoke/status behavior | Shared contracts in `internal/api/contract`, handlers in `internal/api/core`, HTTP/UDS parity | +| Hook integration | Runs `tool.pre_call`, `tool.post_call`, and `tool.post_error` around registry dispatch | Hooks can deny, narrow, patch, redact, or annotate, but cannot bypass policy | +| Observability | Emits durable events and metrics for registration, projection, decisions, calls, failures, conflicts, truncation, and policy denials | Redacts secrets and raw tokens | + +Data flow: + +1. Extensions, built-ins, MCP servers, and future dynamic providers register cold descriptors and, where executable, runtime handles. +2. `internal/daemon` composes providers into `internal/tools.Registry`. +3. Registry indexes descriptors by canonical `ToolID`. +4. Operator surfaces can list all tools with status and reason codes. +5. 
Session/model-visible surfaces request a session projection and receive only callable tools for that effective context. +6. Every invocation enters `Registry.Call`, which validates schema, recomputes availability/policy, runs hooks, calls the `native_go`, `extension_host`, or `mcp` handle, normalizes output, persists/observes, and returns a bounded result. + +## Implementation Design + +### Core Interfaces + +These are final-shape interface contracts for implementation planning. Implementers may add helper types, but registry dispatch must preserve these method responsibilities and must not reintroduce metadata-only runtime providers. + +```go +type Registry interface { + List(ctx context.Context, scope Scope) ([]ToolView, error) + Search(ctx context.Context, scope Scope, q SearchQuery) ([]ToolView, error) + Get(ctx context.Context, scope Scope, id ToolID) (ToolView, error) + Call(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error) +} + +type Handle interface { + Descriptor() Descriptor + Availability(ctx context.Context, scope Scope) Availability + Call(ctx context.Context, req CallRequest) (ToolResult, error) +} +``` + +Supporting contracts: + +```go +type BackendKind string + +const ( + BackendNativeGo BackendKind = "native_go" + BackendExtensionHost BackendKind = "extension_host" + BackendMCP BackendKind = "mcp" + BackendBridge BackendKind = "bridge" +) +``` + +```go +type Provider interface { + ID() SourceRef + List(ctx context.Context, scope Scope) ([]Descriptor, error) + Resolve(ctx context.Context, scope Scope, id ToolID) (Handle, bool, error) +} +``` + +```go +type NativeToolFunc func(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error) +``` + +```go +type ExtensionToolInvoker interface { + ProvideTools(ctx context.Context, extensionID string) ([]ExtensionToolRuntimeDescriptor, error) + CallTool(ctx context.Context, extensionID string, req ExtensionToolCallRequest) (ToolResult, error) +} +``` + +```go +type 
MCPCallExecutor interface { + ListTools(ctx context.Context, source SourceRef) ([]MCPToolDescriptor, error) + CallTool(ctx context.Context, source SourceRef, req MCPToolCallRequest) (ToolResult, error) +} +``` + +`MCPCallExecutor` is implemented by `internal/mcp`, not by `internal/tools`. It resolves bearer material through `internal/mcp/auth` internally, never exposes `mcpauth.TokenRecord` or raw headers to registry code, and returns only normalized `ToolResult` plus wrapped backend errors. + +Extension protocol additions: + +```go +const ( + CapabilityToolProvider = "tool.provider" + + ExtensionServiceMethodProvideTools = "provide_tools" + ExtensionServiceMethodToolsCall = "tools/call" +) +``` + +```go +var capabilityServiceMethods = map[Capability][]ExtensionServiceMethod{ + CapabilityToolProvider: { + ExtensionServiceMethodProvideTools, + ExtensionServiceMethodToolsCall, + }, +} +``` + +Wire request/response contracts: + +```go +type ExtensionProvideToolsResponse struct { + Tools []ExtensionToolRuntimeDescriptor `json:"tools"` +} +``` + +```go +type ExtensionToolCallRequest struct { + ToolID ToolID `json:"tool_id"` + Handler string `json:"handler"` + SessionID string `json:"session_id"` + Input json.RawMessage `json:"input"` +} +``` + +```go +type ExtensionToolCallResponse struct { + Result ToolResult `json:"result"` +} +``` + +```go +type MCPToolCallRequest struct { + ToolID ToolID `json:"tool_id"` + RawToolName string `json:"raw_tool_name"` + Input json.RawMessage `json:"input"` +} +``` + +```go +type MCPToolCallResponse struct { + Result ToolResult `json:"result"` +} +``` + +Wire-stable fields are `tool_id`, `handler`, `raw_tool_name`, schema digests, risk flags, and JSON input/result envelopes. Runtime-only fields such as latency, retry count, process id, transport connection id, and raw auth/header material must stay out of wire structs and are added by daemon telemetry or redacted diagnostics only. 
+ ```go +type MCPAuthStatus struct { + ServerName string + Status string + AuthType string + ClientID string + Scopes []string + ExpiresAt *time.Time + Refreshable bool + TokenPresent bool + Diagnostic string +} + +type MCPAuthStatusProvider interface { + Status(ctx context.Context, source SourceRef) (MCPAuthStatus, error) +} + +type PolicyEvaluator interface { + Evaluate(ctx context.Context, scope Scope, d Descriptor) (EffectiveToolDecision, error) +} +``` + +```go +type ResultLimiter interface { + Apply(ctx context.Context, d Descriptor, result ToolResult) (ToolResult, error) +} + +type HookRunner interface { + PreCall(ctx context.Context, call CallRequest) (CallRequest, EffectiveToolDecision, error) + PostCall(ctx context.Context, call CallRequest, result ToolResult) (ToolResult, error) + PostError(ctx context.Context, call CallRequest, err error) error +} +``` + +Error conventions: + +- `ErrToolNotFound` +- `ErrToolConflict` +- `ErrToolUnavailable` +- `ErrToolDenied` +- `ErrToolApprovalRequired` +- `ErrToolInvalidInput` +- `ErrToolResultTooLarge` +- `ErrToolBackendFailed` + +All production errors must wrap with `%w` where applicable and must map to deterministic API/CLI error codes. + +### Data Models + +`ToolID` + +- Canonical public ID used by registry, policy, CLI, HTTP, UDS, hooks, telemetry, and hosted MCP. +- Format: `segment ( "__" segment )*` +- Segment regex: `[a-z][a-z0-9_]*` +- Max length: 64. +- Lowercase ASCII only. +- `__` is reserved as namespace separator. +- No dots, hyphens, uppercase, empty segments, or dual wire aliases. +- External MCP/extension names that sanitize to more than 64 characters are rejected as conflicted with `id_too_long`; AGH does not truncate, hash-suffix, or create shadow aliases. 
+ +Examples: + +- `agh__tool_list` +- `agh__tool_search` +- `agh__tool_info` +- `agh__skill_list` +- `agh__skill_search` +- `agh__skill_view` +- `agh__network_peers` +- `agh__network_send` +- `agh__task_list` +- `agh__task_read` +- `mcp__github__create_issue` +- `ext__linear__search` + +`Descriptor` + +- `id ToolID` +- `backend BackendRef` +- `display_title string` +- `description string` +- `input_schema json.RawMessage` +- `output_schema json.RawMessage` +- `source SourceRef` +- `visibility Visibility` +- `risk RiskClass` +- `read_only bool` +- `destructive bool` +- `open_world bool` +- `requires_interaction bool` +- `concurrency_safe bool` +- `max_result_bytes int64` +- `toolsets []ToolsetID` +- `tags []string` +- `search_hints []string` + +`BackendRef` + +- `kind BackendKind` +- `extension_id string` +- `handler string` +- `mcp_server string` +- `mcp_tool string` +- `native_name string` +- `requires_capabilities []string` + +`ExtensionToolRuntimeDescriptor` + +- `id ToolID` +- `handler string` +- `input_schema_digest string` +- `output_schema_digest string` +- `read_only bool` +- `risk RiskClass` +- `capabilities []string` + +Schema digest contract: + +- `input_schema_digest` and `output_schema_digest` are lowercase hex `sha256` digests over the JSON Schema subtree only. +- The bytes hashed are RFC 8785 JCS-canonicalized JSON. Object keys are sorted by the canonicalization algorithm, number/string escaping follows JCS, and `$ref` values are hashed literally rather than resolved during digesting. +- The digest excludes surrounding manifest metadata such as `ToolID`, handler, source, risk, and toolsets. +- TypeScript SDK, Go SDK, and daemon manifest validation must share byte-vector fixtures under `sdk/typescript/test-fixtures/digest/`, `sdk/go/test-fixtures/digest/`, and `internal/extension/testdata/digest/`. +- A digest mismatch is a hard `extension_runtime_mismatch`. There is no loose fallback, serializer-specific fallback, or warning-only mode. 
+ +`SourceRef` + +- `kind`: `builtin`, `mcp`, `extension`, `dynamic` +- `owner`: daemon, extension id, MCP server id, bundle id, skill id, or provider id +- `raw_server_name` +- `raw_tool_name` +- `resource_id` +- `resource_version` +- `workspace_id` +- `scope` + +`Availability` + +States are composable, not a single boolean: + +- `registered` +- `enabled` +- `available` +- `authorized` +- `executable` +- `conflicted` + +Reason codes include: + +- `dependency_missing` +- `backend_unhealthy` +- `backend_not_executable` +- `extension_inactive` +- `extension_runtime_mismatch` +- `extension_capability_missing` +- `mcp_unreachable` +- `mcp_auth_unconfigured` +- `mcp_auth_required` +- `mcp_auth_expired` +- `mcp_auth_invalid` +- `mcp_auth_refresh_failed` +- `source_disabled` +- `policy_denied` +- `approval_required` +- `approval_timed_out` +- `approval_canceled` +- `session_denied` +- `hook_denied` +- `schema_invalid` +- `conflicted_id` +- `conflicted_sanitized_name` +- `id_too_long` +- `result_budget_exceeded` + +MCP-backed descriptors may attach a redacted `MCPAuthStatus` to operator-visible tool views only. The status mirrors the existing `internal/mcp/auth.StatusValue` values (`unconfigured`, `needs_login`, `authenticated`, `expired`, `invalid`) plus registry reason codes, and may include `server_name`, `auth_type`, `client_id`, `scopes`, `expires_at`, `refreshable`, `token_present`, and `diagnostic`. It must never include access tokens, refresh tokens, OAuth authorization codes, PKCE verifiers, client secrets, approval tokens, or hosted MCP bind nonces. Session/model-visible projections do not include `MCPAuthStatus`; they hide or deny the tool through `Availability` reason codes. 
+ +`EffectiveToolDecision` + +- `visible_to_operator bool` +- `visible_to_session bool` +- `callable bool` +- `approval_required bool` +- `system_permission_mode` +- `session_policy_result` +- `agent_policy_result` +- `registry_policy_result` +- `source_policy_result` +- `availability_result` +- `hook_result` +- `reason_codes []string` + +`ToolResult` + +- `content []ToolContent` +- `structured any` +- `preview string` +- `artifacts []ArtifactRef` +- `metadata map[string]any` +- `redactions []Redaction` +- `truncated bool` +- `bytes int64` +- `duration_ms int64` + +`Toolset` + +Toolsets are separate named resources/config entries, not overloaded as tools. They use a typed `ToolsetID`, include exact `ToolID`s and patterns, and may include other toolsets recursively. Policy fields must distinguish `tools` and `toolsets` to avoid ambiguity. + +`ToolsetID` uses the same grammar as `ToolID`: lowercase ASCII segments separated by reserved `__`, maximum 64 characters, and no dots, hyphens, uppercase, or empty segments. + +`dynamic` source kind is reserved because the existing source enum already has it, but MVP has no dynamic producer and no dynamic validation surface. Dynamic tools remain unavailable unless a later TechSpec defines their producer, policy, and provenance model. 
+ +### Data-Model Field Rationale + +| Field or key | Shape | Purpose | Storage decision | +|---|---|---|---| +| `ToolID` | string, provider-safe `__`-segmented id | Single policy/dispatch/audit identity across registry, CLI, HTTP, UDS, hooks, telemetry, and hosted MCP | Typed field, never inferred from display title | +| `Descriptor.backend` | structured `BackendRef` | Binds the descriptor to the only allowed executable backend path | Typed struct; dispatch never infers backend from source prefix or metadata | +| `Descriptor.source` | structured `SourceRef` | Preserve raw external provenance without making prefixes the only source of truth | Typed struct; raw names are subfields, not alternate IDs | +| `Descriptor.input_schema` | JSON Schema object | Validate call input before dispatch | JSON schema payload is appropriate because schema content is externally structured and opaque to AGH queries | +| `Descriptor.output_schema` | JSON Schema object | Optional structured output contract | JSON schema payload is appropriate for the same reason as input schema | +| `Descriptor.visibility` | enum | Separate internal/operator/session/model surfaces | Typed enum, queryable and policy-relevant | +| `Descriptor.risk` | enum | Classify read/mutate/network/open-world behavior | Typed enum, never free-text metadata | +| `Descriptor.read_only` | bool | Drives `approve-reads` and safety checks | Typed bool; misclassification is a security bug | +| `Descriptor.destructive` | bool | Forces stricter policy and approval behavior | Typed bool, not metadata | +| `Descriptor.open_world` | bool | Marks tools that can reach arbitrary external state | Typed bool, not metadata | +| `Descriptor.requires_interaction` | bool | Distinguishes autonomous-safe calls from interactive calls | Typed bool, not metadata | +| `Descriptor.max_result_bytes` | int64 | Enforces result budget consistently across surfaces | Typed numeric field with config default | +| `Descriptor.toolsets` | `[]ToolsetID` | 
Supports recursive named bundles without overloading individual tools | Typed list; expanded to concrete `ToolID`s for lineage | +| `MCPAuthStatus` | redacted status object | Lets operator surfaces explain remote MCP login/expiry without exposing credentials | Derived from `internal/mcp/auth`; never persisted by the registry | +| `EffectiveToolDecision.reason_codes` | `[]string` enum values | Operator diagnostics and deterministic error contracts | Typed enum strings; no prose-only decisions | +| `[tools].enabled` | bool | Global registry execution switch | Config key with validation/defaults | +| `[tools].hosted_mcp_enabled` | bool | Allows session exposure through AGH-hosted MCP | Config key with validation/defaults | +| `[tools].default_max_result_bytes` | int64 | Default output cap when descriptor is silent | Config key with validation/defaults | +| `[tools.policy].external_default` | enum | Default executable policy for extension/MCP/dynamic tools | Config key, not hidden in metadata | +| `agent.tools` | `[]ToolID/pattern` | Allow concrete tools/patterns for an agent | Frontmatter/config field resolved to concrete lineage atoms | +| `agent.toolsets` | `[]ToolsetID` | Allow named bundles for an agent | Frontmatter/config field expanded before session lineage | +| `agent.deny_tools` | `[]ToolID/pattern` | Explicit narrowing layer for an agent | Frontmatter/config field, not runtime-only state | +| `extension.resources.tools.*.backend` | structured backend metadata | Declares the manifest-authoritative runtime binding for `extension_host` or `mcp` execution | Extension manifest fields, not in-process function pointers; daemon rejects mismatched runtime descriptors | +| `extension.resources.tools.*.handler` | string | Names the extension SDK handler used by `tools/call` | Manifest field validated against `provide_tools`; not executable by itself | +| `ExtensionToolRuntimeDescriptor.*_schema_digest` | lowercase hex SHA-256 | Reconciles live SDK handlers against 
manifest schemas across TypeScript, Go, and daemon validation | Runtime-only field computed from RFC 8785 canonical JSON fixtures | +| `ExtensionToolRuntimeDescriptor` | redacted runtime descriptor | Lets daemon confirm a running extension provides exactly the manifest-declared handler/schema/risk shape | Runtime-only reconciliation result; not persisted as source of truth | +| `MCPToolHandle` | daemon-owned client handle | Calls remote MCP tools without exposing remote credentials to descriptors or sessions | Runtime-only handle injected by `internal/mcp`; token material stays behind `internal/mcp/auth` | + +No new SQLite columns are required for the MVP registry foundation. Existing session lineage stores concrete tool permission atoms; implementation should validate those atoms as canonical `ToolID`s. Existing remote MCP OAuth tokens already live in `globaldb.mcp_auth_tokens` through `mcpauth.TokenStore`; registry work must not add token fields to tool descriptors, resources, events, or metadata. Hosted MCP bind nonces are ephemeral launch correlation values and must not be stored in `mcp_auth_tokens`; if a later design needs durable hosted-proxy credential state, it must add a separate typed table with its own lifecycle and redaction tests. If a later task needs durable queryable tool-call history beyond append-only events, it must add a typed side table such as `tool_calls` rather than placing queryable call state in a session metadata JSON blob. 
+ +### Side-Table vs JSON Decisions + +| Domain state | Decision | Rationale | +|---|---|---| +| Tool descriptors | Typed resource specs plus runtime descriptors | Descriptors are matchable by id/source/risk/visibility and must not live as opaque metadata blobs | +| Toolsets | Typed config/resource records | Toolset membership affects policy and lineage; it must be queryable and expandable deterministically | +| Tool call events | Append-only event payloads for MVP; typed side table only if queryable history is required | Events are the operational ledger; indexed call history would be matchable state and must not be hidden in JSON metadata | +| Source provenance | Structured `SourceRef` fields | Raw MCP/extension names are needed for debugging and collision handling | +| Input/output schemas | JSON Schema blobs | Schema contents are inherently opaque external contracts and are not AGH ownership state | +| Tool result structured payload | JSON payload plus typed envelope fields | Result body can be arbitrary, but status, bytes, truncation, redaction, and tool id are typed envelope fields | +| Policy decisions | Typed `EffectiveToolDecision` | Authorization is matchable and auditable; it cannot be a JSON bag | +| Availability reasons | Typed reason-code list | Operator diagnostics and tests need deterministic matching | + +### API Endpoints + +All endpoints are implemented once in `internal/api/core` and registered by HTTP and UDS transports. 
+ +| Method | Path | Description | +|---|---|---| +| `GET` | `/api/tools` | List operator-visible tools with availability/policy reason codes | +| `POST` | `/api/tools/search` | Search tools by id, title, description, source, tags, and toolsets | +| `GET` | `/api/tools/{id}` | Return descriptor, availability, policy view, schema, and source provenance | +| `POST` | `/api/tools/{id}/invoke` | Invoke a tool through registry dispatch | +| `GET` | `/api/sessions/{id}/tools` | Return session/model-visible callable projection | +| `POST` | `/api/sessions/{id}/tools/search` | Search only within effective session-callable projection | +| `GET` | `/api/toolsets` | List named toolsets and expansion status | +| `GET` | `/api/toolsets/{id}` | Inspect one toolset expansion and conflicts | + +Invoke request: + +```json +{ + "session_id": "sess_...", + "workspace_id": "ws_...", + "input": {}, + "approval_token": "optional-local-approval-reference" +} +``` + +`approval_token` is an opaque local approval reference issued by the daemon approval surface for CLI/HTTP/UDS calls. AGH stores only a hash, never logs or emits the raw value, redacts it from SSE/events/errors, scopes it to one tool decision, and treats it as separate from `claim_token`. Hosted MCP does not accept client-supplied `approval_token`; it uses the Hosted MCP Approval Bridge below. + +Invoke response: + +```json +{ + "tool_id": "agh__skill_view", + "status": "completed", + "result": {}, + "truncated": false, + "duration_ms": 23, + "events": [] +} +``` + +Status codes: + +- `200` completed/listed. +- `202` approval required or async dispatch accepted, only if the tool is explicitly async. +- `400` invalid `ToolID`, invalid schema input, or malformed request. +- `403` denied by ACP ceiling, session lineage, registry policy, source policy, or hook. +- `404` not found or hidden from caller context. +- `409` conflicted canonical ID or sanitized external name. +- `422` registered but unavailable or not executable. 
+- `500` internal daemon error. +- `502` backend adapter failure. + +CLI parity: + +- `agh tool list -o json` +- `agh tool search <query> -o json` +- `agh tool info <tool-id> -o json` +- `agh tool invoke <tool-id> --input <json> -o json` +- `agh toolsets list -o json` +- `agh toolsets info <toolset-id> -o json` +- `agh tool mcp --session <session-id> --bind-nonce <nonce>` for the daemon-spawned hosted MCP stdio proxy + +## Integration Points + +### ACP + +ACP does not impose a callable tool registry pattern. AGH must not use ACP `ToolCall.title` as a policy identity or dispatch key. + +AGH will integrate with ACP by: + +- passing the hosted AGH MCP server in ACP `mcpServers` during session creation/load when the selected agent supports MCP; +- preserving ACP tool-call observations as session events keyed by `toolCallId`; +- mapping AGH-owned tool calls back into ACP lifecycle updates where relevant; +- treating ACP `ToolKind` as risk/display metadata, not identity; +- keeping `permissions.mode` as the system/session approval ceiling. + +Current-state caveat: `internal/acp.toSDKMCPServers` currently emits stdio-only `acpsdk.McpServer` values. MVP registry work must keep hosted AGH MCP as a stdio-only injected server and must not imply remote MCP HTTP/SSE ACP parity until a later implementation adds tested HTTP/SSE conversion, redacted Authorization/header handling, and provider capability checks. + +### Hosted MCP + +MVP exposure path: + +```text +agent session -> ACP mcpServers -> agh tool mcp --session <session-id> --bind-nonce <nonce> -> UDS -> daemon Tool Registry +``` + +The hosted MCP server lists only session-callable tools. It exposes MCP `Tool.name` equal to AGH canonical `ToolID`. It does not expose unavailable, unauthorized, or conflicted tools to the model-visible surface. + +Hosted MCP authentication: + +- On session creation/load, the daemon records a short-lived hosted MCP launch record keyed by `session_id`, `workspace_id`, an opaque non-secret `hosted_mcp_bind_nonce`, expiry, and expected AGH binary path. 
+- The `hosted_mcp_bind_nonce` is a correlation nonce, not a bearer secret and not claim-token-equivalent. It may traverse ACP `mcpServers[].args` because it is insufficient without UDS peer credentials; raw `claim_token`, remote MCP OAuth tokens, and approval tokens never traverse this path. +- At startup, `agh tool mcp --session <session-id> --bind-nonce <nonce>` performs a UDS bind RPC. The daemon accepts the bind only when the nonce matches a live launch record, the Unix-domain socket peer credentials identify the same OS user, the peer executable matches the expected AGH binary, and the record has not expired. +- If the platform cannot provide peer credentials or executable validation, hosted MCP binding fails closed and the session receives no hosted registry projection on that platform. +- The daemon binds the UDS connection to exactly one session/workspace projection and rejects any later client-supplied `session_id` or `workspace_id`. +- The launch record is invalidated on first successful bind, session end, proxy disconnect, or TTL expiry, whichever happens first. +- A foreign process calling `agh tool mcp --session <session-id>` without a valid nonce plus matching UDS peer credentials receives a deterministic permission error and no tool projection. +- Redaction tests must cover ACP payload diagnostics, process diagnostics, crash bundles, logs, SSE/events, settings output, tool results, and MCP responses. The nonce may appear in AGH-owned diagnostics as a redacted/correlatable launch id, but it must never be described as a claim-token-equivalent bearer secret. + +Hosted MCP approval bridge: + +- Hosted MCP projections include only tools that are callable without a new approval prompt or tools whose session has a live daemon-mediated approval channel. 
+- When `EffectiveToolDecision.approval_required=true` and ACP `session/request_permission` is available, `Registry.Call` derives a context with `[tools.policy].approval_timeout_seconds`, issues the ACP permission request, and blocks the MCP `tools/call` response until approved, denied, timed out, canceled, or the hosted MCP stdio/UDS connection closes. +- When no approval channel is available, hosted MCP hides the tool from `tools/list` if that can be determined during projection. If a call still reaches dispatch, it returns `ErrToolApprovalRequired` with reason codes `approval_required` and `approval_unreachable`. +- Approval timeout returns `ErrToolApprovalRequired` with `approval_required` and `approval_timed_out`. Hosted MCP proxy disconnect or stdio close cancels the derived context and returns `approval_required` plus `approval_canceled`. +- Hosted MCP cannot satisfy approval using client-supplied arguments. CLI/HTTP/UDS may use `approval_token`; hosted MCP must use the daemon approval bridge. + +Hosted MCP lifecycle: + +- The stdio proxy is spawned by the ACP runtime from AGH-provided `mcpServers` config and is scoped to one AGH session. +- The proxy exits when stdio closes, when the session stops, or when the launch record expires before successful bind. +- On ACP `session/load`, the daemon mints a fresh bind nonce and provides a fresh hosted MCP entry for that resumed session. +- The proxy never accepts a client-supplied workspace id. The daemon derives workspace id from the bound session at projection time and dispatch time. + +### Existing MCP Config And Auth + +AGH already has an MCP server configuration and remote-auth subsystem. The Tool Registry must consume those surfaces instead of defining a parallel MCP model: + +- `internal/config/provider.go` defines `MCPServer`, `MCPServerTransport` (`stdio`, `http`, `sse`), and `MCPAuthConfig` for OAuth 2.1 + PKCE metadata/client settings. Token material is explicitly outside config. 
+- `internal/config/mcpjson.go` loads `mcp.json` sidecars using `mcpServers` or `mcp_servers`. +- `internal/config/mcp_resource.go` validates `mcp_server` desired-state resources. +- `internal/mcp/auth` owns metadata discovery, PKCE state, authorization-code exchange, refresh, redacted status, logout/revocation, and `StatusValue`. +- `internal/store/globaldb/global_db_mcp_auth.go` persists remote MCP OAuth tokens behind `mcpauth.TokenStore` with encryption/redaction boundaries. +- `internal/cli/mcp_auth.go` provides the existing agent-operable commands `agh mcp auth login`, `agh mcp auth status`, and `agh mcp auth logout`. +- `internal/settings` and `internal/api/contract/settings.go` already expose redacted MCP auth status for settings surfaces. + +The hosted MCP bind nonce described above is not a remote MCP OAuth token and is not a bearer credential by itself. It is a daemon-minted correlation value for AGH's local stdio proxy, validated together with UDS peer credentials and the expected AGH binary path. Remote MCP OAuth tokens remain owned by `internal/mcp/auth` and `globaldb`. Registry descriptors, events, tool results, and MCP proxy arguments must never mix those credentials or reuse one lifecycle for the other. + +Remote MCP call-through uses an `MCPCallExecutor` implemented inside `internal/mcp`. `internal/tools` depends only on the executor interface and redacted `MCPAuthStatusProvider`; it must not import `internal/mcp/auth`, open `mcpauth.TokenStore`, receive raw bearer strings, or construct Authorization headers. The executor resolves bearer material internally, applies transport-specific headers in memory for the outbound MCP request, and maps failures back to redacted registry errors. 
+ +External MCP-backed tool availability must derive auth diagnostics from the existing auth service: + +| `internal/mcp/auth.StatusValue` | Registry reason code | Session projection behavior | +|---|---|---| +| `unconfigured` | `mcp_auth_unconfigured` | Hide external MCP tools unless the server is public and executable support exists | +| `needs_login` | `mcp_auth_required` | Hide from model-visible projection; operator view points to `agh mcp auth login <server>` | +| `authenticated` | none | Auth does not block availability; calls may proceed only through the daemon-owned MCP adapter after registry policy passes | +| `expired` | `mcp_auth_expired` | Hide from model-visible projection; operator view points to `agh mcp auth status --refresh <server>` | +| `invalid` | `mcp_auth_invalid` | Hide from model-visible projection; operator view points to logout/login repair | + +Implementation correction required before the registry consumes MCP resource catalogs: current `internal/daemon/tool_mcp_resources.go` clones MCP server records through `cloneDaemonMCPServer`, which preserves only `Name`, `Command`, `Args`, and `Env`. Registry work that depends on remote MCP resources must update that clone path and its tests to preserve `Transport`, `URL`, and `Auth`; otherwise remote MCP auth metadata will be silently dropped from tool diagnostics. + +### Extensions + +Extension-installed tools are possible and should be first-class. 
+ +Current foundation already has: + +- `extension.toml` `resources.tools` +- `resources.publish.families = ["tools"]` +- resource projection into daemon tool records +- extension health/status infrastructure +- Host API capability checks + +This TechSpec extends that by adding manifest-authoritative backend metadata, runtime reconciliation, and executable out-of-process handlers: + +```toml +[resources.tools.search] +id = "ext__linear__search" +description = "Search Linear issues" +read_only = true +backend.kind = "extension_host" +backend.handler = "search" +toolsets = ["linear__read"] +``` + +TypeScript extensions define the matching runtime handler through `@agh/extension-sdk`: + +```ts +extension.tool("search", { + readOnly: true, + inputSchema: z.object({ query: z.string() }), +}, async ({ input, context }) => { + return { content: [{ type: "text", text: await searchLinear(input.query, context) }] }; +}); +``` + +Go extensions define the same handler through the public Go extension SDK: + +```go +ext.Tool("search", aghsdk.ToolOptions{ + ReadOnly: true, + InputSchema: searchInputSchema, +}, func(ctx context.Context, req aghsdk.ToolRequest[SearchInput]) (aghsdk.ToolResult, error) { + return searchLinear(ctx, req.Input) +}) +``` + +Extensions must not freely impersonate `agh__*` or another extension namespace. Raw manifest names remain in `SourceRef`. `extension.toml` is the source of truth; SDK registration is runtime proof that the live extension process implements the manifest-declared handler and compatible schemas. + +Execution boundaries: + +- `native_go`: full MVP dispatch through in-process daemon code compiled with AGH. +- `extension_host`: full MVP dispatch through out-of-process extension runtime, `tool.provider`, `provide_tools`, and `tools/call`. +- `mcp`: full MVP dispatch through daemon-owned MCP client adapters using existing MCP config/auth. 
+- `subprocess`: not a separate public backend kind; TypeScript and Go extension SDKs use the existing subprocess runtime behind `extension_host`. +- `bridge`: future bridge adapter, rejected by MVP validation unless a later TechSpec enables it. + +No in-process third-party extension handlers in MVP. A Go function authored by an extension runs in the extension's subprocess binary through the Go SDK, not inside the daemon. + +### MCP Sources + +MCP-backed tools use: + +```text +mcp__<server>__<tool> +``` + +The registry must preserve raw server/tool names in `SourceRef`. Sanitization collisions fail closed and mark the candidate tool `conflicted`. + +AGH-managed MCP sources in MVP are the existing validated projections, not raw file scans: + +- top-level and workspace MCP config plus global/workspace `mcp.json` sidecars; +- provider and agent MCP server declarations; +- skill MCP declarations resolved by `internal/skills.MCPResolver`, including the existing `allowed_marketplace_mcp` trust gate; +- extension `resources.mcp_servers` records resolved by `internal/extension/resource_publication.go`; +- future client-supplied ACP `mcpServers` only if a later TechSpec makes them session-scoped tool sources with explicit source trust and collision handling. + +Top-level, provider, agent, and `mcp.json` declarations use `aghconfig.MCPServer` and may carry remote `transport`, `url`, and `auth` fields. Current skill and extension MCP declarations are stdio subprocess declarations with `name`, `command`, `args`, and `env`; registry work must not infer remote OAuth support from them until their manifests are explicitly extended. + +External MCP descriptor discovery must consume existing `aghconfig.MCPServer` config/resource projections and `internal/mcp/auth` redacted status. It must not read raw config files directly, bypass strict `mcp.json` decoding, bypass skill sidecar symlink hardening, bypass marketplace MCP consent, or bypass extension resource grants. 
+ +External MCP-backed tools are executable in the MVP only through daemon-owned MCP client adapters. They are session-callable when descriptor discovery succeeds, the source is explicitly allowed, existing MCP auth status is usable, the registry policy/approval/session/hook gates pass, and dispatch can call the remote MCP server without exposing token material. Operator surfaces still show unavailable MCP tools with deterministic diagnostics when health, auth, source, policy, collision, or schema checks fail. + +### Hooks + +Existing tool hook concepts become part of central registry dispatch: + +- `tool.pre_call` +- `tool.post_call` +- `tool.post_error` + +Payloads should use canonical `tool_id`. Existing `tool_name` / `tool_namespace` usage should be replaced in the registry path to avoid dual identity. + +### Skills + +Built-in skill tools prove progressive disclosure: + +- `agh__skill_list` +- `agh__skill_search` +- `agh__skill_view` + +These call into `internal/skills.Registry`, respect workspace overlays, reuse content verification, and enforce result budgets. Install/remove/update tools are intentionally out of MVP unless supply-chain policy/scanning is expanded. + +`agh__skill_view` applies registry result budgeting. If content exceeds the descriptor limit, the result returns `truncated=true`, a typed `next_offset`, and an artifact/reference strategy rather than silently dropping content. 
+ +### Network And Tasks + +MVP network tools: + +| ToolID | Read-only | Destructive | Open-world | Authority route | +|---|---:|---:|---:|---| +| `agh__network_peers` | true | false | false | Existing network peer/list service | +| `agh__network_send` | false | false | true | Existing network send service with channel/session policy checks | + +MVP task tools: + +| ToolID | Read-only | Destructive | Open-world | Authority route | +|---|---:|---:|---:|---| +| `agh__task_list` | true | false | false | `task.Service.ListTasks` | +| `agh__task_read` | true | false | false | `task.Service.GetTask` | +| `agh__task_create` | false | false | false | `task.Service.CreateTask` | +| `agh__task_child_create` | false | false | false | `task.Service.CreateChildTask` | +| `agh__task_update` | false | false | false | `task.Service.UpdateTask` | +| `agh__task_cancel` | false | true | false | `task.Service.CancelTask` | +| `agh__task_run_list` | true | false | false | `task.Service.ListTaskRuns` | + +Excluded task tools: + +- `agh__task_claim` +- `agh__task_release` +- `agh__task_complete` +- `agh__task_fail` +- `agh__task_run_start` +- `agh__task_run_complete` +- `agh__task_run_cancel` + +Those excluded tools cross claim/lease/session lifecycle authority. They require a separate task-execution TechSpec because `task.Service.ClaimNextRun`, `Spawn`, and session manager terminal-state transitions are authoritative primitives and must not be wrapped by generic agent-callable tools. + +All network and task tools must route through existing network/task services and existing authorization rules. Mutating tools must not be classified as read-only. + +`agh__task_child_create` must call `task.Service.CreateChildTask`, and lineage subset enforcement remains in that service-level authority path. Registry policy may narrow the call before dispatch, but it must not become the authoritative child-permission expander or allow a child task/session to widen beyond the parent. 
+ +## Extensibility Integration Plan + +### Extension Manifests + +Update extension manifest tool declarations with runtime metadata: + +- `id` +- `backend.kind` +- `backend.handler` +- `backend.server` +- `backend.tool` +- `requires_env` +- `required_capabilities` +- `risk` +- `destructive` +- `open_world` +- `requires_interaction` +- `max_result_bytes` +- `toolsets` +- `tags` +- `visibility` + +Extension-published descriptors are installed through existing resource publication, then normalized by the registry provider. For `extension_host` tools, the extension process must advertise `tool.provider`, expose `provide_tools`, and implement `tools/call`. The daemon marks the tool executable only when the runtime descriptor matches the manifest-authoritative `id`, handler name, schema digests, and risk flags. + +Extension-published MCP servers currently use `resources.mcp_servers` with stdio-only `command`, `args`, and `env` fields through `internal/extension/resource_publication.go`. This TechSpec does not add remote OAuth fields to extension MCP server declarations in MVP. If a future extension wants to publish remote authenticated MCP servers, that future TechSpec must extend the extension manifest schema to mirror `aghconfig.MCPServer` transport/auth fields and reuse `internal/mcp/auth`; it must not introduce extension-local token storage. + +For `mcp` extension tool descriptors, `backend.server` must resolve to an existing authorized MCP server source in the same extension/config scope, and `backend.tool` must match a discovered MCP tool. Missing, unauthorized, unhealthy, unauthenticated, or conflicted backend servers keep the tool operator-visible but unavailable with deterministic reason codes. A missing or unauthorized backend server cannot make the tool session-callable. + +For `extension_host` descriptors, `backend.handler` must match a runtime handler exposed by the extension SDK. 
Missing handlers, schema digest mismatches, risk flag mismatches, inactive extensions, or missing `tool.provider` grants keep the tool operator-visible but unavailable with `extension_runtime_mismatch`, `extension_capability_missing`, or `extension_inactive`. + +### Hooks + +Add or update hook payload schemas to include: + +- `tool_id` +- `display_title` +- `source` +- `risk` +- `read_only` +- `destructive` +- `open_world` +- `session_id` +- `workspace_id` +- `decision` +- `reason_codes` +- `input_digest` +- `result_digest` + +Hooks can deny or patch only through typed return contracts. They cannot raise permissions above ACP/session policy. + +### Skills, Tools, Resources, Bundles + +Add toolsets as named resources/config entries: + +- built-in `agh__bootstrap` +- built-in `agh__catalog` +- built-in `agh__coordination` +- built-in `agh__tasks` +- extension-provided toolsets such as `linear__read` + +Bundles may include toolsets in the future, but must expand to concrete `ToolID`s during session projection to preserve lineage narrowing. + +Skill MCP sidecars (`mcp.json`) remain MCP server declarations, not executable registry tools by themselves. Current skill MCP declarations carry only `name`, `command`, `args`, and `env`; they are stdio declarations. The registry may use those declarations as external MCP descriptor sources only after preserving skill sidecar symlink hardening, applying the existing skill trust gate (`allowed_marketplace_mcp`), and adding remote auth diagnostics from `internal/mcp/auth` only when the underlying source is an auth-capable `aghconfig.MCPServer`. + +### Bridge SDKs + +No direct bridge SDK execution in MVP. The registry design reserves a `bridge` backend kind, but the adapter is not required until bridge-managed tools need execution. + +### AGH Network + +No remote peer tool execution in MVP. Peer discovery may later advertise loaded toolsets or tool summaries, but remote dispatch requires a separate trust and authorization design. 
+ +### Docs For Extension Authors + +Add docs covering: + +- canonical `ToolID` rules; +- extension tool manifest shape; +- TypeScript `extension.tool(...)` authoring; +- Go SDK function-based tool authoring; +- backend kinds; +- why third-party handlers are out-of-process; +- manifest/runtime reconciliation failures and how to debug `provide_tools`; +- result budgets and redaction; +- availability reason codes; +- how to debug conflicted/unavailable tools; +- CLI/HTTP/UDS management paths. + +## Agent Manageability Plan + +Agents must be able to inspect and operate the registry without the web UI. + +CLI: + +- `agh tool list -o json` +- `agh tool search <query> -o json` +- `agh tool info <tool-id> -o json` +- `agh tool invoke <tool-id> --input <json> -o json` +- `agh toolsets list -o json` +- `agh toolsets info <toolset-id> -o json` +- `agh tool mcp --session <session-id> --bind-nonce <nonce>` +- Existing remote MCP auth commands remain the management path for external MCP credentials: `agh mcp auth login <server> -o json`, `agh mcp auth status [server] -o json`, and `agh mcp auth logout <server> -o json`. + +HTTP and UDS parity: + +- Same contract types. +- Same reason codes. +- Same redaction. +- Same policy decisions. +- UDS is the preferred local machine path for CLI and hosted MCP proxy. +- Existing MCP settings endpoints remain the management path for server config/status: `GET /api/settings/mcp-servers`, `PUT /api/settings/mcp-servers/:name`, and `DELETE /api/settings/mcp-servers/:name` over HTTP and UDS. +- Tool registry operator views may embed or link redacted settings `auth_status`; they must not create duplicate MCP auth commands or expose token material. + +Discovery behavior: + +- Operator surfaces show all registered tools, including unavailable/unauthorized/conflicted entries. +- Session/model-visible surfaces show only callable tools for that effective session. +- Dispatch always recomputes policy and availability even if discovery already hid unsafe tools. 
+ +Deterministic errors: + +- Errors include `code`, `message`, `tool_id`, `reason_codes`, and redacted structured details. +- Policy errors must identify the denying layer: `system_permission_mode`, `session_lineage`, `agent_policy`, `registry_policy`, `source_policy`, `availability`, or `hook`. +- MCP auth errors identify the MCP server name and redacted status/reason code, never token material. Tool registry surfaces may recommend the existing `agh mcp auth ...` repair command, but they do not start OAuth login/logout flows in MVP. + +E2E manageability checks: + +- CLI list/search/info matches HTTP and UDS for the same workspace/session. +- Hosted MCP `tools/list` equals `GET /api/sessions/{id}/tools`. +- Denied tools are visible in operator list but absent from session projection. +- Extension-installed tool descriptor appears after install and disappears after disable/remove. + +## Config Lifecycle + +### Global `config.toml` + +Add: + +```toml +[tools] +enabled = true +hosted_mcp_enabled = true +default_max_result_bytes = 262144 + +[tools.hosted_mcp] +bind_nonce_ttl_seconds = 30 + +[tools.policy] +external_default = "disabled" +approval_timeout_seconds = 120 +trusted_sources = [] +``` + +Semantics: + +- `tools.enabled=false` disables AGH-owned registry dispatch and hosted MCP exposure, but operator diagnostics can still show static resources where safe. +- `hosted_mcp_enabled=true` allows AGH to inject/offer the local hosted MCP proxy for sessions. +- `bind_nonce_ttl_seconds=30` bounds the hosted MCP launch record lifetime before UDS peer-credential binding. +- `default_max_result_bytes` applies when a descriptor does not specify a smaller limit. +- `external_default="disabled"` means extension/MCP/dynamic executable tools are registered and operator-visible, but not session-callable until enabled by explicit tool, toolset, source-tier, or agent grants. Built-in AGH bootstrap tools remain enabled by default subject to ACP/session policy. 
+- `approval_timeout_seconds=120` bounds daemon-mediated approvals for hosted MCP, CLI, HTTP, and UDS calls. +- `trusted_sources=[]` is an explicit source allowlist for external read-only auto-approval. Empty means no extension/MCP source can rely on `approve-reads` without an explicit per-tool, toolset, source, or agent grant. + +Allowed `external_default` values: + +- `disabled` +- `ask` +- `enabled` + +MVP default is `disabled`. + +`approve-reads` does not auto-approve `extension` or `mcp` source tools unless the source is present in `trusted_sources`, even when the descriptor declares `read_only=true`. Mutating, destructive, open-world, or interaction-requiring extension/MCP tools cannot become callable through `approve-reads`. They require explicit policy grants by `ToolID`, toolset, source, or agent plus the effective ACP/session ceiling, approval bridge when required, session lineage, and hook revalidation. + +### Existing MCP Config And Auth Lifecycle + +No new `config.toml` keys are added for remote MCP OAuth tokens. Existing MCP lifecycle remains authoritative: + +- MCP server definitions continue to come from top-level `[mcp_servers]`, provider `[providers.<name>.mcp_servers]`, agent-local `mcp_servers`, global/workspace `mcp.json`, skill sidecars, and extension `resources.mcp_servers`. +- Remote MCP auth configuration continues to use `MCPAuthConfig` fields (`type`, metadata/issuer/authorization/token/revocation URLs, `client_id`, `client_secret_env`, `scopes`) on remote MCP servers. +- Access tokens and refresh tokens continue to live only in the `internal/mcp/auth` token store backed by `internal/store/globaldb`; they are not copied into registry config, session lineage, tool descriptors, events, or extension manifests. +- The registry may read redacted MCP auth status to produce operator diagnostics and availability reason codes, but cannot mutate auth state. Login, refresh, and logout remain `agh mcp auth ...` operations in MVP. 
+- The hosted MCP bind nonce is ephemeral process/session launch state, not `config.toml` state and not part of the MCP OAuth token store. + +### Agent Definitions + +Keep the existing `tools` field but harden its meaning: + +- `tools`: exact canonical `ToolID`s or approved wildcard patterns. +- `toolsets`: named toolset IDs. +- `deny_tools`: exact IDs or patterns that always narrow permissions. + +Session lineage should persist concrete resolved `ToolID` atoms, not broad unresolved wildcard patterns. Child session permissions must remain subsets of parent session permissions. + +Invalid existing lineage atoms reject session spawn/load with a typed validation error. Greenfield posture applies: AGH does not silently normalize old atoms, and local databases that predate this TechSpec require a fresh `AGH_HOME` rather than compatibility migration shims. + +### Tool Pattern Grammar + +Allowed policy pattern forms: + +- exact canonical IDs, for example `agh__skill_view`; +- namespace-prefix wildcards ending in `*`, for example `agh__skill_*` or `mcp__github__*`; +- toolset IDs in `toolsets`, never in `tools`. + +Disallowed forms: + +- regular expressions; +- suffix wildcards such as `*__search`; +- mid-segment wildcards such as `agh__*__view`; +- uppercase, dots, hyphens, or empty segments; +- wildcard forms that would match across a reserved `__` boundary ambiguously. + +Pattern matching runs against canonical `ToolID` only. Display titles, raw MCP tool names, and extension manifest names do not participate in policy matching. 
+ +### Validation + +Config validation must reject: + +- invalid `ToolID` patterns; +- unknown toolset IDs when a config is resolved in a concrete workspace; +- `__` misuse; +- extension attempts to publish under reserved `agh__*`; +- global defaults that would expose external tools without source policy support; +- `trusted_sources` entries that do not resolve to known extension/MCP source refs; +- approval timeouts or hosted MCP bind nonce TTLs outside daemon min/max bounds; +- result byte limits below zero or above a daemon maximum. + +### Docs And Generated Surfaces + +Update: + +- CLI docs for `agh tool` and `agh toolsets`; +- existing MCP auth CLI docs when registry diagnostics reference `agh mcp auth ...`; +- settings docs for `permissions.mode` to clarify ceiling behavior; +- settings MCP server docs for redacted `auth_status` reuse in tool diagnostics; +- extension author docs; +- site docs for Tool Registry architecture; +- OpenAPI contract and generated web types. + +## Impact Analysis + +| Component | Impact Type | Description and Risk | Required Action | +|---|---|---|---| +| `internal/tools` | Modified/new | Becomes runtime registry owner, not just metadata definitions | Add `ToolID`, descriptors, providers, registry, policy, dispatch | +| `internal/config` | Modified/consumed | Existing `MCPServer` transport/auth config is the source of truth for MCP resources | Preserve `transport`, `url`, and `auth`; do not move OAuth config under `[tools]` | +| `internal/resources` | Modified | Cold tool resource remains desired state but must carry canonical ID/source metadata | Update codecs, validators, tests | +| `internal/extension` | Modified | Extension tools gain backend metadata, manifest/runtime reconciliation, and executable out-of-process invocation | Extend manifest types, protocol capabilities, `provide_tools`, `tools/call`, validation, lifecycle, and publication tests | +| `internal/mcp` | Modified/new | Hosted MCP proxy exposes registry tools; MCP 
adapter normalizes and executes external tools | Add MCP list/call bridge through UDS/registry and daemon-owned remote MCP client call-through | +| `internal/mcp/auth` | Consumed | Existing remote MCP OAuth/PKCE status drives external MCP availability diagnostics | Inject redacted status provider; do not duplicate token store or OAuth flows | +| `internal/acp` | Modified | Session creation/load must include hosted AGH MCP where applicable; permission mode becomes registry ceiling; current MCP conversion is stdio-only | Wire session projection, keep hosted MCP stdio-only in MVP, and avoid implying remote HTTP/SSE ACP parity | +| `internal/store` | Modified | Session lineage `Tools` atoms become canonical resolved `ToolID`s | Validate IDs and preserve subset checks | +| `internal/hooks` | Modified | Tool hook payloads should use canonical `tool_id` | Update payloads, matchers, docs, tests | +| `internal/api/contract` | New/modified | Shared DTOs for tools/toolsets/calls/errors | Add contract types and codegen | +| `internal/api/core` | New/modified | Transport-independent tool handlers | Implement list/search/info/invoke/session projection | +| `internal/api/httpapi` | Modified | Register HTTP routes | Thin transport registration only | +| `internal/api/udsapi` | Modified | Register UDS routes | Thin transport registration only | +| `internal/cli` | Modified/consumed | Agent-manageable `agh tool` and `agh toolsets` commands; existing `agh mcp auth` remains the MCP credential path | Add structured output and UDS client methods; link diagnostics to existing auth commands | +| `internal/settings` | Consumed | Existing MCP server list includes redacted `auth_status` | Reuse status shape for operator diagnostics; no duplicate settings status model | +| `internal/skills` | Modified | Skill list/search/view tools call into skills registry; skill MCP sidecars may inform external MCP descriptor sources | Preserve skill sidecar symlink hardening and `allowed_marketplace_mcp` 
trust filtering | +| `internal/network` | Modified | Network tools call peers/send through existing service | Ensure mutating calls enforce policy | +| `internal/task` | Modified | Bounded task tools call task service | Keep TaskManager authority model intact | +| `sdk/typescript` | Modified | Extension authors define tools using TypeScript functions | Add `extension.tool(...)`, schema digesting, `provide_tools`, and `tools/call` handler support | +| `sdk/go` | New | Extension authors define tools using Go functions in subprocess extensions | Add public Go extension SDK mirroring TypeScript tool-provider APIs | +| `sdk/create-extension` | Modified | Templates should scaffold executable tool providers | Add TypeScript and Go tool-extension templates plus manifest examples | +| `web/` | Modified | Settings/help surfaces may display registry policy state, tool diagnostics, and existing redacted MCP `auth_status` | Only render truthful daemon-backed status; no invented remote login controls | +| `packages/site` | Modified | Public docs for registry, extension tools, approval ceiling | Add docs and CLI reference updates | +| `.compozy/tasks/tools-registry/*` | New | Research, ADRs, final TechSpec | Keep analysis paths referenced in later tasks | + +## Test Strategy + +### Unit Tests + +Test: + +- `ToolID` validation, parsing, wildcard matching, and collision rejection. +- External name sanitization and fail-closed collision behavior. +- `Descriptor` validation and schema size limits. +- Availability state transitions and reason-code composition. +- Policy matrix across `deny-all`, `approve-reads`, and `approve-all`. +- Source defaults for built-in, extension, MCP, and dynamic tools. +- Agent allow/deny/toolset expansion. +- Session lineage concrete `ToolID` subset validation. +- Dispatch pipeline ordering. +- JSON schema input validation. +- Result truncation and redaction. +- Hook deny/patch/result behavior. +- Extension manifest backend validation. 
+- MCP auth status mapping from `internal/mcp/auth.StatusValue` to registry availability reason codes. +- MCP server resource cloning/projection preserves `Transport`, `URL`, and `Auth` when remote MCP resources flow into registry diagnostics. + +Mocks are acceptable for provider I/O boundaries, but policy/dispatch correctness must be tested with real registry instances. + +### Integration Tests + +Test: + +- Extension manifest declares a tool and it appears in operator registry projection. +- Disabling/removing an extension removes or marks the tool unavailable. +- TypeScript extension declares a manifest-authoritative `extension_host` tool, registers a matching SDK function, and dispatch succeeds through `Registry.Call`. +- Go extension declares a manifest-authoritative `extension_host` tool, registers a matching SDK function, and dispatch succeeds through `Registry.Call`. +- Extension runtime descriptor mismatches for handler, schema digest, risk flags, or missing `tool.provider` grant keep the tool operator-visible and session-hidden with deterministic reason codes. +- MCP-backed extension tool resolves to an authorized MCP source and dispatch succeeds through the daemon-owned MCP adapter when auth, source policy, approval, hooks, and session lineage pass. +- MCP-backed extension tool remains operator-visible but unavailable when its backend server is missing, unauthorized, unhealthy, unauthenticated, or conflicted. +- Remote MCP server with `needs_login`, `expired`, and `invalid` auth statuses appears only in operator diagnostics with redacted `MCPAuthStatus` and is hidden from session/model projections. +- Existing `agh mcp auth status --refresh -o json` changes operator diagnostics without requiring a registry-owned OAuth flow. +- `agh tool info <id>` and `GET /api/tools/{id}` show redacted MCP auth diagnostics that match `/api/settings/mcp-servers` `auth_status` for the same server.
+- Remote OAuth token values never appear in tool CLI/API/UDS/MCP responses, SSE payloads, event payloads, logs, or process diagnostics. +- A fake remote MCP server that requires `Authorization` proves the header is injected only inside `internal/mcp` and never appears in `internal/tools` errors, logs, events, or result envelopes. +- Hosted MCP bind nonces never grant access without UDS peer credentials, and AGH-owned diagnostics never describe them as claim-token-equivalent bearer secrets. +- Hosted MCP binding fails closed when UDS peer credentials or executable validation are unavailable. +- Remote MCP configs are not converted to blank stdio ACP servers; hosted-session injection remains the AGH stdio proxy while remote MCP calls happen inside the daemon MCP adapter. +- Built-in `agh__skill_view` calls real skills registry content. +- CLI/HTTP/UDS list/search/info parity. +- `agh tool mcp --session <session_id> --bind-nonce <nonce>` `tools/list` matches session projection. +- Hosted MCP rejects a proxy bind without the session-bound bind nonce. +- Hosted MCP derives workspace id from session id and rejects client-supplied workspace context. +- Hosted MCP routes approval-required calls through ACP `session/request_permission` when available and fails closed with `approval_unreachable` when unavailable. +- Hosted MCP approval-required calls time out with `approval_timed_out` and cancel with `approval_canceled` when the proxy disconnects mid-approval. +- `approve-reads` exposes read-only tools but blocks mutating tools without approval. +- Mutating, destructive, and open-world extension/MCP tools execute only with explicit `ToolID`/toolset/source/agent grants plus ACP ceiling, approval bridge, session lineage, and hook revalidation. +- `approve-reads` does not auto-approve external read-only tools from untrusted extension/MCP sources. +- `approve-all` does not bypass explicit registry denies or session lineage narrowing.
+- `deny-all` blocks execution while operator surfaces still show diagnostics. +- The concrete MVP task tools call only their listed `task.Service` methods; excluded claim/complete/release tools are absent. +- `agh__task_child_create` cannot widen child permissions beyond parent lineage because `task.Service.CreateChildTask` remains the enforcement point. +- Hooks can deny pre-call and redact post-call output. +- Conflicted tool IDs are operator-visible and session-hidden. +- Result budget truncation is identical across CLI, HTTP, UDS, and MCP. + +### E2E Tests + +Use the AGH runtime E2E harness: + +- Start daemon with isolated `AGH_HOME`. +- Create a session with hosted AGH MCP enabled. +- Verify the agent session receives registry-backed MCP tools. +- Invoke a safe built-in tool through the hosted MCP path. +- Verify events, telemetry, CLI status, HTTP status, and UDS status agree. +- Install a test extension publishing a tool descriptor and MCP backend metadata. +- Install TypeScript and Go test extensions publishing executable `extension_host` tools. +- Verify operator diagnostics, runtime reconciliation, session visibility, successful invocation, disable/remove lifecycle, and conflict handling. +- Configure a local MCP test server and verify descriptor discovery plus a real `tools/call` through the daemon MCP adapter. +- Configure a remote OAuth-backed MCP server in isolated `AGH_HOME`, exercise `agh mcp auth login/status/logout` against a local OAuth test server, and verify registry tool diagnostics never expose access tokens, refresh tokens, authorization codes, PKCE verifiers, or approval tokens. +- acpmock fixtures and Playwright selectors for hosted MCP tool-call lifecycle ship in the same PR; matchers use structured `tool_id` metadata, never rendered prompt substrings. +- Per-package coverage must stay at or above 80%; race-sensitive packages run under `-race` with `CGO_ENABLED=1` in the Linux-Race CI lane. 
+ +Run full `make verify` before completing implementation tasks. + +## Implementation Steps + +### Build Order + +Ordered implementation sequence respecting dependencies: + +1. Add canonical `ToolID`, descriptor, backend kind, source, availability, result, and error contracts in `internal/tools` - no dependencies. +2. Replace metadata-only provider semantics with executable runtime provider/handle interfaces - depends on step 1. +3. Implement registry indexing, collision detection, MCP auth diagnostic mapping, and operator/session projections - depends on steps 1-2 and existing `internal/mcp/auth`. +4. Implement policy evaluator with ACP ceiling, agent policy, session lineage, source defaults, allow/deny, and toolsets - depends on step 3. +5. Implement dispatch pipeline with schema validation, availability recheck, hooks, budgets, handle call, normalization, and telemetry seams - depends on steps 3-4. +6. Add built-in provider for bootstrap AGH tools: `agh__tool_*`, `agh__skill_*`, `agh__network_*`, and only the enumerated MVP task tools - depends on step 5. +7. Add extension manifest backend metadata, manifest-authoritative validation, and runtime reconciliation contracts - depends on steps 1-3 and existing extension resources. +8. Add extension protocol capability `tool.provider`, wire-stable `provide_tools`/`tools/call` request-response structs, schema digest conformance fixtures, and invocation through the existing subprocess manager - depends on steps 5 and 7. +9. Add TypeScript SDK `extension.tool(...)`, schema digesting, and handler registration - depends on step 8. +10. Add public Go extension SDK with function-based tool helpers equivalent to TypeScript SDK - depends on step 8. +11. Add daemon-owned MCP descriptor discovery and `MCPCallExecutor` call-through adapter using existing MCP config/auth and token redaction boundaries - depends on steps 3-5 and existing `internal/mcp/auth`. +12. 
Add hosted MCP stdio proxy command `agh tool mcp --session --bind-nonce`, UDS peer-credential bind, approval bridge timeout/cancellation, and existing MCP resource/auth preservation - depends on steps 3-6 and 11. +13. Add API contract DTOs and `internal/api/core` handlers - depends on steps 3-6, 8, and 11. +14. Wire HTTP, UDS, CLI commands, and UDS client methods - depends on step 13. +15. Wire hooks and canonical `tool_id` payload updates end-to-end, including typed hook payloads, matchers, fixture builders, extension-author docs, and no dual identity mid-PR - depends on step 5. +16. Add config lifecycle, validation, generated docs, CLI docs, site docs, integration/E2E coverage, and run `make verify` - depends on all prior steps. + +### Technical Dependencies + +Blocking dependencies that must be resolved before implementation: + +- Existing ACP `permissions.mode` behavior in `internal/acp/permission.go`. +- Existing session lineage permission atoms in `internal/store/session_lineage.go`. +- Existing extension resource publication and `resources.publish.families = ["tools"]`. +- Existing extension subprocess lifecycle, JSON-RPC `process.Call`, Host API capability checks, and TypeScript `Extension.handle(...)` handler pattern. +- Existing hooks payload system. +- Existing MCP server configuration/resource lifecycle in `internal/config/provider.go`, `internal/config/mcpjson.go`, `internal/config/mcp_resource.go`, `internal/skills/mcp.go`, and `internal/extension/resource_publication.go`. +- Existing MCP auth lifecycle in `internal/mcp/auth`, `internal/store/globaldb/global_db_mcp_auth.go`, `internal/cli/mcp_auth.go`, `internal/settings`, and `internal/daemon/settings.go`. +- Existing skills registry APIs. +- Existing task/network services. +- OpenAPI/codegen path for web contract updates. + +### Safety Invariants + +1. 
Every AGH-owned tool call enters `internal/tools.Registry.Call`; CLI, HTTP, UDS, hosted MCP, extension, and session paths cannot bypass the registry dispatch pipeline. +2. Dispatch recomputes availability and `EffectiveToolDecision` at call time, even when discovery already filtered the tool. +3. ACP `permissions.mode` is always a ceiling. Registry, source, agent, session, and hook policy can narrow authority but cannot raise it above the effective ACP/session mode. +4. `approve-all` skips approval prompts only for otherwise allowed tools; it does not bypass explicit denies, source grants, session lineage, conflicts, unavailable backends, or hooks. +5. `approve-reads` applies only to registry-classified read-only tools. Extension/MCP read-only tools also require an explicit trusted source or per-tool/toolset/source/agent grant. Mutating, destructive, open-world, network-send, and task-write tools cannot inherit read approval by display title or ACP kind. +6. Session lineage stores concrete canonical `ToolID` atoms after toolset expansion; child sessions can only receive a subset of parent concrete atoms. +7. Tool ID collisions fail closed. A conflicted tool is operator-visible with reason codes and absent from session/model-visible projections. +8. Extension-installed tools can become executable in MVP only when the manifest-authoritative descriptor, source policy, extension health, runtime `provide_tools` descriptor, and `tools/call` handler all agree. +9. Third-party extension tool handlers never run in-process in the daemon during MVP; TypeScript and Go function handlers run inside supervised extension subprocesses. +10. Hooks dispatch at the call site and cannot tail event tables, spawn parallel queues, or mutate durable ownership state outside typed hook contracts. +11. Tool result limiting and redaction run before results cross CLI, HTTP, UDS, MCP, SSE, logs, memory, or event payloads. +12. 
Raw `claim_token`, MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings never appear in tool inputs/outputs persisted or emitted by AGH-owned surfaces. +13. Hosted MCP `tools/list` is a projection of `GET /api/sessions/{id}/tools`; divergence is a test failure. +14. Operator-visible diagnostics are not security boundaries. Hidden session projection plus dispatch-time revalidation is the security boundary. +15. Toolsets are expanded deterministically and cycle-checked before session projection; recursive expansion cannot happen lazily during dispatch. +16. Hosted MCP cannot bind to a session without a daemon-minted, single-use, session-bound bind nonce plus successful UDS peer credential and AGH binary validation. The nonce is not a bearer secret, is never accepted from tool input, and its launch record is invalidated on bind/session end/disconnect or `[tools.hosted_mcp].bind_nonce_ttl_seconds`. +17. Hosted MCP `tools/call` cannot pass an `approval_required` decision unless the daemon completes an ACP/session-mediated approval equivalent to CLI/HTTP/UDS approval semantics within `[tools.policy].approval_timeout_seconds`; timeout and proxy disconnect return `approval_timed_out` or `approval_canceled`. +18. No `agh__task_*` tool may bypass `task.Service.ClaimNextRun`, `Spawn`, session manager terminal-state authority, or task lifecycle authority. Claim/release/complete/fail/run-start operations are excluded from MVP tools. +19. External `extension_host` and `mcp` backend tools are executable only through their registered runtime handles; missing handlers, missing MCP clients, missing capabilities, source denies, auth failures, or runtime mismatches fail closed before user code or remote tools run. +20. 
Remote MCP OAuth/PKCE credentials are owned only by `internal/mcp/auth` and its `TokenStore`; the registry may consume redacted status and call through a narrow `internal/mcp/auth`-owned interface, but cannot persist, log, refresh, revoke, or copy access/refresh tokens. Raw tokens stay out of descriptors, resources, events, API responses, CLI output, MCP responses, and tool results. +21. Hosted MCP bind nonces and remote MCP OAuth tokens have separate issuers, storage, lifetimes, redaction labels, and failure codes. A `hosted_mcp_bind_nonce` is not sufficient to bind an AGH hosted MCP proxy without UDS peer credential validation, must never satisfy a remote MCP server auth check, and a remote MCP OAuth token must never bind an AGH hosted MCP proxy. + +## Monitoring and Observability + +Events: + +- `tool.registry.refresh_started` +- `tool.registry.refresh_completed` +- `tool.registered` +- `tool.updated` +- `tool.removed` +- `tool.conflicted` +- `tool.availability_changed` +- `tool.policy_evaluated` +- `tool.call_started` +- `tool.call_completed` +- `tool.call_failed` +- `tool.call_denied` +- `tool.result_truncated` + +Required fields: + +- `tool_id` +- `display_title` +- `source_kind` +- `source_owner` +- `workspace_id` +- `session_id` +- `parent_session_id` +- `root_session_id` +- `agent_name` +- `risk` +- `read_only` +- `destructive` +- `open_world` +- `approval_mode` +- `decision` +- `reason_codes` +- `duration_ms` +- `result_bytes` +- `truncated` +- `correlation_id` + +Metrics: + +- registered tools by source kind +- conflicted tools by source kind +- available/callable tools by session +- calls started/completed/failed/denied +- approval-required counts +- result truncation counts +- backend latency by source kind +- hook-denied counts +- policy-denied counts + +Redaction: + +- Never log raw claim tokens, MCP auth tokens, OAuth codes, PKCE verifiers, secret bindings, or full tool payloads when marked sensitive. +- Use digests for large inputs/results. 
+- Preserve enough metadata to debug policy and availability without exposing secrets. + +## Technical Considerations + +### Key Decisions + +Decision: Tool Registry is an AGH daemon runtime service, not an ACP registry. + +Rationale: ACP has no callable tool registry and relies on MCP for tool discovery. + +Trade-off: AGH must maintain its own registry semantics and expose them through MCP/session surfaces. + +Decision: Use one canonical `ToolID` with `__` namespace separators. + +Rationale: Avoid dotted/internal plus wire alias ambiguity and stay compatible with stricter provider naming limits. + +Trade-off: `agh__skill_view` is less visually elegant than `agh.skill.view`, but it prevents dual identity bugs. + +Decision: Built-ins execute in-process; TypeScript and Go extension tools execute out-of-process through `extension_host`. + +Rationale: Daemon safety and extension isolation matter more than plugin convenience. + +Trade-off: Extension tool latency and setup are higher, but failure containment is better and extension authors still get function-based APIs. + +Decision: Remote MCP-backed tools execute in MVP through daemon-owned MCP clients. + +Rationale: MCP config/auth already exists and the registry must not become useful only for descriptors. + +Trade-off: The MVP must implement stricter auth redaction and adapter tests, but operators get one coherent tool model. + +Decision: ACP `permissions.mode` is the approval ceiling. + +Rationale: Avoid contradictory policy layers and keep existing settings truthful. + +Trade-off: Registry policy must compute effective decisions rather than simple allow/deny flags. + +Decision: Operator and session projections differ. + +Rationale: Operators need diagnostics; models should see only callable tools. + +Trade-off: More projection logic, but less model confusion and fewer unsafe calls. + +Decision: Keep cold resource records separate from runtime handles. 
+ +Rationale: Resource/projector system is good for installed metadata; executable dispatch needs function/protocol handles and live health. + +Trade-off: More types, but cleaner lifecycle and safer extension support. + +### Known Risks + +Risk: Mutating tools mislabeled as read-only. + +Mitigation: Descriptor validation, review tests, policy matrix tests, and hook/audit visibility. + +Risk: External tool name collisions force late breaking changes. + +Mitigation: Enforce canonical `ToolID` grammar and fail-closed collision handling in MVP. + +Risk: Hosted MCP path diverges from CLI/HTTP/UDS behavior. + +Mitigation: Hosted MCP proxies through UDS into the same registry dispatch path. + +Risk: Extension tools become visible before safe runtime execution is confirmed. + +Mitigation: Operator-visible only until manifest/runtime reconciliation, availability, source policy, and backend handle all pass. + +Risk: `approve-all` is misunderstood as "execute everything." + +Mitigation: Docs and UI copy must clarify it auto-approves otherwise allowed calls; it does not bypass explicit denies, source grants, availability, lineage, or hooks. + +Risk: Toolsets create ambiguous policy language. + +Mitigation: Keep `tools` and `toolsets` as separate typed fields; expand toolsets to concrete `ToolID`s for session lineage. + +Risk: Result payloads leak secrets or overwhelm context. + +Mitigation: Central result limiter, redaction metadata, digesting, and output budgets. + +### Delete Targets + +Because AGH is greenfield alpha, the implementation should hard-cut ambiguous old tool concepts instead of adding compatibility bridges: + +- Replace metadata-only `ToolProvider.Tools(ctx)` as the runtime extension point with provider/descriptor/handle contracts. +- Remove descriptor-only MVP wording and `backend_not_implemented` behavior for `extension_host` and `mcp` tools. +- Remove any public standalone `subprocess` backend in favor of `extension_host` subprocess isolation. 
+- Replace `internal/tools.Tool.Name` as a registry identity with canonical `ToolID` in new public contracts. +- Remove any new public use of dotted tool IDs or separate MCP wire aliases. +- Replace hook policy identity based on `tool_name` + `tool_namespace` with canonical `tool_id` for registry-owned tool calls, including `internal/hooks/payloads.go` `ToolPreCallPayload`, `ToolPostCallPayload`, and `ToolPostErrorPayload`. +- Update docs, tests, CLI/API examples, and task artifacts that refer to dotted IDs or dual aliasing. + +## Architecture Decision Records + +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - extension tools are manifest-first, executable, and out-of-process in MVP. +- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - expose AGH registry tools through hosted local MCP plus shared CLI/HTTP/UDS. +- [ADR-003: Runtime Registry Package Boundary](adrs/adr-003-runtime-registry-package-boundary.md) - `internal/tools` owns runtime registry and dispatch; `internal/catalog` remains thin. +- [ADR-004: MVP Native Tool Scope](adrs/adr-004-mvp-native-tool-scope.md) - bootstrap catalog/skill tools plus selected network/task tools. +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - ACP approval mode is the system/session ceiling. +- [ADR-006: Tool Visibility By Surface](adrs/adr-006-tool-visibility-by-surface.md) - operator projections show diagnostics; model projections show callable tools only. +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - one provider-safe `ToolID` using reserved `__` namespace separators. +- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - `extension.toml` is source of truth and runtime descriptors reconcile against it. 
+- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - Go extensions get function-based subprocess SDK APIs equivalent to TypeScript. +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - remote MCP tools are executable in MVP through daemon-owned MCP adapters. + +## Nits + +Peer review round 1 nits and disposition: + +- `N-001` ToolsetID grammar: addressed in Data Models by sharing the `ToolID` grammar. +- `N-002` Tool pattern grammar: addressed in Config Lifecycle with explicit allowed/disallowed pattern forms. +- `N-003` `approval_token` semantics: addressed in API Endpoints and Hosted MCP Approval Bridge. +- `N-004` `dynamic` source kind: addressed in Data Models as reserved with no MVP producer. +- `N-005` hosted MCP lifecycle: addressed in Hosted MCP lifecycle. +- `N-006` `agh__skill_view` result budget: addressed in Integration Points / Skills. +- `N-007` hook identity migration co-ship: addressed in Implementation Steps step 15. +- `N-008` invalid existing session lineage atoms: addressed in Config Lifecycle / Agent Definitions. +- `N-009` hosted MCP workspace resolution: addressed in Hosted MCP lifecycle/authentication. +- `N-010` MVP tool risk classification: addressed in Network And Tasks tables. + +Peer review round 2 blockers and nits disposition: + +- `B-001` Extension wire contracts: addressed in Core Interfaces with protocol constants, capability-method mapping, and `provide_tools` / `tools/call` request-response structs. +- `B-002` Schema digest canonicalization: addressed in Data Models with RFC 8785 JCS canonicalization, lowercase SHA-256 digests, and shared SDK/daemon fixtures. +- `B-003` Remote MCP bearer injection: addressed in Core Interfaces and Existing MCP Config/Auth with `MCPCallExecutor` owned by `internal/mcp`.
+- `B-004` Hosted MCP bind-token contradiction: addressed by replacing bearer bind tokens with non-secret bind nonces plus UDS peer credential and AGH binary validation. +- `B-005` Approval bridge wait: addressed with `[tools.policy].approval_timeout_seconds`, `approval_timed_out`, `approval_canceled`, and proxy-disconnect cancellation behavior. +- `N-001` Approval timeout and bind nonce TTL defaults: addressed in Config Lifecycle and Safety Invariants. +- `N-002` Go SDK path: addressed in ADR-009 by committing to `sdk/go`. +- `N-003` Runtime contract fixture updates: addressed in Test Strategy with acpmock and Playwright fixture requirements. +- `N-004` Coverage/race discipline: addressed in Test Strategy. +- `N-005` Long sanitized external IDs: addressed in ToolID and reason codes with `id_too_long`. +- `N-006` Hook payload delete targets: addressed in Delete Targets. +- `N-007` External read-only trust: addressed in Config Lifecycle with `trusted_sources`. +- `N-008` Child task lineage authority: addressed in Network And Tasks and Integration Tests. diff --git a/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md b/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md new file mode 100644 index 000000000..0140985b1 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md @@ -0,0 +1,91 @@ +# ADR-001: Extension Tool Execution Boundary + +## Status + +Accepted + +## Date + +2026-04-28 + +## Context + +AGH's Tool Registry must let operators and agents create real tools through the same extension model AGH already uses for hooks, memory backends, bridge adapters, and subprocess-hosted capabilities. + +The previous interpretation of this ADR kept extension tools descriptor-only in the MVP. That is insufficient: it lets AGH list extension tools but does not let a TypeScript or Go extension author define a callable tool. 
It also conflicts with the current extension runtime, which already supports subprocess JSON-RPC, capability negotiation, Host API grants, health checks, service methods, and TypeScript `Extension.handle(...)` handlers. + +The registry still must preserve daemon safety. Third-party extension code must not run in-process inside the daemon. Cold resource records must not persist function pointers or dynamic handler state. + +## Decision + +The Tool Registry MVP supports three executable backend classes: + +- `native_go`: in-process Go function tools compiled into the daemon and registered by first-party/built-in providers at the daemon composition root. +- `extension_host`: out-of-process tools implemented by installed extensions through the existing subprocess JSON-RPC extension runtime. +- `mcp`: remote or local MCP tools called through daemon-owned MCP client adapters that consume existing MCP config and `internal/mcp/auth` redacted credential state. + +Extension-host tools are first-class executable tools in the MVP. An extension declares `capabilities.provides = ["tool.provider"]`, publishes manifest-authoritative `resources.tools` descriptors, and implements the negotiated `tools/call` service method. The daemon only dispatches to an extension tool when the extension is enabled, active, healthy, negotiated `tool.provider`, implements `tools/call`, and passes registry policy, source grants, availability, hooks, and session lineage checks. + +Third-party extension code never runs in the daemon process. Third-party Go tools use a subprocess Go SDK, not Go `plugin`, cgo-loaded shared libraries, reflection injection, or manifest function pointers. + +## Alternatives Considered + +### Descriptor-only extension tools + +- **Description**: Extension manifests publish tool metadata, but calls return unavailable until a later TechSpec implements backend dispatch. +- **Pros**: Smaller MVP and easier security review. 
+- **Cons**: Fails the product requirement that extensions can create tools. Leaves TypeScript and Go extension authors with no executable tool path. +- **Why rejected**: The user explicitly rejected this as too weak, and subagent/codebase research confirmed that the existing extension runtime can support executable subprocess handlers. + +### In-process third-party handlers + +- **Description**: Let trusted extensions register Go or JavaScript handlers directly in the daemon process. +- **Pros**: Low call latency and a simple function-call programming model. +- **Cons**: Expands daemon compromise risk, creates a plugin ABI, complicates lifecycle isolation, and pressures AGH toward unsafe `plugin`/reflection behavior. +- **Why rejected**: Built-ins can use `native_go`; third-party extension code must cross a process/protocol boundary. + +### TypeScript-only extension tools + +- **Description**: Support executable TypeScript extension tools first; defer Go subprocess authoring. +- **Pros**: Reuses the existing TypeScript SDK quickly. +- **Cons**: Does not satisfy the requirement that extensions define tools using Go functions or TypeScript. +- **Why rejected**: The MVP must include a public Go subprocess SDK for tool providers. + +## Consequences + +### Positive + +- Extension tools become real executable capabilities in the MVP. +- Built-in Go tools and third-party extension tools share one registry, policy path, hosted MCP exposure path, hook path, telemetry path, and result-budget path. +- The design reuses AGH's existing extension runtime instead of inventing a descriptor-only side channel. +- Remote MCP tools become governed by AGH policy and observability instead of living in provider-private tool universes. + +### Negative + +- MVP scope grows: registry execution, extension protocol, TypeScript SDK, Go SDK, MCP client call-through, redaction, and E2E coverage must ship together.
+- Remote MCP call-through adds auth, transport, timeout, and result-normalization complexity. +- Extension descriptor/runtime reconciliation needs strict validation to avoid mismatch bugs. + +### Risks + +- Misclassified mutating tools could bypass approval expectations. Mitigation: descriptor validation, source grants, policy matrix tests, and dispatch-time revalidation. +- Extension tools could leak secrets through results or errors. Mitigation: central redaction and result limiting before crossing CLI, HTTP, UDS, MCP, SSE, logs, or events. +- MCP OAuth tokens could leak through registry views. Mitigation: consume only redacted `internal/mcp/auth` status and never copy token material into descriptors, resources, events, or responses. + +## Implementation Notes + +- `internal/tools` owns `native_go`, `extension_host`, and `mcp` backend contracts. +- `internal/daemon` wires first-party Go function providers and injects extension/MCP adapters. +- `internal/extension/protocol` adds `CapabilityProvideToolProvider = "tool.provider"` and `ExtensionServiceMethodToolsCall = "tools/call"`. +- `@agh/extension-sdk` adds `extension.tool(descriptor, handler)`. +- A new public Go subprocess SDK adds the equivalent Go helper for defining tools with Go functions. +- Hosted MCP remains the session exposure transport, but every call enters `internal/tools.Registry.Call`. 
+ +## References + +- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md` +- `.compozy/tasks/tools-registry/analysis/synthesis.md` +- `internal/extension/manager.go` +- `internal/extension/protocol/host_api.go` +- `sdk/typescript/src/extension.ts` +- `internal/subprocess/handshake.go` diff --git a/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md b/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md new file mode 100644 index 000000000..21ddbeea0 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md @@ -0,0 +1,64 @@ +# ADR-002: Session Tool Exposure Path + +## Status + +Accepted + +## Context + +The Tool Registry must be agent-manageable, not only an internal daemon API. AGH manages external ACP-compatible runtimes, so it cannot assume direct control over every provider's internal model API tool loop. + +The registry still needs a model-visible path for session-callable tools such as `agh__tool_search`, `agh__skill_view`, `ext__linear__search`, and `mcp__github__create_issue`. Competitor research suggests MCP is the most portable first boundary: + +- Claude Code and other runtimes already understand MCP tools. +- MCP keeps tool exposure protocol-based rather than driver-specific. +- MCP can be backed by the same registry dispatch path as CLI, HTTP, and UDS. +- Runtimes without MCP can still reach AGH through CLI/UDS fallback surfaces. + +## Decision + +The MVP will expose session-callable registry tools through an AGH-hosted local MCP server plus shared CLI, HTTP, and UDS contracts. + +The daemon will own one registry contract and dispatch pipeline. Surfaces call into that same pipeline: + +- hosted MCP server for model-visible AGH, extension-host, and MCP-backed tools in runtimes that support MCP, +- CLI commands for operator and agent fallback use, +- HTTP API for web/operator clients, +- UDS API for local trusted clients and internal AGH tools. 
+ +Direct driver/ACP injection can be added later as an optimization for runtimes that support it, but it is not the MVP exposure path. + +## Consequences + +Every session-callable tool must be representable as an MCP tool without losing policy, availability, hook, telemetry, source provenance, auth redaction, approval, and result-budget behavior. + +The registry contract types must be shared below all surfaces rather than copied into each transport. + +Session start should be able to attach the hosted AGH MCP server for agents whose runtime supports MCP. Agents/runtimes without MCP remain supported through CLI/UDS fallback. + +The hosted MCP server is not an execution backend. It is an exposure transport. `tools/list` returns the effective session projection, and every `tools/call` re-enters `internal/tools.Registry.Call`, which resolves `native_go`, `extension_host`, or `mcp` handles and revalidates policy at dispatch time. + +The TechSpec must define how the hosted MCP server is authorized, scoped to the session/workspace, and prevented from bypassing registry dispatch. + +Live catalog deltas can be designed as a later driver capability. The MVP can refresh the hosted MCP server's tool list and expose search/list tools through the registry. + +## Rejected Alternatives + +### CLI/UDS only + +This would be simpler and still agent-operable through terminal tools, but it would not provide native tool calls for runtimes with MCP support and would leave the main "last mile" gap partially open. + +### Direct ACP/driver injection first + +This could be cleaner for a single provider, but it is less portable and would force the first implementation into provider-specific behavior. + +### HTTP/UDS only + +This would build the management API but delay the session-visible tool surface, making the foundation less useful to autonomous agents. 
+ +## Evidence + +- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: MCP tools are adapted into the same local tool contract and refreshed dynamically. +- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: MCP is a provider backend for plugin/bundle tools. +- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: AGH already resolves MCP sidecars and has CLI/HTTP/UDS-style management surfaces elsewhere. +- `.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md`: AGH should avoid assuming direct LLM API control while still exposing AGH-owned tools through provider-neutral surfaces. diff --git a/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md b/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md new file mode 100644 index 000000000..a2d51a6b9 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md @@ -0,0 +1,62 @@ +# ADR-003: Runtime Registry Package Boundary + +## Status + +Accepted + +## Context + +AGH already has `internal/tools`, but it currently defines only metadata records and a list-only provider interface. The Tool Registry foundation needs runtime concerns: executable handles, provider adapters, availability, policy projection, dispatch, hooks, telemetry, result budgeting, and hosted MCP exposure. + +The registry also needs to compose with skills for agent-facing discovery operations such as `agh__tool_search`, `agh__skill_list`, and `agh__skill_view`. That creates a package-boundary question: should the runtime registry live in `internal/tools`, in a new broad `internal/catalog`, or in daemon orchestration code? + +## Decision + +`internal/tools` will own the runtime Tool Registry contracts and execution path. 
+ +The package will evolve from metadata-only records into the home for: + +- tool descriptors and source/provenance types, +- executable handles and providers, +- availability and reason codes, +- policy projection for tool views, +- central `Registry.Call` dispatch, +- result normalization and result-budget metadata, +- provider adapters for `native_go`, `extension_host`, `mcp`, and future bridges. + +A thin `internal/catalog` facade may compose `internal/tools` and `internal/skills` for cross-domain list/search/view surfaces and for AGH-native tools such as `agh__tool_search` and `agh__skill_view`. + +The daemon composition root will wire providers and dependencies, but it will not own registry logic. + +## Consequences + +Tool execution remains in the tool domain instead of a broad catalog domain. This keeps dispatch, policy, availability, and telemetry testable without booting the daemon. + +`internal/catalog` stays small and read-oriented. It coordinates cross-domain discovery and progressive disclosure, but does not become a second execution engine. + +Existing `internal/tools.Tool` resource compatibility must be handled through clear type splits rather than by adding runtime function fields to resource records. The TechSpec must distinguish cold resource specs from runtime descriptors and handles: + +- cold `tool` resources describe desired state and provenance; +- runtime `Descriptor` values normalize policy, schema, source, backend, and risk metadata; +- runtime `Handle` values execute through Go function closures, extension subprocess calls, or MCP client calls. + +Daemon boot must register providers explicitly, following AGH's composition-root discipline. + +`internal/tools` must not import `internal/extension`, `internal/mcp`, `internal/api/*`, `internal/cli`, or `internal/daemon`. Those adapters are injected by daemon wiring behind interfaces defined in `internal/tools`. 
+ +## Rejected Alternatives + +### New `internal/catalog` owns everything + +This would centralize skills and tools, but it would also mix skill content loading, tool dispatch, extension adapter logic, search, and policy into one large package too early. + +### Daemon-owned registry logic + +Keeping registry logic inside daemon orchestration would reduce package churn, but it would make dispatch harder to test and would couple tool semantics to boot wiring. + +## Evidence + +- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: `internal/tools` is currently metadata-only while daemon boot already owns resource projection. +- `.compozy/tasks/tools-registry/analysis/analysis_goclaw.md`: GoClaw keeps executable tool contracts and policy close to the tools package. +- `.compozy/tasks/tools-registry/analysis/synthesis.md`: recommends `internal/tools` for runtime contracts and a thin catalog facade for cross-skill/tool search. +- `internal/CLAUDE.md`: AGH favors interfaces where consumed and composition-root wiring over daemon package logic accumulation. diff --git a/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md b/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md new file mode 100644 index 000000000..7c56c5821 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md @@ -0,0 +1,90 @@ +# ADR-004: MVP Tool Scope + +## Status + +Accepted + +## Context + +The Tool Registry MVP must prove more than metadata listing. It must demonstrate that AGH-owned tools can be discovered, authorized, invoked through the hosted MCP surface, called through CLI/HTTP/UDS contracts, routed through one dispatch pipeline, and observed. + +The smallest read-only bootstrap toolset would prove catalog mechanics, but it would not prove meaningful autonomy. 
AGH's product direction requires agents to manage coordination and task flows, so network and task tools should be represented in the first implementation slice. + +At the same time, skill installation is a separate supply-chain surface. It requires stricter install policy, trust tiers, approval flows, and scanner decisions. It should not be bundled into the first registry execution proof unless the supply-chain work is explicitly scoped. + +## Decision + +The MVP tool scope includes four executable groups: + +1. Built-in `native_go` tools for AGH catalog, skill, network, and bounded task operations. +2. Installed extension-host tools implemented through TypeScript or Go subprocess SDKs. +3. Remote/local MCP tools discovered from existing MCP config/resource sources and called through daemon-owned MCP clients. +4. The AGH-hosted MCP exposure proxy that presents the effective session projection for all callable groups. + +The built-in `native_go` scope includes: + +- `agh__tool_list` +- `agh__tool_search` +- `agh__tool_info` +- `agh__skill_list` +- `agh__skill_search` +- `agh__skill_view` +- `agh__network_peers` +- `agh__network_send` +- `agh__task_list` +- `agh__task_read` +- `agh__task_create` +- `agh__task_child_create` +- `agh__task_update` +- `agh__task_cancel` +- `agh__task_run_list` + +Claim/release/complete/fail/run-start task operations are excluded from this MVP because they cross claim-token, lease, spawn, and session lifecycle authority. They require a separate task execution TechSpec. + +Skill install/remove/update tools are not included in the MVP native tool scope unless a later decision explicitly adds the required supply-chain and approval work. 
+ +The MVP must also include executable proof fixtures: + +- a TypeScript extension defining at least one read-only tool and one mutating tool through `extension.tool(...)`; +- a Go subprocess extension defining equivalent tools through the public Go extension SDK; +- an MCP test server with read-only and mutating tools, auth status coverage, and remote call-through. + +## Consequences + +The MVP must include both read-only and mutating tools. The registry must model risk, read-only/destructive/open-world flags, permission checks, and policy gates from the first implementation. + +Native, extension-host, and MCP tools must use the same registry dispatch path as catalog tools. They must not call around policy, availability, hooks, result budgeting, auth redaction, or telemetry. + +The hosted MCP server must expose only the tool subset authorized for the session. Agent-visible discovery must not advertise network/task tools to sessions that lack the required permission atoms. + +QA must include real scenario coverage for: + +- listing and searching tools, +- viewing a skill body through `agh__skill_view`, +- listing peers, +- sending a network message through `agh__network_send` with permission enforcement, +- creating/updating or otherwise exercising the bounded task tool set, +- invoking a TypeScript extension-host tool through CLI/HTTP/UDS/hosted MCP, +- invoking a Go SDK extension-host tool through CLI/HTTP/UDS/hosted MCP, +- invoking a remote MCP-backed tool through CLI/HTTP/UDS/hosted MCP, +- proving unauthorized sessions cannot see or call mutating/destructive tools. + +## Rejected Alternatives + +### Read-only bootstrap only + +This would be safer and simpler, but it would leave the registry unproven for AGH's coordination and autonomy use cases. + +### Bootstrap plus skill install + +This would improve agent self-service, but it introduces supply-chain risk that belongs in a dedicated policy/scanning/install decision. 
+ +### Foundation only + +This would create the architecture without proving the agent-first experience that motivated the Tool Registry work. + +## Evidence + +- `.compozy/tasks/tools-registry/analysis/synthesis.md`: recommends a small bootstrap set and identifies network/task tools as later groups. +- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`: prior gaps identify network and task tools as strategically important agent-callable surfaces. +- `.compozy/tasks/hermes/analysis/analysis_tools_security.md`: mutating and open-world tools require stronger permission and security gates before broad exposure. diff --git a/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md b/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md new file mode 100644 index 000000000..6e6184dd4 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md @@ -0,0 +1,103 @@ +# ADR-005: ACP Approval Policy Integration + +## Status + +Accepted + +## Context + +AGH already has a system-level ACP tool approval policy exposed in settings and enforced by the ACP tool host: + +- `deny-all` +- `approve-reads` +- `approve-all` + +The Tool Registry will add per-tool metadata and policy, including read-only, destructive, open-world, source, toolset, session permission atoms, and extension/MCP grants. If this registry policy is designed as a parallel approval system, AGH can produce contradictory states such as: + +- system policy says `deny-all`, but a tool-level policy says allow; +- system policy says `approve-reads`, but a mutating tool claims read-only; +- system policy says `approve-all`, but a session/agent intentionally narrows permissions; +- hosted MCP exposes a tool the ACP host would later block. + +The registry policy must integrate with the existing ACP policy rather than bypass it. 
+ +## Decision + +The ACP `permissions.mode` policy is the system approval ceiling for session-visible tool execution. + +Tool Registry policy operates below that ceiling as a more granular filter. It can narrow, classify, require approval, or deny a tool, but it cannot silently grant more authority than the effective system/session ACP policy allows. + +The effective decision order is: + +1. Resolve the system/session ACP approval mode. +2. Resolve agent/session lineage tool permission atoms. +3. Resolve registry visibility and allow/deny/toolset policy. +4. Resolve source/risk defaults for built-in, extension, MCP, and dynamic tools. +5. Resolve tool descriptor risk flags: read-only, destructive, open-world, requires interaction. +6. Run availability checks. +7. Run pre-call hooks. +8. Dispatch only if the combined decision is allowed or explicitly approved. + +`approve-all` is permissive but not a bypass of explicit denies. It removes automatic ACP prompting for allowed tools, but registry deny rules, unavailable state, session lineage restrictions, hooks, and source grants still apply. + +`approve-reads` auto-approves only tools classified as read-only by the registry and allowed by session policy. Extension and MCP read-only tools also require an explicit trusted source or per-tool/toolset/source/agent grant because their read-only claim is externally authored. Mutating, destructive, open-world, network-send, task-write, extension-host write, and MCP write tools require explicit allow by `ToolID`, toolset, or source policy plus approval when the effective approval mode requires it. + +`deny-all` denies by default. The registry may still list operator-visible tools with reasons, but session-visible execution requires explicit approval/allowance through the existing ACP approval path or an equivalent local approval surface. + +Agent frontmatter and session lineage can lower permissions relative to system policy. 
They cannot raise permissions above the system approval ceiling. + +The MVP permits mutating, open-world, and destructive extension-host and MCP tools, but only when all gates pass: + +1. The descriptor classifies `read_only`, `destructive`, `open_world`, and `requires_interaction` correctly. +2. The source tier is allowed for the effective workspace/session. +3. The concrete `ToolID` or expanded toolset is allowed by registry/session policy. +4. ACP `permissions.mode` does not deny the call. +5. The approval bridge succeeds within the configured timeout when approval is required. +6. The backend is available, healthy, authorized, and non-conflicted. +7. Hooks do not deny or narrow the call. +8. Dispatch revalidates all gates immediately before execution. + +## Consequences + +The registry must compute an `EffectiveToolDecision` instead of storing a single boolean. The decision should include: + +- system approval mode, +- session/agent policy result, +- registry policy result, +- source/risk default result, +- availability result, +- hook result, +- final visibility decision, +- final execution decision, +- user/operator-facing reason codes. + +The hosted AGH MCP server must expose only tools allowed by the effective visibility decision for that session. Dispatch must still revalidate the effective execution decision. + +Tool descriptors must classify read-only vs mutating accurately. A mutating, destructive, or open-world tool mislabeled as read-only is a correctness and security bug. + +The web settings UI text remains true: `approve-all` auto-approves tool calls, but agents and registry policy can lower permissions. The TechSpec should clarify that "auto-approved" does not mean "all registered tools are visible and executable regardless of registry policy." + +## Rejected Alternatives + +### Registry policy bypasses ACP policy + +This would create inconsistent behavior between ACP-native tools, AGH-hosted MCP tools, and CLI/UDS calls. 
It would also make the existing settings UI misleading. + +### Registry policy replaces ACP policy + +This would require redesigning existing ACP permission handling and settings before the Tool Registry can ship. The MVP should integrate with the existing model and extend it. + +### Tool-level policy alone controls execution + +Per-tool policy is necessary but insufficient. Session lineage, system approval mode, hooks, availability, and source grants all affect whether a call is safe and authorized. + +Daemon-mediated approval waits are bounded by `[tools.policy].approval_timeout_seconds`. Timeout and caller/proxy disconnect fail closed with deterministic reason codes; no registry tool call may wait indefinitely on an operator or ACP permission response. + +## Evidence + +- `internal/acp/permission.go:75-132`: ACP policy defaults, path validation, and decisions for `approve-all`, `approve-reads`, and `deny-all`. +- `internal/acp/tool_host.go:64-83`: local tool host is constructed with an ACP permission mode. +- `web/src/routes/_app/settings/general.tsx:307-315`: settings UI describes the three tool approval policies. +- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: permission should be an ordered pipeline rather than a tool-local boolean. +- `.compozy/tasks/tools-registry/analysis/analysis_goclaw.md`: runtime grants should be rechecked at execution time. +- `.compozy/tasks/tools-registry/analysis/synthesis.md`: dispatch must recheck availability and authorization and use one central pipeline. 
diff --git a/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md b/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md new file mode 100644 index 000000000..b9f20bd15 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md @@ -0,0 +1,73 @@ +# ADR-006: Tool Visibility by Surface + +## Status + +Accepted + +## Context + +The Tool Registry will track more states than "exists" or "does not exist." A tool may be registered but disabled, unauthorized, unavailable, unhealthy, missing configuration, missing an MCP backend, blocked by session policy, denied by ACP approval mode, or conflicted by name. + +Different consumers need different views: + +- operators need diagnostics and reason codes to fix configuration and extension problems; +- agents need a low-noise callable surface that does not invite impossible calls; +- dispatch still needs to revalidate because discovery visibility is not a security boundary. + +## Decision + +Operator surfaces show unavailable and unauthorized tools with reason codes. Session-visible and model-visible surfaces expose only tools that are visible and callable for the effective session context. + +Operator surfaces include: + +- CLI, +- HTTP API, +- Web UI, +- privileged UDS/operator views. + +Session-visible/model-visible surfaces include: + +- the AGH-hosted MCP tool list exposed to an agent session, +- any future direct ACP/driver tool injection, +- non-privileged session-scoped UDS catalog views. + +The registry must compute both: + +- `OperatorToolView`: includes all registered tools plus state, reason codes, source/provenance, policy diagnostics, conflict diagnostics, and availability details. +- `SessionToolView`: includes only tools that pass effective visibility and execution preconditions for that session. + +Dispatch must revalidate the full effective execution decision even when a tool was present in `SessionToolView`. 
+ +## Consequences + +Agents are not shown tools that they cannot call in the current session. This avoids prompt/tool-call noise and reduces attempts to invoke unavailable tools. + +Operators can still debug why a tool is not appearing to an agent, including whether the cause is ACP approval mode, session lineage, agent policy, source grants, extension health, MCP health, missing config, or a conflict. + +CLI/HTTP endpoints need an explicit scope or view mode. For example: + +- operator default: include unavailable tools and reasons; +- session-scoped query: return the same filtered view that the hosted MCP server would expose. + +The hosted MCP server must use `SessionToolView`, not raw registry contents. + +## Rejected Alternatives + +### Everyone sees unavailable tools + +This improves agent planning transparency but increases noise and risks inducing models to call tools that the daemon will reject. + +### Hide unavailable tools everywhere + +This is clean for fail-closed execution, but it makes operator troubleshooting poor and hides extension/MCP/config problems. + +### Configurable per surface in MVP + +This offers maximum flexibility but creates a larger behavior matrix before the registry foundation is stable. + +## Evidence + +- `.compozy/tasks/tools-registry/analysis/analysis_hermes.md`: availability filtering is useful for model-visible definitions. +- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: request-time tool pools are context-specific and filtered before model exposure. +- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: lifecycle and policy states need diagnostics while agent projections should be policy-filtered. +- `.compozy/tasks/tools-registry/analysis/synthesis.md`: discovery can hide unavailable/unauthorized tools from agents while operator surfaces show reasons, but dispatch still rechecks. 
diff --git a/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md b/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md new file mode 100644 index 000000000..e7426d793 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md @@ -0,0 +1,123 @@ +# ADR-007: Canonical Tool ID Format + +## Status + +Accepted. + +## Context + +AGH needs one stable tool identifier that can be used across the runtime registry, policy rules, CLI, HTTP, UDS, telemetry, hooks, audit logs, and the AGH-hosted MCP surface. + +Earlier options considered dotted internal IDs such as `agh.skill.view` plus a separate MCP-safe wire alias such as `agh_skill_view`. ACP/MCP compatibility research showed this would work technically, but it creates two strong names for the same tool and increases the chance of policy, audit, telemetry, or dispatch code using the wrong identity. + +The identifier also needs to remain compatible with provider and host constraints. MCP allows dots in tool names, but common model tool/function APIs are stricter and accept letters, digits, underscores, and hyphens with a 64-character limit. AGH should choose a lowest-common-denominator format for callable tool IDs rather than rely on a more permissive protocol layer. + +Claude Code uses the `mcp__server__tool` convention for MCP tools. This gives an explicit namespace boundary while staying inside provider-safe characters. 
+## Decision + +AGH will use one canonical public `ToolID` format across every surface: + +```text +segment ( "__" segment )* +``` + +Each segment must match: + +```text +[a-z][a-z0-9_]* +``` + +Global constraints: + +- maximum length: 64 characters; +- lowercase ASCII only; +- digits allowed after the first character of each segment; +- `_` allowed inside a segment; +- `__` is reserved exclusively as a segment separator; +- no dot; +- no hyphen; +- no uppercase; +- no empty segment; +- no leading or trailing `_` inside a segment if it would create an empty separator ambiguity. + +Examples: + +```text +agh__tool_list +agh__tool_search +agh__tool_info +agh__skill_list +agh__skill_view +agh__network_peers +agh__network_send +agh__task_list +agh__task_read +ext__linear__search +ext__linear__create_issue +mcp__github__create_issue +mcp__context7__query_docs +``` + +`ToolID` is the identity used by: + +- registry descriptors; +- provider registration; +- policy allow/deny rules; +- toolsets; +- CLI commands; +- HTTP and UDS APIs; +- hooks; +- telemetry and audit logs; +- hosted MCP `Tool.name`; +- dispatch requests. + +AGH will not use a second wire alias for the same tool in the MVP. Display titles are UI-only and do not participate in policy, authorization, conflict resolution, or dispatch. + +Source/provenance remains structured metadata, not an alternate identity: + +```json +{ + "id": "mcp__github__create_issue", + "source": { + "kind": "mcp", + "serverName": "github", + "rawToolName": "create_issue" + } +} +``` + +AGH may show a shorter display title such as `Create Issue`, but the canonical ID remains `mcp__github__create_issue`. + +## Collision Rules + +Registration and session projection must fail closed: + +- If two providers produce the same `ToolID`, the later registration is rejected or marked `conflicted`. 
+- If sanitizing an external MCP/server/extension tool name would collide with an existing `ToolID`, the candidate tool is marked `conflicted` and is not exposed to model-visible surfaces. +- AGH must not silently truncate, overwrite, or choose "last writer wins". +- Operator surfaces may show conflicted tools with reason codes and provenance. +- Session/model-visible surfaces expose only non-conflicted callable tools. + +## Consequences + +Positive: + +- One identifier works across registry, wire, policy, telemetry, and dispatch. +- No dotted-to-wire alias mapping is needed in the MVP. +- Namespace boundaries remain visible through reserved `__`. +- The format is compatible with stricter provider tool-name constraints. +- Policy patterns stay simple, for example `agh__skill_*` and `mcp__github__*`. + +Tradeoffs: + +- Dotted names such as `agh.skill.view` are more visually familiar for namespace trees, but they are not provider-safe enough to use as callable IDs. +- `__` is less aesthetically clean than dots, but it avoids dual identity. +- Raw external names must be preserved in `SourceRef` for exact provenance and debugging. + +## Follow-Ups + +- The TechSpec must update all tool examples to this format. +- The registry validator must enforce the grammar. +- Extension and MCP adapters must sanitize external names deterministically and report conflicts. +- Policy matching must treat `__` as an identity segment separator and `_` as a normal segment character. 
diff --git a/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md b/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md new file mode 100644 index 000000000..5e9d66a07 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md @@ -0,0 +1,75 @@ +# ADR-008: Manifest-Authoritative Extension Tool Descriptors + +## Status + +Accepted + +## Date + +2026-04-28 + +## Context + +Extension tools need both install-time auditability and runtime ergonomics. If descriptors are runtime-only, AGH cannot inspect policy, risk, schemas, or source grants before launching extension code. If descriptors are manifest-only, SDK helpers cannot verify that the running extension actually implements the declared handlers. + +AGH already treats `extension.toml` as the install-time contract for resources, capabilities, Host API actions, subprocess launch, security grants, and bridge metadata. The Tool Registry should extend that model rather than create a second descriptor authority. + +## Decision + +Extension tool descriptors are manifest-authoritative with runtime reconciliation. + +`extension.toml` declares every extension-host tool under `resources.tools`. Each declaration includes the canonical `id`, display metadata, input/output schemas, risk flags, toolsets, result budget, and handler binding. + +The extension runtime may also expose descriptors through `provide_tools` or SDK metadata generated by `extension.tool(...)`, but those runtime descriptors are only a reconciliation signal. The daemon marks an extension-host tool executable only when the runtime descriptor matches the manifest-authoritative descriptor for identity, schema digests, risk flags, and handler binding. + +Schema digests are lowercase hex `sha256` over RFC 8785 JCS-canonicalized JSON Schema subtrees. 
The digest excludes surrounding manifest metadata and treats `$ref` values literally. TypeScript SDK, Go SDK, and daemon validation must share digest byte-vector fixtures. + +Runtime-only descriptors are rejected in the MVP. Manifest descriptors without a matching runtime handler remain operator-visible but unavailable with a deterministic reason code. + +## Alternatives Considered + +### Runtime-authoritative descriptors + +- **Description**: The manifest only declares `tool.provider`; the extension reports all tools after initialize. +- **Pros**: Flexible and ergonomic for SDK authors. +- **Cons**: Requires executing extension code before AGH knows tool risk, schemas, or source policy. +- **Why rejected**: Install-time auditability is required for policy, docs, and operator diagnostics. + +### Manifest-only descriptors + +- **Description**: The manifest is the only descriptor source; runtime implementation is checked only when a call happens. +- **Pros**: Simple policy model. +- **Cons**: Handler mismatch is discovered too late, and SDK helper bugs are harder to detect. +- **Why rejected**: Runtime reconciliation catches drift before session exposure. + +## Consequences + +### Positive + +- Operators and agents can inspect extension tool risk before extension code runs. +- SDK helpers can improve authoring ergonomics without becoming the source of truth. +- Tool install, enable, disable, conflict, and policy diagnostics remain deterministic. + +### Negative + +- Extension authors must keep manifest descriptors and SDK registration in sync. +- The daemon must compare runtime descriptors against manifest descriptors and report structured mismatch reasons. + +### Risks + +- Descriptor drift could make valid tools unavailable. Mitigation: SDK templates generate manifest snippets and tests assert manifest/runtime parity. +- Manifest fields could become verbose. Mitigation: SDK scaffolding and docs should generate the boilerplate. 
+ +## Implementation Notes + +- Add `id`, `output_schema`, `risk`, `destructive`, `open_world`, `requires_interaction`, `max_result_bytes`, `toolsets`, `tags`, and `handler` fields to extension `ToolConfig`. +- Add `CapabilityToolProvider = "tool.provider"`, `ExtensionServiceMethodProvideTools = "provide_tools"`, and `ExtensionServiceMethodToolsCall = "tools/call"` to the subprocess protocol and capability-method map. +- Add deterministic mismatch reason codes such as `runtime_descriptor_missing`, `runtime_descriptor_mismatch`, and `handler_missing`. +- The TypeScript and Go SDKs must include tests that compare generated/runtime descriptors and JCS schema digests to manifest fixtures. + +## References + +- `internal/extension/manifest.go` +- `internal/extension/resource_publication.go` +- `sdk/typescript/src/extension.ts` +- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md` diff --git a/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md b/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md new file mode 100644 index 000000000..e0a142898 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md @@ -0,0 +1,78 @@ +# ADR-009: Public Go Extension Tool SDK + +## Status + +Accepted + +## Date + +2026-04-28 + +## Context + +The Tool Registry must support extension authors defining tools using Go functions or TypeScript. AGH already has a TypeScript extension SDK, but Go extension authoring currently relies on direct protocol implementation or internal packages such as `internal/bridgesdk`, which are not public extension-author surfaces. + +Built-in daemon tools can use in-process Go functions, but third-party Go extension tools must remain out-of-process for safety and lifecycle isolation. + +## Decision + +The Tool Registry MVP includes a public Go subprocess extension SDK for tool providers. 
+ +The SDK exposes an authoring API equivalent to TypeScript `extension.tool(descriptor, handler)`, but implemented as a Go subprocess runtime. It handles initialize/shutdown, health checks, `tool.provider` negotiation, `tools/call` dispatch, Host API client calls, typed errors, descriptor export for runtime reconciliation, and test harness support. + +In-process `native_go` remains limited to first-party/built-in tools compiled into the daemon and wired by `internal/daemon`. Third-party Go extension tools use the public Go SDK and execute as managed subprocesses. + +## Alternatives Considered + +### Protocol examples only + +- **Description**: Ship Go examples that implement JSON-RPC manually, without a reusable SDK. +- **Pros**: Smaller implementation. +- **Cons**: Weak developer experience and inconsistent correctness across extension authors. +- **Why rejected**: The user explicitly requested Go function authoring, not raw protocol plumbing. + +### Go `plugin` or shared-library handlers + +- **Description**: Load Go functions into the daemon process dynamically. +- **Pros**: Natural function-call authoring. +- **Cons**: Unsafe daemon process boundary, platform friction, ABI issues, and lifecycle complexity. +- **Why rejected**: Third-party code must stay out-of-process. + +### TypeScript-only extension authoring + +- **Description**: Support TypeScript tool handlers first and defer Go SDK work. +- **Pros**: Reuses existing SDK. +- **Cons**: Does not satisfy Go function authoring requirements. +- **Why rejected**: Go function authoring is in MVP scope. + +## Consequences + +### Positive + +- Go extension authors get a real function-based authoring path without compromising daemon isolation. +- TypeScript and Go SDKs share the same runtime protocol and test expectations. +- E2E tests can prove both language paths through the same registry dispatch pipeline. 
+ +### Negative + +- MVP scope includes a new SDK package, scaffolding, tests, docs, and generated contract parity. +- Public SDK design must be stable enough for extension authors. + +### Risks + +- SDK and daemon protocol can drift. Mitigation: generate shared contract types and include cross-SDK conformance tests. +- Go SDK may accidentally import `internal/*` packages. Mitigation: place it under a public package path and test from an external-package perspective. + +## Implementation Notes + +- Add a public Go SDK package under `sdk/go`, mirroring `sdk/typescript`. +- Add a create-extension template for Go tool provider extensions. +- Add a Go SDK harness that can load a tool extension, mock Host API calls, and call `tools/call`. +- Do not use the SDK for daemon built-ins; built-ins use `native_go` providers. + +## References + +- `internal/subprocess/handshake.go` +- `internal/extension/manager.go` +- `sdk/typescript/src/extension.ts` +- `internal/bridgesdk/runtime.go` diff --git a/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md b/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md new file mode 100644 index 000000000..1e221eb60 --- /dev/null +++ b/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md @@ -0,0 +1,83 @@ +# ADR-010: Remote MCP Call-Through In MVP + +## Status + +Accepted + +## Date + +2026-04-28 + +## Context + +AGH already models MCP server configuration and remote auth. The previous Tool Registry spec treated MCP-backed tools as descriptors with availability diagnostics only. The revised MVP must make MCP tools executable through the same registry dispatch path as built-in and extension-host tools. + +Remote MCP call-through must not duplicate AGH's MCP auth model or leak OAuth tokens. ACP currently converts MCP servers to stdio-only session entries, so remote MCP execution should happen inside the daemon-owned registry adapter rather than by passing remote MCP config directly through ACP. 
+ +## Decision + +Remote/local MCP tools are executable in the Tool Registry MVP. + +The daemon owns MCP client adapters that discover/list/call MCP tools from validated MCP configuration and resource sources. The adapters consume existing MCP config, transport, auth metadata, and redacted auth status from `internal/mcp/auth`. Token material remains owned by `internal/mcp/auth` and its `TokenStore`; registry descriptors and results never copy tokens. + +The call-through contract is an `MCPCallExecutor` implemented inside `internal/mcp`. `internal/tools` may depend on that interface, but it must not import `internal/mcp/auth`, open the token store, receive raw bearer strings, or construct Authorization headers. The executor resolves bearer material internally for each outbound request and returns only normalized results plus redacted errors. + +Hosted MCP remains AGH's session exposure transport. When an agent calls `mcp__...` through the hosted MCP server, the call re-enters `internal/tools.Registry.Call`; the registry then invokes the daemon-owned MCP client adapter after policy, auth, availability, hook, schema, and result-budget checks. + +## Alternatives Considered + +### Descriptor-only MCP tools + +- **Description**: Show MCP tool descriptors and auth diagnostics, but do not call them in MVP. +- **Pros**: Smaller security and transport surface. +- **Cons**: Leaves MCP as a second-class source and fails to prove external tool execution. +- **Why rejected**: The accepted MVP scope includes remote MCP call-through. + +### Pass remote MCP servers directly to ACP sessions + +- **Description**: Let providers connect directly to remote MCP servers. +- **Pros**: Less daemon adapter work. +- **Cons**: ACP conversion is currently stdio-only, policy/audit is provider-dependent, and AGH cannot centrally enforce result redaction or source grants. +- **Why rejected**: AGH needs one daemon-owned dispatch path. 
+ +### Duplicate MCP auth in the registry + +- **Description**: Store MCP tokens or OAuth state with tool descriptors. +- **Pros**: Simple adapter lookup. +- **Cons**: Duplicates credential ownership and increases leak risk. +- **Why rejected**: `internal/mcp/auth` remains the sole credential owner. + +## Consequences + +### Positive + +- MCP tools become agent-callable through the same policy, visibility, hook, telemetry, and hosted MCP surfaces as built-ins and extension-host tools. +- Existing MCP auth and settings diagnostics remain authoritative. +- AGH can enforce a consistent `ToolID` and collision policy for MCP sources. + +### Negative + +- MVP must implement daemon-side MCP discovery/call clients, transport handling, timeout behavior, auth refresh/error mapping, and redaction tests. +- MCP adapters add more failure states to availability and session projection. + +### Risks + +- OAuth tokens could leak through registry output. Mitigation: the registry consumes only redacted auth status and calls MCP tools through the narrow `MCPCallExecutor` interface; bearer material is resolved inside `internal/mcp` (backed by `internal/mcp/auth`) and never crosses into `internal/tools`. +- Remote MCP call latency could block hosted MCP responses. Mitigation: explicit timeouts, cancellation, and structured backend failure errors. +- External MCP tool names could collide after sanitization. Mitigation: fail-closed conflict handling and operator-visible diagnostics. + +## Implementation Notes + +- Reuse `aghconfig.MCPServer`, `internal/config/mcpjson.go`, `internal/config/mcp_resource.go`, skill MCP resolution, extension MCP resources, and `internal/mcp/auth`. +- Add `MCPCallExecutor` tests proving bearer headers are injected only inside `internal/mcp` and never cross `internal/tools` logs, errors, events, or results. +- Fix resource cloning paths that currently drop `Transport`, `URL`, or `Auth` before relying on remote MCP diagnostics/calls. +- Add MCP adapter tests for stdio, HTTP, SSE, auth-required, expired/invalid auth, collision, timeout, cancellation, and redaction. 
+- Hosted MCP never receives remote OAuth tokens; it receives only AGH-hosted session projection entries. + +## References + +- `internal/config/provider.go` +- `internal/mcp/auth` +- `internal/store/globaldb/global_db_mcp_auth.go` +- `internal/acp/client.go` +- `.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md` diff --git a/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md b/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md new file mode 100644 index 000000000..b54be6bf7 --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md @@ -0,0 +1,276 @@ +# Analysis: ACP Tool Registry Compatibility + +## Scope + +This analysis answers whether ACP imposes a tool registry pattern that AGH must follow, and which `.resources/*` projects materially use ACP in ways that affect the Tool Registry TechSpec. The research combines official ACP/MCP documentation with read-only subagent passes over `.resources/rayclaw`, `.resources/harnss`, `.resources/acpx`, `.resources/openclaw`, `.resources/opencode`, and an inventory pass across every top-level `.resources/*` project. + +## Executive Conclusion + +ACP does not define a durable, programmatic tool registry for callable tools. ACP defines session lifecycle, prompt streaming, client authority callbacks, permission requests, MCP server bootstrap fields, and observable tool-call events. Those tool-call events carry `toolCallId`, `title`, `kind`, `status`, locations, raw input, raw output, and content, but they do not carry a stable `name` field equivalent to MCP `Tool.name`. + +Therefore, AGH should not model its Tool Registry as an ACP registry, and should not use ACP `title` as a durable policy or collision key. + +For session-visible AGH tools, the strongest compatibility path remains the accepted MVP path: an AGH-hosted MCP server backed by the daemon Tool Registry. 
MCP supplies the externally callable `Tool.name`; ACP supplies the way ACP-compatible agents receive `mcpServers`, report tool execution, and request permission. + +The practical design correction is: + +- AGH should use one canonical provider-safe `ToolID` everywhere, using reserved double-underscore namespace separators, for example `agh__skill_view`, `mcp__github__create_issue`, or `ext__linear__search`. +- The same `ToolID` should be the hosted MCP `Tool.name`; AGH should not introduce a second wire alias in the MVP. +- ACP `title` is display-only and event-only. +- ACP `ToolKind` is a risk/display hint, not registry identity. +- `permissions.mode` remains the session approval ceiling; registry policy remains the granular layer below it. + +## Official Protocol Constraints + +### ACP + +Official ACP schema evidence: + +- ACP `session/new`, `session/load`, and `session/resume` include `mcpServers`; agents are expected to connect to those MCP servers for the session. Source: ACP schema. +- ACP `ToolCall` is event/reporting data with `toolCallId`, `title`, `kind`, `status`, `rawInput`, `rawOutput`, `locations`, and `content`. The schema describes `title` as human-readable and `toolCallId` as unique within a session. It does not expose a durable callable `name` field. Source: ACP schema. +- ACP `ToolKind` values are coarse categories such as `read`, `edit`, `delete`, `move`, `search`, `execute`, `think`, `fetch`, `switch_mode`, and `other`. The schema says these help clients pick icons and display progress, which is weaker than registry identity. Source: ACP schema. +- ACP `session/request_permission` carries a `toolCall` object plus permission options. It is a permission bridge for a concrete tool call, not a registry discovery API. Source: ACP schema. +- The official "ACP Registry" is an agent registry: a catalog of ACP-compatible agents and their install/run metadata, not a callable tool registry. Source: ACP agent registry. + +### MCP + +Official MCP schema evidence: + +- MCP `tools/list` returns `Tool[]`. 
+- MCP `Tool` has `name`, optional `title`, optional `description`, `inputSchema`, optional `outputSchema`, annotations, execution metadata, and `_meta`. +- MCP describes `name` as intended for programmatic/logical use and `title` as intended for UI/end-user contexts. Source: MCP draft schema. + +Implication: AGH should treat MCP `Tool.name` as the session wire name when exposing AGH registry tools through hosted MCP. ACP does not replace that name. + +## ACP Usage Inventory Across `.resources/*` + +| Project | ACP usage | Tool registry relevance | +|---|---|---| +| `.resources/acpx` | ACP client/orchestrator and conformance tooling. | Has an agent/adapter registry, not a callable tool registry. Passes `mcpServers`; models tool calls by `toolCallId`, title, kind, status, raw input/output. | +| `.resources/collaborator-ai` | ACP client/orchestrator. | No registry found. Uses ACP tool update titles for display. | +| `.resources/goclaw` | ACP client/orchestrator. | No formal registry. Uses method switches and permission heuristics for ACP callbacks. | +| `.resources/harnss` | ACP client/orchestrator with Electron bridge. | Has ACP agent registry and UI rendering adapters, not a tool registry. Converts configured MCP servers to ACP `McpServer[]`. | +| `.resources/hermes` | ACP server/agent implementation plus ACP client shim. | Relevant: registers ACP-provided MCP servers into Hermes agent state and valid tool names; maps Hermes tools to ACP `ToolKind` and titles. | +| `.resources/multica` | ACP client/orchestrator. | No formal registry. Parses titles such as `terminal:` and `read:` for UI normalization. | +| `.resources/openclaw` | ACP server/client/runtime bridge. | Has internal tool catalog and plugin/MCP surfaces, but main ACP bridge does not expose an ACP tool registry and rejects per-session `mcpServers`. | +| `.resources/opencode` | Native ACP server. | Has a real internal `ToolRegistry`, but ACP does not expose it as a registry API. 
Accepts ACP `mcpServers` and converts them into internal MCP config. | +| `.resources/paperclip` | Docs/reference only. | Conceptual ACP references only. | +| `.resources/rayclaw` | ACP client/orchestrator. | Exposes ACP control as local `acp_*` tools; ACP-reported tool calls are telemetry, not registry entries. | +| `.resources/sandbox-agent` | ACP adapter/proxy/client package. | Agent launch registry only; no ACP tool registry found. | +| `.resources/t3code` | ACP schema/client/runtime package. | Schema and runtime tracking for ACP tool events; no broad tool registry. | + +No meaningful ACP evidence was found in `.resources/cc-posts`, `.resources/chat`, `.resources/openfang`, `.resources/pi`, or `.resources/symphony`. `.resources/claude-code` had an `ACP` false positive inside an embedded/base64-like string, not implementation evidence. + +## Deep Dives + +### RayClaw + +RayClaw is an ACP client/orchestrator. It spawns configured ACP agents, runs JSON-RPC lifecycle calls (`initialize`, `session/new`, `session/prompt`, `session/end`), and exposes ACP orchestration to RayClaw's primary LLM through local wrapper tools named `acp_coding`, `acp_new_session`, `acp_prompt`, `acp_end_session`, `acp_list_sessions`, `acp_submit_job`, and `acp_job_status`. + +Those `acp_*` names are RayClaw's local tool registry convention, not ACP. RayClaw's ACP tool-call handling treats incoming `session/update` tool calls as observations and records them by title/raw input. It does not dispatch those reported ACP tool calls through RayClaw's local registry. + +Important evidence: + +- `.resources/rayclaw/src/acp.rs:436-453` initializes ACP with client capabilities, not a host tool registry. +- `.resources/rayclaw/src/acp.rs:752-843` handles `session/request_permission`. +- `.resources/rayclaw/src/acp.rs:846-984` parses ACP tool-call progress. +- `.resources/rayclaw/src/acp.rs:1521-1531` creates sessions with `mcpServers: []`. 
+- `.resources/rayclaw/src/tools/acp.rs:16-40` registers the local `acp_*` wrapper tools. +- `.resources/rayclaw/tests/acp_integration.rs:128-240` enforces local tool-name uniqueness, allowed characters, length, and collision checks. + +Transferable points: + +- Separate ACP orchestration tools from normal runtime tools. +- Treat ACP tool calls as child-agent telemetry unless AGH deliberately bridges them. +- Do not prefer `allow_always` as an automatic approval default the way RayClaw does under `auto_approve`; AGH should keep durable grants explicit. +- Reject or disambiguate sanitized name collisions rather than truncating. + +### Harnss + +Harnss is an ACP client/orchestrator with an Electron bridge and React UI. It converts renderer MCP server configs into ACP SDK `McpServer[]`, including stdio and remote transports, then passes them into `newSession` and `loadSession`. It also supports live reload through ACP `loadSession` when available. + +Harnss does not consume or expose an ACP tool registry. Its "registry" evidence is an ACP agent registry and a UI-side static MCP renderer table. ACP tool calls are converted into UI messages keyed by `toolCallId`, using title/kind/raw input/output normalization. + +Important evidence: + +- `.resources/harnss/electron/src/ipc/acp-sessions.ts:193-215` converts MCP configs to ACP `McpServer[]`. +- `.resources/harnss/electron/src/ipc/acp-sessions.ts:365-483` wires ACP connection callbacks, event forwarding, and permission bridge. +- `.resources/harnss/electron/src/ipc/acp-sessions.ts:521-546` starts ACP sessions with MCP servers. +- `.resources/harnss/electron/src/ipc/acp-sessions.ts:793-828` reloads sessions with MCP servers. +- `.resources/harnss/src/hooks/useACP.ts:194-337` converts ACP tool events into UI messages. +- `.resources/harnss/src/hooks/useACP.ts:413-473` handles ACP permission requests. 
+- `.resources/harnss/src/lib/engine/acp-adapter.ts:267-358` derives display/tool renderer names from ACP title/kind/raw input. +- `.resources/harnss/src/components/McpToolContent.tsx:83-138` supports SDK-style `mcp__Server__tool` names and ACP-style `Tool: Server/tool` titles in UI rendering. + +Transferable points: + +- Normalize ACP event data at the boundary into AGH's canonical tool-call observation model. +- Preserve raw ACP permission options; do not collapse manual allow/deny into "first allow" or "first reject" if the protocol provides multiple option IDs. +- Keep rendering names separate from policy names. +- Pass MCP servers as session bootstrap/load data when AGH chooses per-session MCP support. + +### ACPX + +ACPX is a headless ACP client/orchestrator. Its registry is an agent/adapter registry mapping names like `codex`, `claude`, `gemini`, and others to launch commands. This is not a callable tool registry. + +ACPX implements client authority callbacks such as filesystem read/write, terminal create/output/wait/kill/release, and `session/request_permission`. It parses `mcpServers` from config and passes them through to `session/new` and `session/load`. It does not discover or normalize MCP tools into a registry. + +Important evidence: + +- `.resources/acpx/src/agent-registry.ts:38-107` maps adapter names to commands. +- `.resources/acpx/src/mcp-servers.ts:100-177` parses MCP server configs. +- `.resources/acpx/src/acp/client.ts:475-538` wires ACP client callbacks and initialize capabilities. +- `.resources/acpx/src/acp/client.ts:638-693` passes `mcpServers` to `session/new` and `session/load`. +- `.resources/acpx/src/permissions.ts:98-152` implements coarse permission decisions. +- `.resources/acpx/src/session/conversation-model.ts:310-353` persists tool events keyed by tool call ID. +- `.resources/acpx/conformance/cases/021-prompt-post-success-drain.json:1-50` shows late tool updates can arrive after prompt success. 
+ +Transferable points: + +- Keep agent/provider registries separate from Tool Registry. +- ACP compatibility includes filesystem and terminal callbacks where advertised; those callbacks must share AGH's registry policy engine or be routed through equivalent approval gates. +- Preserve distinct identities: AGH record IDs, ACP session IDs, provider-native session IDs, tool call IDs, and registry tool IDs. +- Add a protocol-aware drain/settle window for late `tool_call_update` events. + +### OpenClaw + +OpenClaw's main `openclaw acp` bridge is a Gateway-backed ACP server. It forwards prompts to the Gateway and translates Gateway events into ACP session updates. The main bridge advertises MCP HTTP/SSE support as disabled and rejects non-empty per-session `mcpServers`; its docs say MCP should be configured at the Gateway/agent layer. + +OpenClaw has rich internal tool catalogs and plugin/MCP surfaces, but the main ACP bridge does not expose them as an ACP tool registry. Tool identity in ACP is display/event identity: title formatting plus inferred `ToolKind`. + +Important evidence: + +- `.resources/openclaw/src/acp/server.ts:4-13` and `.resources/openclaw/src/acp/server.ts:104-122` bootstrap the ACP stdio server. +- `.resources/openclaw/src/acp/translator.ts:519-540` advertises ACP capabilities. +- `.resources/openclaw/src/acp/translator.ts:542-603` handles session creation/loading. +- `.resources/openclaw/src/acp/translator.ts:1417-1424` rejects non-empty `mcpServers`. +- `.resources/openclaw/src/acp/translator.ts:848-940` maps Gateway tool events to ACP tool updates. +- `.resources/openclaw/src/acp/event-mapper.ts:297-342` formats tool titles and infers tool kind. +- `.resources/openclaw/src/agents/tool-catalog.ts:20-37` and `.resources/openclaw/src/agents/tool-catalog.ts:306-393` define a separate internal tool catalog. 
+- `.resources/openclaw/extensions/acpx/src/runtime-internals/mcp-proxy.mjs:33-64` shows the ACPX extension can inject MCP servers into embedded ACP sessions, unlike the main gateway bridge. + +Transferable points: + +- AGH must explicitly choose whether its ACP bridge accepts per-session `mcpServers` like OpenCode/Harnss/ACPX or rejects them like OpenClaw's gateway bridge. +- If AGH supports both runtime-managed MCP and ACP-provided MCP servers, precedence and collision rules must be explicit. +- Do not use substring heuristics for registry policy where explicit tool metadata is available. + +### OpenCode + +OpenCode implements a native ACP server and has a real internal `ToolRegistry`. This is the strongest local reference for how an agent can maintain a rich internal registry while ACP still sees only session lifecycle, MCP bootstrap, tool-call updates, and permission requests. + +OpenCode accepts ACP per-session `mcpServers`, stores them in ACP session state, converts them into internal MCP config, and adds them through its SDK. Its internal MCP naming pattern exposes MCP tools as `sanitize(server) + "_" + sanitize(tool)` while preserving the original MCP tool name for the actual call. + +Important evidence: + +- `.resources/opencode/packages/opencode/src/cli/cmd/acp.ts:23-60` bootstraps `opencode acp`. +- `.resources/opencode/packages/opencode/src/acp/types.ts:1-16` and `.resources/opencode/packages/opencode/src/acp/session.ts:8-75` store ACP session state, including `mcpServers`. +- `.resources/opencode/packages/opencode/src/acp/agent.ts:534-578` advertises MCP support. +- `.resources/opencode/packages/opencode/src/acp/agent.ts:584-687` accepts MCP servers on session creation/loading. +- `.resources/opencode/packages/opencode/src/acp/agent.ts:1216-1254` converts ACP MCP servers into internal MCP config. 
+- `.resources/opencode/packages/opencode/src/mcp/index.ts:115-146` and `.resources/opencode/packages/opencode/src/mcp/index.ts:618-651` implement sanitized server/tool naming while preserving raw MCP names. +- `.resources/opencode/packages/opencode/src/tool/tool.ts:34-43` and `.resources/opencode/packages/opencode/src/tool/registry.ts:163-207` define internal tool definitions and registry behavior. +- `.resources/opencode/packages/opencode/src/acp/agent.ts:273-455` emits ACP tool-call lifecycle updates. +- `.resources/opencode/packages/opencode/src/acp/agent.ts:190-271` bridges internal permission events to ACP `session/request_permission`. + +Transferable points: + +- Keep AGH's internal registry richer than ACP. +- Store ACP-provided MCP servers in session state, not global daemon config. +- Preserve raw MCP server/tool names separately from the canonical AGH `ToolID`. +- Emit a stable ACP lifecycle, preferably `pending -> in_progress -> completed/failed`, even when the underlying runtime first reports a running event. +- Do not rely on a single-underscore sanitized naming scheme without collision diagnostics. + +### Hermes, Multica, GoClaw, Sandbox-Agent, T3Code + +These projects reinforce the same split: + +- `.resources/hermes` is relevant because it registers ACP-provided MCP servers into agent state and valid tool names, then maps tool events into ACP kinds/titles. It has useful registry ideas, but ACP remains the session/event layer. +- `.resources/multica` and `.resources/goclaw` normalize ACP tool titles/kinds for display and permission heuristics; neither shows a protocol-level tool registry. +- `.resources/sandbox-agent` has ACP HTTP-to-stdio adapter and launch registry logic, but no callable ACP tool registry. +- `.resources/t3code` provides ACP schema/client/runtime tracking for tool-call events, not a broad registry. + +## Design Implications For AGH + +1. ACP compatibility is not a reason to avoid a daemon Tool Registry. 
ACP leaves tool discovery/execution models to the agent/runtime, or to MCP servers supplied to the session. + +2. AGH should expose daemon-owned session tools through an AGH-hosted MCP server in the MVP. This matches the accepted ADR-002 direction and aligns with ACP's `mcpServers` field. + +3. The registry's canonical ID must not be ACP `title`. Use one stable provider-safe `ToolID` across AGH and hosted MCP. + +4. The registry should store one canonical callable identity plus metadata: + - `ToolID`: provider-safe lower snake segments separated by reserved `__`, for example `agh__skill_view`. + - `DisplayTitle`: user-facing title only. + - `SourceRef`: structured provenance, for example built-in, MCP server, extension ID, bundle ID, provider ID. + +5. Collision handling must be fail-closed: + - Canonical `ToolID` collision: provider registration error or conflicted diagnostic. + - Sanitized external-name collision: tool is not exposed to the session until disambiguated. + - Display title collision: allowed, because titles are not policy identities. + +6. Operator and session projections should remain separate: + - Operator surfaces show unavailable, unauthorized, and conflicted tools with reason codes. + - Session/model surfaces expose only callable tools after availability, authorization, approval ceiling, and collision checks. + +7. ACP permission policy integration must remain ceiling-based: + - `deny-all` denies by default. + - `approve-reads` auto-approves only registry-classified read-only tools and ACP read/search callbacks AGH classifies as read-only. + - `approve-all` skips approval prompts for otherwise allowed tools, but does not bypass registry deny rules, extension grants, session lineage, source trust, availability, hooks, or conflict checks. + +8. ACP filesystem and terminal callbacks, if AGH advertises them, must not bypass Tool Registry policy. 
Either route them through the registry as first-class built-in tools or share the same policy/approval engine with equivalent telemetry and hooks. + +9. ACP `ToolKind` should be explicit metadata on AGH descriptors. Heuristics from title/kind are fallback-only for external ACP events that AGH observes but does not own. + +10. AGH should persist observed ACP tool calls separately from registry definitions. Observations are keyed by `toolCallId` within a session and carry title/kind/status/raw input/output. Registry entries are keyed by canonical `ToolID`. + +11. AGH should support late tool-call updates after prompt completion by draining the ACP event stream for a bounded window or until protocol-specific completion conditions are met. + +12. AGH should decide explicitly whether to accept third-party ACP `mcpServers` from clients: + - If accepted, store them as session-scoped tool sources with clear precedence and conflict policy. + - If rejected, document the OpenClaw-style stance and require MCP sources to be configured through AGH's registry/config lifecycle. + - For this TechSpec, the safer MVP path is AGH-managed hosted MCP first, with acceptance of client-supplied MCP servers as a compatibility extension only if collision and source-trust rules are implemented. + +## Accepted Naming And Collision Recommendation + +Adopt one canonical public `ToolID` format: + +- Canonical ID: provider-safe lower snake segments separated by reserved double underscore, for example `agh__skill_view`, `agh__tool_search`, `mcp__github__create_issue`, `ext__linear__search`. +- Hosted MCP `Tool.name`: same as the canonical `ToolID`. +- Display title: human-readable and non-unique, for example `View Skill`. +- Source/provenance: structured fields, not inferred solely from name prefixes. +- No shadowing: providers cannot replace an existing canonical ID unless they are the same source updating the same record. 
+- No silent truncation: if sanitization or length rules would collide, registration/session projection marks the tool conflicted and hides it from the session surface. +- Policy, dispatch, telemetry, hooks, CLI, HTTP, UDS, and hosted MCP all use the same `ToolID`. + +This keeps AGH's internal registry expressive while respecting MCP wire compatibility and ACP's event-oriented model. + +## Evidence Summary + +Official protocol sources: + +- ACP schema: +- ACP agent registry: +- MCP draft schema: + +Primary local evidence: + +- `.resources/rayclaw/src/acp.rs` +- `.resources/rayclaw/src/tools/acp.rs` +- `.resources/rayclaw/tests/acp_integration.rs` +- `.resources/harnss/electron/src/ipc/acp-sessions.ts` +- `.resources/harnss/src/hooks/useACP.ts` +- `.resources/harnss/src/lib/engine/acp-adapter.ts` +- `.resources/acpx/src/acp/client.ts` +- `.resources/acpx/src/mcp-servers.ts` +- `.resources/acpx/src/permissions.ts` +- `.resources/openclaw/src/acp/translator.ts` +- `.resources/openclaw/src/acp/event-mapper.ts` +- `.resources/opencode/packages/opencode/src/acp/agent.ts` +- `.resources/opencode/packages/opencode/src/tool/registry.ts` +- `.resources/opencode/packages/opencode/src/mcp/index.ts` + +Subagent inventory evidence: + +- ACP implementations/usages: `acpx`, `collaborator-ai`, `goclaw`, `harnss`, `hermes`, `multica`, `openclaw`, `opencode`, `paperclip`, `rayclaw`, `sandbox-agent`, `t3code`. +- No meaningful ACP evidence: `cc-posts`, `chat`, `openfang`, `pi`, `symphony`. +- Excluded false positive: `claude-code`. diff --git a/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md b/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md new file mode 100644 index 000000000..bd1c51994 --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md @@ -0,0 +1,150 @@ +# AGH Current State: Tool Registry Foundation + +## Overview + +AGH already has the cold side of a tool catalog, but not the runtime side. 
+`internal/tools` defines a canonical `tool` resource shape and the daemon projects tool records through the generic resources system. Extensions can publish static tool metadata from `extension.toml`. Sessions can also carry concrete permission atoms for tools in lineage metadata. + +What is missing is the executable registry: a central service that can answer which tools exist for a specific agent/session, whether each tool is available now, whether the caller is allowed to use it, and how to dispatch the call through one uniform pipeline. + +This matters because AGH's product premise is agent-first manageability. A tool registry is incomplete if it only helps internal Go code list metadata or if each ACP runtime owns a private tool universe that AGH cannot inspect, govern, or extend. + +## Existing Mechanisms + +### Tool resource metadata + +`internal/tools/tool.go` defines `ToolSource` values for `builtin`, `mcp`, `extension`, and `dynamic`, plus a small `Tool` record: + +- `Name` +- `Description` +- `InputSchema` +- `ReadOnly` +- `Source` + +The only provider interface is: + +```go +type ToolProvider interface { + Tools(ctx context.Context) ([]Tool, error) +} +``` + +There is no `Call`, `Availability`, `CheckPermission`, `Aliases`, `Namespace`, `IsConcurrencySafe`, `IsDestructive`, `MaxResultBytes`, `Owner`, `Visibility`, or provenance-rich source metadata. + +### Desired-state resources + +`internal/tools/resource.go` defines `ToolResourceKind = "tool"` and validates tool records as JSON-object specs with a maximum size of 256 KiB. This is a good base for persisted inventory, desired-state reconciliation, and extension-published tool metadata. + +The daemon already has a generic `resourceCatalog[T]` in `internal/daemon/tool_mcp_resources.go` and a `newToolProjector` that projects reconciled `tool` records into a daemon-local snapshot. This catalog is descriptive and revisioned, but it is not executable. 
+ +### Extension-published tools + +`internal/extension/manifest.go` already lets extensions declare: + +```go +type ResourcesConfig struct { + Tools map[string]ToolConfig `toml:"tools,omitempty" json:"tools,omitempty"` + MCPServers map[string]MCPServerConfig `toml:"mcp_servers,omitempty" json:"mcp_servers,omitempty"` +} +``` + +`ToolConfig` carries description, input schema, and read-only status. `ResolveManifestToolResources` converts these manifest entries into `toolspkg.Tool` records with `Source = ToolSourceExtension`. The daemon syncer publishes them into the resource graph with source keys like `extension/<extension>/tool/<name>`. + +This is close to OpenClaw's manifest-first model, but AGH currently stops at metadata. A manifest-declared extension tool is not callable unless some separate ACP/MCP/runtime surface happens to expose it. + +### MCP resources + +The same extension manifest can declare MCP servers, and the daemon sync path resolves them into desired-state MCP server resources. Skills can also declare MCP sidecars. This gives AGH a strong candidate adapter for extension tools: manifest tools may be backed by an MCP server, an extension sidecar Host API endpoint, or native AGH built-ins, but all should normalize into one registry contract. + +### Session permission atoms + +`internal/store/session_lineage.go` defines `SessionPermissionPolicy` with concrete atoms: + +- `Tools` +- `Skills` +- `MCPServers` +- `WorkspacePaths` +- `NetworkChannels` +- `SandboxProfiles` + +`internal/session/spawn.go` validates child permissions as a subset of parent permissions. This is an important base for runtime tool policy because it is already persisted with session lineage and already participates in spawn delegation. + +The current agent definition has a flat `Tools []string` field in `internal/config/agent.go`. It lacks allow/deny overlays, named toolsets, namespace patterns, visibility tiers, and risk classes.
+ +### Hooks around tool calls + +`internal/hooks/payloads.go` already defines `ToolPreCallPayload`, `ToolPostCallPayload`, `ToolPostErrorPayload`, `ToolCallPatch`, and `ToolResultPatch`. This is the right policy extension point for a centralized dispatch pipeline: + +- pre-call hooks can deny or mutate input +- post-call hooks can redact or mutate output +- post-error hooks can classify or recover failures + +The current gap is that AGH does not have a single dispatch pipeline that all AGH-owned tools must pass through. + +### Skills registry contrast + +`internal/skills.Registry` is much more mature than tools. It has global snapshots, workspace overlays, content loading, verification, install provenance, and `GlobalVersion()` for invalidation. Skills are injected as a static prompt catalog at session start, while tool resources are not exposed as a session-callable registry. + +The Tool Registry should copy the skills registry's useful properties where they fit: global/workspace overlays, versioned snapshots, resource projection, progressive disclosure, and explicit content/schema loading. + +## Gaps + +1. No central runtime registry that owns executable tool handles. +2. No agent-facing discovery API for tools. +3. No native AGH tools such as `agh__tool_search`, `agh__skill_view`, or `agh__network_send`. +4. No availability model for env vars, binaries, MCP health, extension health, workspace scope, or policy state. +5. No central permission pipeline for AGH-owned tools. +6. No namespace or structured provenance model, so duplicate names would be ambiguous. +7. No toolsets or bundles comparable to skills/capabilities. +8. No direct extension execution boundary for manifest-declared tools. +9. No consistent way to expose tools over CLI, HTTP, UDS, and session-visible agent surfaces. +10. No usage telemetry by tool or skill. +11. No result-size budget, persistence policy, redaction, or output mapping at registry level. +12. 
The `dynamic` source enum exists but has no producer. + +## Relevant Code Paths + +- `internal/tools/tool.go:14-136`: tool source enum, metadata-only `Tool`, and list-only `ToolProvider`. +- `internal/tools/resource.go:13-61`: `ToolResourceKind` codec and JSON schema validation. +- `internal/daemon/tool_mcp_resources.go:20-122`: generic daemon `resourceCatalog` and `newToolProjector`. +- `internal/daemon/tool_mcp_resources.go:620-640`: extension manifest tools are projected into desired-state resources. +- `internal/extension/manifest.go:55-62`: extension resources can include tools and MCP servers. +- `internal/extension/manifest.go:154-160`: extension `ToolConfig` is static metadata only. +- `internal/extension/resource_publication.go:13-31`: manifest tool declarations become `toolspkg.Tool` records. +- `internal/config/agent.go:14-23`: `AgentDef.Tools []string` is flat. +- `internal/store/session_lineage.go:31-39`: session lineage has concrete `Tools` permission atoms. +- `internal/session/interfaces.go:244-251`: `AgentDriver` has no catalog-delta or AGH tool injection extension. +- `internal/hooks/payloads.go:520-568`: tool pre/post/error payloads already exist. +- `internal/skills/registry.go:100-103`: skills expose a global version suitable for catalog delta detection. +- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`: prior autonomy gap analysis with G1-G12 and P1-P9 proposals. +- `.compozy/tasks/hermes/analysis/analysis_tools_security.md`: security gaps relevant once AGH exposes agent-callable tools. + +## Design Constraints for the TechSpec + +The Tool Registry should be a foundation, not a pile of built-in commands. It should define the contracts, policy path, extension boundary, and surfaces first, then add a small bootstrap set of native AGH tools to prove the system. + +AGH should avoid copying in-process plugin patterns from Python/TypeScript systems. 
Third-party executable tools should cross a process/protocol boundary: MCP, extension sidecar Host API, subprocess adapter, or future bridge SDK. Built-in Go tools can register in-process because they are part of the daemon binary. + +The cold `tool` resource should remain valuable as catalog metadata and desired state, but executable dispatch must be modeled separately. A manifest-declared tool can be installed and discoverable while still being unavailable until its backend is healthy and authorized. + +Availability and authorization must both be rechecked at dispatch time. Hiding unavailable tools from discovery is useful, but it is not a security boundary. + +## Open Questions + +1. Should extension tools be executable in the MVP, or should MVP only make them discoverable with an explicit unavailable state? +2. If executable, should extension tools be allowed only through MCP/sidecar boundaries, or should trusted bundled extensions get in-process handlers? +3. Should the first AGH-native tool surface be injected into ACP sessions, exposed as an MCP server hosted by AGH, or exposed only through CLI/UDS/HTTP for drivers to call indirectly? +4. Should `internal/catalog` coordinate tools and skills, or should `internal/tools` own runtime tools while a thinner catalog/search service composes skills and tools? +5. Which visibility tiers are needed for MVP: internal, CLI/HTTP, agent-visible, model-visible, deferred-discoverable, extension-private? + +## Evidence + +- `internal/tools/tool.go:91-136`: current `Tool` and `ToolProvider` are descriptive and list-only. +- `internal/tools/resource.go:13-61`: tool resource codec validates metadata records. +- `internal/extension/manifest.go:55-62`: extension manifests can publish `resources.tools`. +- `internal/extension/resource_publication.go:13-31`: extension manifest tools become static tool resources. 
+- `internal/daemon/tool_mcp_resources.go:620-640`: daemon sync publishes extension tools and MCP servers into resource desired state. +- `internal/store/session_lineage.go:31-39`: session permission policy already includes `Tools`. +- `internal/hooks/payloads.go:520-568`: tool lifecycle hooks are already typed. +- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md:1-220`: prior gap analysis identifies no runtime tool registry, no discovery API, no availability, and no agent-facing skill/tool call surface. +- `.compozy/tasks/hermes/analysis/analysis_tools_security.md:1-140`: security analysis warns that URL-capable, command-capable, MCP, and skill-install surfaces require stronger guardrails before broad exposure. diff --git a/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md b/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md new file mode 100644 index 000000000..0b412a5dd --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md @@ -0,0 +1,101 @@ +# Competitor Analysis: Claude Code Tool System + +## Overview + +Claude Code does not expose a single mutable "ToolRegistry" class in the generic plugin sense. Its effective registry is a layered system: + +1. a strongly typed `Tool` contract, +2. a static built-in tool list, +3. request-time tool pool assembly, +4. MCP adaptation into the same contract, +5. deferred discovery through Tool Search, +6. centralized dispatch and permission handling. + +For AGH, the main lesson is not Claude Code's static `tools.ts` shape. The useful pattern is the separation between canonical definition, contribution adapters, context-specific assembly, permission decision, dispatch, result mapping, and dynamic discovery. + +## Mechanisms / Patterns + +The `Tool` interface carries model identity, schemas, runtime behavior, permission hooks, safety metadata, rendering hooks, output mapping, and dynamic discovery flags. `buildTool` applies defaults for omitted fields. 
+ +Built-in tools are imported statically and exposed through `getAllBaseTools()`. `getTools(permissionContext)` filters this base list by mode, deny rules, special/internal tool rules, REPL visibility, and per-tool availability. `assembleToolPool()` then merges built-ins with MCP tools, sorts for prompt-cache stability, and deduplicates by name with built-ins winning. + +MCP tools are adapted into the same `Tool` contract. Claude Code calls MCP `tools/list`, maps schemas and annotations, preserves server/tool provenance in `mcpInfo`, and refreshes on `tools/list_changed`. + +Deferred discovery is a key pattern. Deferred tools are indexed by name/search hints and hidden from the initial API request. The model can call `ToolSearchTool`, which returns `tool_reference` blocks that cause the full schemas to be included later. Claude Code also has fallback delta/message paths when provider-native dynamic discovery is unavailable. + +Permissions are evaluated in an ordered policy pipeline. Deny rules can hide tools from visibility and block dispatch. Runtime permission checks consider explicit allow/deny/ask rules, tool-specific checks, user interaction requirements, safety classification, headless behavior, mode bypasses, and hooks. + +Dispatch is centralized. `runToolUse` resolves the tool, validates schema input, runs optional validation, prepares observable input, executes pre-tool hooks, resolves permission, calls the handler, maps results, and handles result-size/storage behavior. `StreamingToolExecutor` uses `isConcurrencySafe` to parallelize safe tools while serializing unsafe ones. + +Plugins contribute tool-like behavior primarily through MCP servers, skills, commands, agents, hooks, and settings rather than native in-process `Tool` objects. This is a useful extension boundary for AGH because it keeps third-party tools behind a protocol/process adapter. 
+ +## Relevant Code Paths + +- `.resources/claude-code/Tool.ts:362-520`: canonical `Tool` fields including schemas, `call`, availability, read-only/destructive/concurrency metadata, user-interaction requirements, deferred-loading flags, MCP metadata, validation, and permissions. +- `.resources/claude-code/Tool.ts:701-783`: `ToolDef`, defaults, and `buildTool`. +- `.resources/claude-code/tools.ts:161-193`: built-in tool source of truth. +- `.resources/claude-code/tools.ts:262-326`: runtime filtering by deny rules, special tools, REPL visibility, and `isEnabled`. +- `.resources/claude-code/tools.ts:345-389`: built-in + MCP tool pool assembly and deduplication. +- `.resources/claude-code/utils/api.ts:123-234`: conversion to API tool schemas and deferred-loading fields. +- `.resources/claude-code/utils/toolSearch.ts:155-197`: Tool Search modes. +- `.resources/claude-code/utils/toolSearch.ts:270-385`: provider/model capability checks for Tool Search. +- `.resources/claude-code/utils/toolSearch.ts:525-646`: discovered deferred tool extraction and delta computation. +- `.resources/claude-code/tools/ToolSearchTool/ToolSearchTool.ts:167-471`: search scoring and `tool_reference` outputs. +- `.resources/claude-code/services/api/claude.ts:1120-1339`: request-time Tool Search enablement and fallback injection. +- `.resources/claude-code/services/tools/toolExecution.ts:340-1297`: central dispatch. +- `.resources/claude-code/services/tools/streamingToolExecutor.ts:35-391`: concurrency-safe scheduling. +- `.resources/claude-code/utils/permissions/permissions.ts:236-362`, `1067-1312`: rule matching and ordered permission engine. +- `.resources/claude-code/services/mcp/client.ts:1738-2010`, `2160-2195`, `3020-3075`: MCP tools/list ingestion, adaptation, refresh, and call execution. +- `.resources/claude-code/services/mcp/useManageMCPConnections.ts:600-690`: `tools/list_changed` handling. 
+- `.resources/claude-code/utils/plugins/mcpPluginIntegration.ts:100-634`: plugin MCP server extraction, scoping, env resolution, and contribution adapter. +- `.resources/claude-code/types/plugin.ts:14-67`: plugin shape. +- `.resources/claude-code/skills/loadSkillsDir.ts:185-335`: skill frontmatter, `allowed-tools`, and user-invocable visibility. +- `.resources/claude-code/tools/AgentTool/runAgent.ts:440-690`: agent-specific permission scoping and merged tool pools. + +## Transferable Patterns + +AGH should define a first-class tool definition contract that attaches identity, schema, provenance, visibility, availability, permission requirements, dispatch handler, output policy, and observability metadata. + +AGH should treat tool pool assembly as separate from registration. Registration collects contributions; assembly produces a context-specific view for a workspace, user, agent, session, provider, mode, runtime health, and permission policy. + +AGH should model visibility explicitly instead of scattering "hidden" flags. Candidate tiers include internal-only, daemon-manageable, CLI/HTTP-visible, agent-visible, model-visible, deferred-discoverable, user-command-only, and extension-private. + +AGH should support provider-neutral search and deferred schema loading even if Anthropic-specific `tool_reference` blocks are not portable. A registry search index and `agh__tool_search` are useful independently. + +AGH should keep permissions as an ordered pipeline. Discovery-time filtering improves UX, but dispatch must revalidate deny/ask/allow rules, tool-specific requirements, session permissions, workspace permissions, and hooks. + +AGH should map MCP annotations into local metadata at adapter boundaries: read-only, destructive, open-world, title, schema, search hints, and provenance. + +AGH should carry `IsConcurrencySafe` or equivalent execution metadata and enforce it centrally. 
+ +## Risks / Mismatches + +Claude Code makes LLM API calls directly and can use provider-specific dynamic tool features. AGH usually manages external ACP-compatible runtimes, so it cannot assume direct control of model API request payloads. + +Claude Code's built-in registry is a static import list. AGH needs an extensible daemon registry with contribution adapters, not a monolithic static list. + +`buildTool` defaults some permission behavior in a way that is acceptable for controlled built-ins but too permissive for third-party extension tools. AGH should default untrusted external tools to disabled, deny, or ask until policy grants them. + +Claude Code uses naming conventions such as MCP prefixes for some behavior. AGH should use structured provenance and namespaces instead. + +Claude Code's plugin tools primarily flow through MCP. This is a good MVP boundary, but AGH may also want a native extension Host API for richer lifecycle and local-resource management. + +## Open Questions + +1. Should AGH expose native registry tools to ACP runtimes as hosted MCP tools, direct ACP tools, or CLI/UDS-callable commands? +2. Should AGH persist deferred-tool discovery state per session, or keep search stateless and recompute on each call? +3. What should happen when an ACP runtime already has a tool with the same short name as an AGH-native tool? +4. Should extension/plugin tool contributions be MCP-only for MVP? + +## Evidence + +- `.resources/claude-code/Tool.ts:362-520`, `701-783`: canonical tool shape and defaults. +- `.resources/claude-code/tools.ts:161-389`: base registry, filtering, MCP merge, sorting, dedupe. +- `.resources/claude-code/utils/api.ts:123-234`: schema projection and deferred-loading fields. +- `.resources/claude-code/utils/toolSearch.ts:155-646`: dynamic discovery logic. +- `.resources/claude-code/tools/ToolSearchTool/ToolSearchTool.ts:167-471`: search scoring and references. 
+- `.resources/claude-code/services/tools/toolExecution.ts:340-1297`: central dispatch and result mapping. +- `.resources/claude-code/services/tools/streamingToolExecutor.ts:35-391`: concurrency-safe scheduling. +- `.resources/claude-code/utils/permissions/permissions.ts:236-362`, `1067-1312`: permission rule engine. +- `.resources/claude-code/services/mcp/client.ts:1738-2010`, `2160-2195`, `3020-3075`: MCP adapter. +- `.resources/claude-code/utils/plugins/mcpPluginIntegration.ts:100-634`: plugin-to-MCP contribution adapter. diff --git a/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md b/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md new file mode 100644 index 000000000..0736dc251 --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md @@ -0,0 +1,125 @@ +# Local Ideas Cross-Reference: docs/ideas/from-claude-code + +## Overview + +The `docs/ideas/from-claude-code` folder contains prior Claude Code analyses and a filtered recommendation document. The most important point is a tension: + +- Earlier filtering said Claude Code Tool Search and streaming tool execution were "not relevant" because AGH manages external ACP runtimes and does not make LLM API calls directly. +- A Tool Registry feature changes part of that conclusion. If AGH introduces daemon-owned, agent-callable tools, then search, progressive schema disclosure, permission ordering, result budgets, and tool metadata become relevant again. + +The TechSpec should be explicit about this boundary. AGH should not pretend it controls every provider's internal tool loop, but it can own a registry for AGH-native tools and extension-provided tools exposed through AGH surfaces. + +## Relevant Ideas + +### Canonical tool contract + +`analysis_tool_system.md` proposes a Go `Tool` shape with name, description, schema, permission checks, execution, classifier input, and result limits. 
This aligns with the competitor research, but AGH should avoid making security classifier behavior mandatory in the first registry layer. + +The stronger AGH version should split: + +- descriptor fields used for discovery and schema projection, +- policy metadata used for authorization, +- availability checks used for live health, +- handler/adapters used for dispatch, +- result policy used for truncation, persistence, redaction, and telemetry. + +### Tool Search and deferred loading + +The document proposes an `eager`, `deferred`, and `discovered` registry with `Search(query)` returning tool references. That is useful if AGH has large tool universes from MCP, extensions, skills, network peers, and built-ins. + +AGH should adapt this provider-neutrally: + +- `agh__tool_search` returns descriptors and optional schema handles. +- CLI/HTTP/UDS search returns the same data. +- ACP/model-specific `tool_reference` integration is optional and provider-dependent. +- The registry can still compute deltas and persist "discovered in this session" state later. + +### Result persistence and budgets + +The Claude Code analysis highlights per-tool max result sizes and disk persistence for large outputs. AGH needs the same concept because tool results may be delivered into session transcripts, HTTP responses, UDS clients, or agent-visible messages. + +The registry should define: + +- `MaxResultBytes` or a default by risk/source class, +- preview strategy, +- artifact persistence target, +- redaction path, +- telemetry fields for result bytes and persisted artifact id. + +### Permission and security validators + +The local idea files discuss command-specific validators, classifier input projection, dangerous pattern registries, and bash-specific semantics. Those are important for shell tools, but they should not block the Tool Registry foundation. 
+ +For MVP, the registry should provide a hook point: + +- a tool can declare `RiskClass`, `OpenWorld`, `Destructive`, and `RequiresUserInteraction`; +- a policy engine can decide allow/deny/ask; +- specialized tools such as shell/url/browser can later plug in validators. + +### Prompt and catalog deltas + +`analysis_prompt_architecture.md` discusses enabled-tools-aware prompt sections and delta attachment patterns. The relevant AGH takeaway is that catalog changes should be incremental and explicit. For ACP runtimes that cannot accept live tool deltas, AGH should clearly fall back to "visible on next session." + +### Streaming executor and concurrency + +`analysis_query_engine.md` includes concurrency-safe vs exclusive tool execution. AGH should keep the metadata and enforce it at registry dispatch, but not copy a direct model streaming executor unless AGH owns a provider's query loop. + +### Plugin system references + +`analysis_services_infra.md` describes plugin refresh, availability, hooks, and plugin error taxonomy. This supports an AGH registry model where extension tools have lifecycle state and refresh reasons. AGH should convert extension sidecar and MCP health into availability reasons rather than exposing raw plugin errors to agents. + +## Filtered Recommendation Reversal + +`filtered_recommendations.md` says Tool Search, streaming execution, bash classifiers, and API tool loops are not relevant because AGH does not make LLM API calls. That remains true for driver-internal tools. + +However, the Tool Registry feature is not about controlling Claude Code's own tools. It is about creating AGH-owned tools that are: + +- discoverable through AGH, +- governed by AGH, +- executable through AGH, +- extensible by AGH extensions, +- visible to agents regardless of ACP runtime when an adapter exists. + +Therefore: + +- Do not copy Claude Code's provider API request mechanics into the MVP. 
+- Do copy the registry/search/permission/result architecture where AGH owns the tool surface. + +## Relevant Code / Document Paths + +- `docs/ideas/from-claude-code/analysis_tool_system.md:450-620`: deferred registry, security validator pipeline, classifier input projection, dangerous pattern registry, and key file references. +- `docs/ideas/from-claude-code/filtered_recommendations.md:1-38`: architectural warning that AGH is an orchestration kernel, not a direct LLM API loop. +- `docs/ideas/from-claude-code/analysis_prompt_architecture.md`: prompt sections and tool-aware catalog deltas. +- `docs/ideas/from-claude-code/analysis_query_engine.md`: tool execution concurrency and exclusive scheduling references. +- `docs/ideas/from-claude-code/analysis_services_infra.md`: plugin refresh and availability ideas. + +## Transferable Patterns + +1. Build a provider-neutral registry search API first. +2. Add deferred schema loading as an AGH catalog behavior, not as an Anthropic-only assumption. +3. Track per-tool result budgets and persisted artifacts. +4. Keep security validators pluggable by tool family. +5. Model concurrency metadata even before advanced scheduling. +6. Treat live catalog deltas as optional driver capabilities with a fallback. + +## Risks / Mismatches + +AGH should not duplicate the ACP runtime's internal shell/browser/file tools unless there is a clear cross-runtime AGH-owned reason. + +AGH should not depend on Claude Code-only `tool_reference` wire formats for the core registry contract. + +AGH should not make shell command classifiers part of the foundation unless the MVP includes an AGH-owned shell tool. + +AGH should not over-inject tool catalogs into prompts. Progressive disclosure and search should be preferred once the tool universe grows. + +## Open Questions + +1. Should AGH's first registry search surface be `agh__tool_search` as an agent-callable tool, `agh tool search` as CLI, or both? +2. 
Should catalog delta support be designed now even if only a subset of drivers implement it? +3. Should result persistence share AGH's session event/artifact store or get a dedicated tool-result artifact store? + +## Evidence + +- `docs/ideas/from-claude-code/analysis_tool_system.md:450-620`: local implementation sketches for deferred registry, validators, result storage, and key Claude Code paths. +- `docs/ideas/from-claude-code/filtered_recommendations.md:1-38`: explicit "AGH is not the LLM API loop" constraint. +- `docs/ideas/from-claude-code/filtered_recommendations.md:96-134`: skills activation and prompt assembly ideas that intersect with registry progressive disclosure. diff --git a/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md b/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md new file mode 100644 index 000000000..0aa25b7fd --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md @@ -0,0 +1,87 @@ +# Competitor Analysis: GoClaw Tool Registry + +## Overview + +GoClaw is the closest Go-native reference. It has a runtime `tools.Tool` interface with executable behavior, a registry that owns aliases, metadata, disabled state, rate limiting, grouping, deferred activation, and an executor/policy layer. It also adapts MCP tools into the same local interface. + +The useful pattern for AGH is not GoClaw's exact `map[string]any` API. It is the separation between executable tool contract, registry metadata, run-scoped context, policy filtering, MCP adaptation, and dispatch hooks. + +## Mechanisms / Patterns + +GoClaw's core tool contract includes `Name`, `Description`, `Parameters`, and `Execute(context.Context, map[string]any) *Result`. This is weaker than the typed API AGH should build, but it proves the right direction: tools are executable handles, not only metadata records. + +The registry owns tools, metadata, aliases, disabled state, rate limiting, result scrubbing, groups, and deferred activation. 
`ExecuteWithContext` injects run-scoped data through `context.Context` instead of mutating shared tool instances. + +The policy engine filters tools through global, provider, agent, group, capability, subagent, and sandbox rules. Lazy/deferred activation rechecks policy before exposing or using tools. + +MCP bridge tools implement the same interface. The MCP adapter converts remote tools into local handles, rechecks grants at runtime, and normalizes results. + +Hooks are lifecycle gates, not tools. `pre_tool_use` can block tool execution, while post hooks can observe and mutate limited fields. This maps well to AGH's existing hook payloads. + +Skills are exposed partly through searchable artifacts and marker/no-op tools such as skill search/use. AGH should be cautious here: "use skill" can be useful telemetry, but skill content loading should be a real catalog operation, not just a marker. + +## Relevant Code Paths + +- `.resources/goclaw/internal/tools/types.go:14-129`: core executable tool interface and result types. +- `.resources/goclaw/internal/tools/registry.go:18-459`: registry, aliases, groups, disabled state, rate limiting, scrubber, and deferred activation. +- `.resources/goclaw/internal/tools/executor.go`: central execution support. +- `.resources/goclaw/internal/tools/policy.go:13-520`: multi-layer tool policy and filtering. +- `.resources/goclaw/internal/tools/capability.go`: capability inference and policy integration. +- `.resources/goclaw/internal/tools/result.go`: result model. +- `.resources/goclaw/internal/mcp/bridge_tool.go:42-155`: MCP bridge implements local tool contract. +- `.resources/goclaw/internal/mcp/manager.go:318-515`: MCP manager and tool lifecycle. +- `.resources/goclaw/internal/mcp/grant_checker.go:46-129`: runtime grant checks. +- `.resources/goclaw/internal/mcp/mcp_tool_search.go:67-101`: MCP tool search support. +- `.resources/goclaw/internal/agent/loop_tool_filter.go:22-96`: agent loop filtering. 
+- `.resources/goclaw/internal/pipeline/tool_stage.go:51-152`: pipeline stage around tool execution. +- `.resources/goclaw/internal/hooks/types.go:19-44`: hook types. +- `.resources/goclaw/internal/hooks/dispatcher.go:153-318`: hook dispatch and mutation. +- `.resources/goclaw/migrations/000001...:478-499`, `000027...:230-245`: custom tool storage history. + +## Transferable Patterns + +AGH should promote `tools.Tool` from a record to a runtime contract, but with stronger types than GoClaw. Prefer `json.RawMessage` plus schema validation and typed `ToolResult` over unconstrained `map[string]any` crossing every boundary. + +AGH should pass per-call/session/workspace/user information through a `ToolCallContext` or context-bound immutable values, not by mutating registry entries. + +AGH should keep external adapters under the same registry: MCP tools, extension sidecar tools, and future bridge tools should all be executable through `Registry.Call`. + +AGH should implement dynamic groups/toolsets as policy inputs, not as separate registries. + +AGH should recheck grants at runtime even when discovery already filtered a tool. + +AGH should treat hooks as gates around dispatch, not as an alternative dispatch surface. + +AGH should persist metadata separately from executable code. Installed or extension-provided tool records can remain in resource storage while executable backends are resolved at runtime. + +## Risks / Mismatches + +GoClaw's `map[string]any` API is weak for AGH. AGH already has JSON schema resources and generated API contracts, so it should preserve schema validation and raw JSON boundaries. + +Some GoClaw paths can bypass pre-tool hooks in parallel execution. AGH should enforce one central dispatch pipeline regardless of whether tools run concurrently. + +Shared registry state can leak per-user or per-session MCP availability if not scoped carefully. AGH should compute context-specific views rather than storing "available for everyone" as a global truth. 
+ +Capability inference by tool name is brittle. AGH should use explicit metadata and namespaces. + +Individual tool bodies should not own all security logic. The registry must own common gates: permission, availability, schema validation, risk class, hooks, result budget, redaction, telemetry, and concurrency. + +## Open Questions + +1. Should AGH's runtime registry be global with context-specific projections, or per-session snapshots derived from a global registry? +2. Should tool concurrency be enforced by a registry scheduler or by individual handlers returning metadata? +3. How should AGH persist disabled state and operator overrides for extension tools? +4. Should AGH include marker tools for skill use telemetry, or should skill view/install/load actions be real tools only? + +## Evidence + +- `.resources/goclaw/internal/tools/types.go:14-129`: executable Go tool contract. +- `.resources/goclaw/internal/tools/registry.go:18-459`: registry ownership of tools, aliases, groups, disabled state, and deferred activation. +- `.resources/goclaw/internal/tools/policy.go:13-520`: multi-layer policy. +- `.resources/goclaw/internal/mcp/bridge_tool.go:42-155`: MCP adapter as local tool. +- `.resources/goclaw/internal/mcp/manager.go:318-515`: MCP lifecycle. +- `.resources/goclaw/internal/mcp/grant_checker.go:46-129`: runtime grant checking. +- `.resources/goclaw/internal/agent/loop_tool_filter.go:22-96`: agent-specific filtering. +- `.resources/goclaw/internal/pipeline/tool_stage.go:51-152`: tool execution pipeline. +- `.resources/goclaw/internal/hooks/dispatcher.go:153-318`: hook-driven blocking/mutation. +- `.resources/goclaw/migrations/000001...:478-499`, `000027...:230-245`: persisted custom-tool metadata history. 
diff --git a/.compozy/tasks/tools-registry/analysis/analysis_hermes.md b/.compozy/tasks/tools-registry/analysis/analysis_hermes.md new file mode 100644 index 000000000..aad523a7d --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_hermes.md @@ -0,0 +1,91 @@ +# Competitor Analysis: Hermes Tool Registry + +## Overview + +Hermes has the clearest direct precedent for the "ToolRegistry" name. Its tool system centers on a single Python registry that collects tool definitions, schemas, handlers, availability checks, toolset membership, MCP adapters, plugin contributions, and dispatch metadata. + +The core pattern is powerful but too global for AGH to copy literally. The transferable part is the product shape: every tool contribution normalizes into one registry contract, the model-visible tool list is filtered by availability and toolset policy, and tool calls flow through one dispatch path. + +## Mechanisms / Patterns + +Hermes tools self-register at import time. A tool module imports the singleton registry and calls `registry.register(...)` with a name, toolset, JSON schema, handler, optional `check_fn`, env requirements, display metadata, async flag, and result budget. + +The registry provides: + +- `register`: add a tool definition and reject most non-MCP collisions. +- `get`: resolve a tool entry. +- `dispatch`: call the registered handler. +- `get_available_tools`: project the model-visible list after toolset and availability filters. + +Toolsets are recursive bundles. Named toolsets can compose other toolsets, and broad aliases such as `all` / `*` expand across registered tools. This is the most relevant pattern for AGH's agent-role tool policy. + +Availability is attached to the tool definition through `check_fn` and `requires_env`. Hermes filters unavailable tools before presenting definitions to the model. This is a critical reliability property, but Hermes does not consistently treat it as a dispatch-time security boundary. 
+ +MCP tools are adapted into the same registry. Hermes discovers MCP tools, normalizes schemas, prefixes/organizes names, refreshes on MCP `tools/list_changed`, and registers each remote tool as a local registry entry. Dynamic MCP refresh is tested. + +Plugins can contribute tools, but the exact dispatch path can bypass parts of the normal `handle_function_call` pipeline. AGH should avoid this split by making the registry dispatch path mandatory for every executable AGH-owned tool. + +## Relevant Code Paths + +- `.resources/hermes/tools/registry.py:1-14`: registry purpose and top-level contract. +- `.resources/hermes/tools/registry.py:23-64`: `ToolEntry` shape. +- `.resources/hermes/tools/registry.py:176-228`: singleton registry operations. +- `.resources/hermes/tools/registry.py:260-327`: availability filtering and definitions. +- `.resources/hermes/tools/registry.py:352-433`: dispatch and result handling. +- `.resources/hermes/model_tools.py:141-153`: built-ins are imported to trigger registration. +- `.resources/hermes/model_tools.py:209-370`: tool definition projection for model calls. +- `.resources/hermes/model_tools.py:389-528`: function-call handling. +- `.resources/hermes/model_tools.py:529-705`: result transformation and tool-call lifecycle. +- `.resources/hermes/toolsets.py:483-692`: recursive toolset composition. +- `.resources/hermes/hermes_cli/tools_config.py:681-849`: operator configuration for tools. +- `.resources/hermes/hermes_cli/plugins.py:210-380`: plugin load and metadata paths. +- `.resources/hermes/hermes_cli/plugins.py:518-646`: plugin tool dispatch path. +- `.resources/hermes/tools/mcp_tool.py:860-1038`: MCP discovery and schema adaptation. +- `.resources/hermes/tools/mcp_tool.py:1058-1296`: MCP tool registration details. +- `.resources/hermes/tools/mcp_tool.py:1850-2108`: MCP refresh and runtime paths. +- `.resources/hermes/tools/mcp_tool.py:2508-2770`: dynamic discovery integration. 
+- `.resources/hermes/tests/tools/test_mcp_dynamic_discovery.py:1-160`: tests for MCP dynamic tool refresh. +- `.resources/hermes/tools/process_registry.py:1-21`, `465-690`: process registry and scoped runtime management. + +## Transferable Patterns + +AGH should build one registry contract that all tool sources normalize into: built-in Go tools, extension manifest tools, extension sidecar tools, MCP tools, and future dynamic tools. + +AGH should separate toolset policy from tool definitions. Toolsets should be named bundles resolved recursively at list/dispatch time, not hardcoded into each provider. + +AGH should fail closed during discovery when required env vars, binaries, MCP servers, or extension sidecars are missing. The discovery surface should explain why a tool is unavailable to operators, while the agent-visible surface should omit or mark tools according to policy. + +AGH should route MCP tools through the same registry dispatch path as native tools. MCP is an adapter, not a separate tool universe. + +AGH should treat dynamic refresh as a first-class event. Hermes' MCP `tools/list_changed` path is a useful precedent for a registry generation counter and catalog delta notification. + +AGH should include result budgets and transformation at the registry boundary. Large outputs should be persisted or summarized consistently rather than left to individual handlers. + +## Risks / Mismatches + +Hermes relies heavily on a process-wide singleton and import-time registration. AGH should prefer explicit composition-root registration because daemon boot already wires skills, extensions, resource stores, hooks, API services, and session managers. + +Hermes uses permissive in-process plugin execution. AGH should not load third-party executable handlers into the daemon process for MVP. Out-of-process MCP or extension sidecar execution is a better fit for AGH's security and observability model. 
+ +Hermes availability filtering is strong for model-visible definitions, but AGH must also recheck availability at dispatch. Discovery filtering alone is not a security boundary. + +Hermes has some name-prefix and collision behavior around MCP tools. AGH should use structured namespaces and provenance instead of deriving security meaning from string prefixes. + +Hermes' plugin dispatch split is a warning. If AGH has CLI, HTTP, UDS, session, MCP, and extension entry points, all of them must call the same `Registry.Call` pipeline. + +## Open Questions + +1. Should AGH expose unavailable tools to operators with reasons while hiding them from model-visible surfaces? +2. Should AGH support recursive toolsets as resources, config fields, or both? +3. Should MCP `tools/list_changed` cause live session deltas, or only refresh the next model-visible catalog query? +4. What conflict policy should AGH use when multiple providers contribute the same short tool name? + +## Evidence + +- Hermes registry: `.resources/hermes/tools/registry.py:23-64`, `176-228`, `260-327`, `352-433`. +- Hermes request pipeline: `.resources/hermes/model_tools.py:141-705`. +- Hermes toolsets: `.resources/hermes/toolsets.py:483-692`. +- Hermes MCP adapter: `.resources/hermes/tools/mcp_tool.py:860-1296`, `1850-2108`, `2508-2770`. +- Hermes plugin loader/dispatch: `.resources/hermes/hermes_cli/plugins.py:210-380`, `518-646`. +- Hermes dynamic discovery tests: `.resources/hermes/tests/tools/test_mcp_dynamic_discovery.py:1-160`. +- Hermes process registry: `.resources/hermes/tools/process_registry.py:1-21`, `465-690`. 
diff --git a/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md b/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md new file mode 100644 index 000000000..551c0d3e2 --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md @@ -0,0 +1,87 @@ +# Competitor Analysis: OpenClaw Tool and Plugin Registry + +## Overview + +OpenClaw's strongest contribution is its two-phase extension model: manifest-first static discovery followed by runtime materialization. Plugin manifests can declare tool contracts, configuration, activation hints, and ownership metadata without immediately executing plugin code. Runtime tool registration then materializes concrete tools through plugin APIs or MCP adapters. + +This is highly relevant to AGH because AGH already lets extension manifests publish static `resources.tools`, but lacks a runtime execution boundary and a registry that can connect static declarations to callable backends. + +## Mechanisms / Patterns + +OpenClaw reads plugin manifests first. Static `contracts.tools`, config schema, activation hints, ownership, and duplicate diagnostics are available before runtime code is loaded. This supports cheap discovery, trust decisions, and policy projection. + +Runtime tools are registered through `api.registerTool` as either concrete tools or factories receiving a plugin tool context. This avoids global singleton state and gives each tool access to scoped runtime services. + +Tool materialization is policy-filtered. Core and plugin tools are assembled, then filtered by profile/provider/global/agent/group/sandbox/subagent rules. Plugin owner metadata supports policy by plugin id or broad plugin groups. + +OpenClaw treats MCP as a provider backend. Bundle/user MCP configs are connected over stdio, SSE, or streamable HTTP, tools are listed and sanitized, calls are wrapped, and per-session runtime instances have idle TTL and fingerprint invalidation. 
+ +OpenClaw also exposes a direct HTTP gateway for tool invocation. This is useful as a manageability pattern, but AGH should only expose direct invoke through strict local authorization and the same registry dispatch pipeline. + +## Relevant Code Paths + +- `.resources/openclaw/src/plugins/manifest.ts:250-367`: manifest contract shape. +- `.resources/openclaw/src/plugins/manifest.ts:539-583`: manifest validation and duplicate handling. +- `.resources/openclaw/src/plugins/manifest.ts:1161-1251`: manifest registry integration. +- `.resources/openclaw/src/plugins/manifest-registry.ts:303-379`: plugin manifest discovery. +- `.resources/openclaw/src/plugins/manifest-registry.ts:640-805`: precedence/diagnostics behavior. +- `.resources/openclaw/src/plugins/types.ts:2209-2353`: plugin API and tool registration types. +- `.resources/openclaw/src/plugins/tool-types.ts:8-45`: tool type definitions. +- `.resources/openclaw/src/plugins/registry.ts:421-446`: plugin registry access patterns. +- `.resources/openclaw/src/plugins/registry.ts:1464-1557`: activation/materialization path. +- `.resources/openclaw/src/plugins/tools.ts:111-239`: tool assembly from plugins. +- `.resources/openclaw/src/agents/pi-tools.ts:585-656`, `673-727`: policy-filtered agent tool projection. +- `.resources/openclaw/src/agents/tool-policy.ts:102-164`: tool policy model. +- `.resources/openclaw/src/agents/tool-policy-pipeline.ts:36-147`: policy pipeline. +- `.resources/openclaw/src/agents/pi-bundle-mcp-runtime.ts:181-575`: MCP runtime sessions and lifecycle. +- `.resources/openclaw/src/agents/pi-bundle-mcp-materialize.ts:64-174`: MCP tool materialization. +- `.resources/openclaw/docs/gateway/tools-invoke-http-api.md:11-146`: direct tool invocation gateway. + +## Transferable Patterns + +AGH should keep manifest-first discovery. Extension `resources.tools` should remain static and cheap to inspect. Runtime code should not be needed to list declared tools. 
+ +AGH should attach structured owner/provenance metadata to every tool: source kind, source id, namespace, extension id, MCP server id, bundle id, trust tier, and conflict state. + +AGH should materialize tools through factories/adapters with a scoped context rather than globals. Built-ins receive daemon services; MCP tools receive server clients; extension tools receive a Host API or sidecar client. + +AGH should support extension-level grants and expand them into explicit tool permissions. "Allow extension X" should resolve to the tool ids owned by extension X at a specific registry generation. + +AGH should isolate failures. If an extension sidecar is unhealthy, its tools should become unavailable with reasons without breaking the full registry. + +AGH should cache materialized context-specific views. Tool projection can be hot-path work for session starts, catalog queries, and live deltas. + +AGH should expose direct invocation only through the same registry dispatch pipeline and only on local trusted surfaces such as UDS/daemon-authenticated HTTP. + +## Risks / Mismatches + +OpenClaw manifests are broad. AGH should keep the MVP manifest addition small and avoid a general plugin DSL inside the tool registry workstream. + +Some OpenClaw discovery modes may still execute plugin code. AGH should make manifest-first discovery a hard rule for untrusted extensions. + +Global tool names create collision pressure. AGH should require stable namespaced ids and optionally expose short display names. + +`optional: true` style availability is too coarse for AGH. Availability should distinguish not installed, disabled, unauthorized, unhealthy, dependency missing, config missing, sandbox denied, and conflict. + +Trusted bundled-only policies are insufficient for AGH's extension story. Trust tier and execution boundary should both be explicit. + +## Open Questions + +1. 
Should AGH extension tools declare a backend kind in the manifest, such as `mcp`, `host_api`, or `subprocess`? +2. Should extension-owned tools be disabled until an operator grants the extension's requested tool family? +3. Should AGH allow extension-private tools that only that extension can call? +4. How should AGH represent duplicate names: hard error, namespaced id only, or visible conflict diagnostics? + +## Evidence + +- `.resources/openclaw/src/plugins/manifest.ts:250-367`, `539-583`, `1161-1251`: manifest-first contracts and validation. +- `.resources/openclaw/src/plugins/manifest-registry.ts:303-379`, `640-805`: discovery and precedence diagnostics. +- `.resources/openclaw/src/plugins/types.ts:2209-2353`: runtime plugin tool registration API. +- `.resources/openclaw/src/plugins/tool-types.ts:8-45`: tool type shape. +- `.resources/openclaw/src/plugins/tools.ts:111-239`: plugin tool assembly. +- `.resources/openclaw/src/agents/pi-tools.ts:585-656`, `673-727`: agent projection. +- `.resources/openclaw/src/agents/tool-policy.ts:102-164`: policy model. +- `.resources/openclaw/src/agents/tool-policy-pipeline.ts:36-147`: policy pipeline. +- `.resources/openclaw/src/agents/pi-bundle-mcp-runtime.ts:181-575`: MCP runtime. +- `.resources/openclaw/src/agents/pi-bundle-mcp-materialize.ts:64-174`: MCP materialization. +- `.resources/openclaw/docs/gateway/tools-invoke-http-api.md:11-146`: direct invoke gateway. diff --git a/.compozy/tasks/tools-registry/analysis/synthesis.md b/.compozy/tasks/tools-registry/analysis/synthesis.md new file mode 100644 index 000000000..5ff0864fe --- /dev/null +++ b/.compozy/tasks/tools-registry/analysis/synthesis.md @@ -0,0 +1,297 @@ +# Tool Registry Synthesis and Proposed Direction + +## Executive Summary + +AGH should build a Tool Registry as a daemon-owned runtime service that composes tool metadata, availability, policy, execution, hooks, telemetry, and extension adapters. + +The foundation should not be "add many built-in tools." 
The foundation should be: + +1. a first-class runtime tool contract, +2. a registry that aggregates built-ins, MCP, extensions, and dynamic providers, +3. a policy/availability projection for each agent/session, +4. a single dispatch pipeline, +5. an extension-safe execution boundary, +6. agent-manageable CLI/HTTP/UDS/session surfaces, +7. toolsets/bundles comparable to skills. + +The ACP inventory found `.resources/openfang` present, but with no meaningful ACP evidence. + +## Recommended Architecture + +### Accepted decisions so far + +- Extension tool execution boundary: manifest-first descriptors with out-of-process execution only for extension tools in the MVP. +- Session exposure path: AGH-hosted local MCP server plus shared CLI/HTTP/UDS contracts. +- Package boundary: `internal/tools` owns runtime registry contracts and dispatch; a thin `internal/catalog` facade composes tools and skills for cross-domain discovery. +- MVP native tool scope: bootstrap catalog/skill tools plus network and bounded task tools (`agh__tool_*`, `agh__skill_*`, `agh__network_peers`, `agh__network_send`, `agh__task_*`). +- Policy integration: existing ACP `permissions.mode` is the system/session approval ceiling; registry policy is a granular layer below it and cannot silently grant more authority than ACP policy allows. +- Visibility by surface: operator surfaces show unavailable/unauthorized/conflicted tools with reason codes; session/model-visible surfaces expose only tools callable in that effective session context. +- Naming/collision policy: one canonical public `ToolID` uses provider-safe lower snake segments separated by reserved `__`, for example `agh__skill_view` and `mcp__github__create_issue`; this is captured in ADR-007. + +### ACP compatibility finding + +ACP does not define a durable callable-tool registry. It defines session lifecycle, `mcpServers` bootstrap fields, client authority callbacks, permission requests, and observable tool-call events. 
ACP `ToolCall` records have `toolCallId`, human-readable `title`, coarse `kind`, status, locations, raw input/output, and content, but no programmatic tool `name` equivalent to MCP `Tool.name`. + +This means the Tool Registry should remain an AGH daemon/runtime service. Session exposure should use the accepted AGH-hosted MCP path, where AGH exposes the canonical `ToolID` directly as the hosted MCP `Tool.name`. + +Accepted identity format: + +- `ToolID`: stable provider-safe id with reserved `__` namespace separators, such as `agh__skill_view`. +- `DisplayTitle`: human-readable and non-unique. +- `SourceRef`: structured provenance, not inferred only from prefixes. + +Collision handling must be fail-closed. Canonical ID collisions are provider registration errors or operator diagnostics. Sanitized external-name collisions make the affected tools unavailable to that session until resolved. Display title collisions are allowed because titles are not policy identities. + +### 1. Split descriptor, runtime handle, and resource record + +Keep the existing `internal/tools.Tool` resource shape as the cold catalog/desired-state record, but introduce a runtime contract with separate types: + +- `ToolID`: stable provider-safe id such as `agh__skill_view`, `mcp__github__create_issue`, `ext__linear__search`. +- `Descriptor`: identity, description, input schema, optional output schema, read-only/destructive/open-world/concurrency metadata, source/provenance, visibility, tags, owner, result budget. +- `Handle`: descriptor plus `Availability(ctx, ToolContext)` and `Call(ctx, ToolCall)` for executable tools. +- `Provider`: contributes descriptors/handles and can refresh. +- `Registry`: owns provider registration, indexing, listing, search, policy projection, and dispatch. +- `ToolResult`: structured output, preview, artifacts, redactions, bytes, display title, metadata. 
+ +This avoids overloading the desired-state resource with function pointers while still allowing resource records to feed the runtime registry. + +### 2. Use manifest-first extension tools + +Extension manifests should continue to declare tool metadata statically. Add enough metadata to connect the declaration to a backend: + +- backend kind: `mcp`, `extension_host`, `subprocess`, or `builtin` where appropriate; +- namespace/owner; +- visibility; +- risk class; +- required config/env/capabilities; +- optional toolset memberships. + +The registry can list these tools without executing extension code. A tool becomes executable only when its backend adapter is healthy, authorized, and has a callable handle. + +Recommendation for MVP: no in-process third-party handlers. Built-in Go tools can register in-process. Extension tools should execute through MCP or an extension sidecar/Host API adapter. + +### 3. Make availability a state machine, not a boolean + +Use explicit status: + +- `registered`: descriptor exists. +- `enabled`: operator/session policy has not disabled it. +- `available`: dependencies are present and backend is healthy. +- `authorized`: caller policy permits visibility/use. +- `executable`: there is a live handle for dispatch. +- `conflicted`: id/name collision requires resolution. + +Discovery can hide unavailable/unauthorized tools from agents while operator surfaces show reasons. Dispatch must recheck availability and authorization. + +The registry should expose separate operator and session projections. The operator projection includes diagnostics, source/provenance, policy reasons, availability reasons, and conflicts. The session projection powers hosted MCP and future driver injection and includes only tools that pass effective visibility/execution gates for that session. + +### 4. Centralize dispatch + +Every AGH-owned tool call should pass through: + +1. resolve tool id/alias in context, +2. validate input against schema, +3. 
compute availability, +4. evaluate policy and session permission atoms, +5. run `tool.pre_call` hooks, +6. enforce concurrency/rate/result budgets, +7. call provider adapter, +8. normalize result, +9. redact/truncate/persist output, +10. run `tool.post_call` or `tool.post_error` hooks, +11. emit telemetry. + +No CLI, HTTP, UDS, MCP, extension, or session path should bypass this pipeline. + +### 5. Model policy as overlays + +Use one policy engine that combines: + +- system/session ACP `permissions.mode`, +- daemon defaults, +- workspace config, +- extension grants, +- agent definition, +- session lineage `SessionPermissionPolicy.Tools`, +- skill/command scoped grants where relevant, +- explicit allow/deny patterns, +- named toolsets, +- risk defaults. + +Toolsets should be recursive resources/config entries. This copies Hermes' strongest idea while fitting AGH's resource model. + +The registry must not create a second approval system that contradicts ACP. `approve-all` auto-approves otherwise allowed tools without prompting, but explicit registry denies, source grants, session lineage restrictions, availability failures, and hooks still apply. `approve-reads` auto-approves only registry-classified read-only tools. `deny-all` denies execution by default and requires an explicit approval path. + +### 6. Provide a small bootstrap native toolset + +The TechSpec should not enumerate every future AGH tool. It should require a small proving set: + +- `agh__tool_list` +- `agh__tool_search` +- `agh__tool_info` +- `agh__skill_list` +- `agh__skill_search` +- `agh__skill_view` + +Accepted MVP additions beyond the proving set (see "Accepted decisions so far" and "In scope"): + +- `agh__network_peers` +- `agh__network_send` +- a bounded `agh__task_*` set + +Optional later groups: + +- `agh__skill_install` +- `agh__extension_*` + +The bootstrap set proves discovery, schema loading, skill body loading, policy, result budget, and telemetry without overcommitting the whole daemon. + +### 7. 
Expose agent-manageable surfaces + +The registry should have shared contract types used by: + +- CLI: `agh tool list/search/info/invoke`. +- HTTP: `/api/tools`, `/api/tools/{id}`, `/api/tools/{id}/invoke`. +- UDS: same operations for local agents and internal tools. +- Session-visible tool surface: the accepted AGH-hosted MCP server for the MVP; driver-specific ACP tool injection where possible, and a fallback where agents can use `agh` CLI/UDS through their runtime, remain later options. + +The TechSpec should build the MVP on the accepted AGH-hosted MCP path and keep the others as contract-compatible surfaces. + +### 8. Treat Tool Search as provider-neutral + +Claude Code's `tool_reference` mechanism is useful but not portable. AGH should implement registry search as a normal catalog operation first: + +- search over name, namespace, description, tags, source, toolset, and search hints; +- return metadata first; +- load schema/details on demand; +- optionally persist discovered state per session later. + +Driver-specific schema-on-demand integration can be a future enhancement. + +### 9. Reuse existing AGH infrastructure + +Build on: + +- `internal/tools` for contracts and registry, +- `internal/resources` for desired-state records, +- `internal/extension` manifest publication, +- `internal/hooks` for pre/post/error dispatch gates, +- `internal/store.SessionPermissionPolicy` for lineage constraints, +- `internal/skills.Registry` for skill listing/content, +- `internal/toolruntime` for subprocess ownership if extension tools need process handles, +- `internal/api/contract` for shared HTTP/UDS payloads. + +Avoid a large generic `internal/catalog` at first unless it only coordinates cross-domain search. The runtime tool registry belongs in or near `internal/tools`; a catalog facade can compose tools and skills for `agh__tool_*` / `agh__skill_*`. + +## Proposed MVP Scope + +### In scope + +- Runtime tool registry contract and central dispatch pipeline. 
+- Built-in provider for `agh__tool_list`, `agh__tool_search`, `agh__tool_info`, `agh__skill_list`, `agh__skill_search`, `agh__skill_view`, `agh__network_peers`, `agh__network_send`, and a bounded `agh__task_*` set. +- Resource-backed descriptors from existing `tool` records. +- Extension manifest backend metadata for future executable extension tools. +- MCP adapter design, even if full MCP call-through is deferred. +- Context-specific list/search/info APIs. +- Tool policy with allow/deny patterns and named toolsets. +- Availability model and reason codes. +- Hook integration for pre/post/error. +- Telemetry events for list/search/info/call and failures. +- CLI/HTTP/UDS contract surfaces. + +### Out of scope for MVP + +- Full shell/browser/file tool replacement for ACP runtimes. +- Provider-specific Anthropic `tool_reference` integration. +- In-process third-party extension handlers. +- Large catalog of AGH-native tools beyond the selected catalog/skill/network/task set. +- Skill install/remove/update tools unless explicitly paired with supply-chain policy/scanning work. +- Network peer remote tool execution. +- Marketplace signing/trust overhaul, except for explicit risk hooks needed by extension tools. + +## Critical Decisions Before TechSpec + +Decisions 1-4, 6, and 7 are already resolved under "Accepted decisions so far"; the original options are kept here for context. Decision 5 is only partly resolved: ACP `permissions.mode` is the approval ceiling, but the default state for external tools is not stated there. + +1. Extension execution boundary: out-of-process only, metadata-only first, or trusted in-process handlers. +2. Session exposure path: hosted MCP, direct ACP extension, CLI/UDS fallback, or all in phases. +3. Package boundary: runtime registry in `internal/tools` with a catalog facade, or a new `internal/catalog` owning tools and skills together. +4. MVP tool set: only list/search/info/view, or include mutating install/network/task tools. +5. Policy defaults: external tools disabled, ask, or visible-but-not-callable until granted. +6. Availability visibility: hide unavailable tools from agents, show unavailable tools with reasons, or configurable by surface. +7. Naming/collision policy: accepted in ADR-007. 
Use one canonical provider-safe `ToolID` with reserved `__` namespace separators, display-only title, structured provenance, and no shadowing or silent sanitized-name collisions. + +## Competitor Pattern Matrix + +| Pattern | Hermes | Claude Code | GoClaw | OpenClaw | AGH Recommendation | +|---|---:|---:|---:|---:|---| +| Single normalized tool contract | Yes | Yes | Yes | Yes | Required | +| Runtime executable registry | Yes | Distributed | Yes | Yes | Required | +| Manifest-first extension discovery | Partial | Plugin metadata | Partial | Strong | Required | +| MCP as adapter | Yes | Strong | Yes | Strong | Required | +| Availability gating | Strong discovery | `isEnabled` + MCP state | Policy/lazy checks | Lifecycle state | Required at discovery and dispatch | +| Central dispatch | Mostly | Strong | Mostly | Gateway + adapters | Required with no bypass | +| Toolsets/bundles | Strong | Policy lists | Groups | Policy groups | Required | +| Deferred search | Partial | Strong | Search helpers | Partial | Provider-neutral MVP | +| Concurrency metadata | Partial | Strong | Partial | Partial | Required metadata, scheduling can evolve | +| Extension in-process handlers | Yes | No native direct tools | Some | Plugin API | Avoid for MVP | + +## Implementation Shape to Explore in TechSpec + +```go +type Descriptor struct { + ID ToolID + DisplayTitle string + Description string + InputSchema json.RawMessage + OutputSchema json.RawMessage + Source SourceRef + Visibility Visibility + Risk RiskClass + ReadOnly bool + Destructive bool + OpenWorld bool + ConcurrencySafe bool + MaxResultBytes int64 + Toolsets []string + Tags []string +} + +type Handle interface { + Descriptor() Descriptor + Availability(ctx context.Context, call ToolContext) Availability + Call(ctx context.Context, call ToolCall) (ToolResult, error) +} + +type Provider interface { + ID() string + ListTools(ctx context.Context) ([]Descriptor, error) + Resolve(ctx context.Context, id ToolID) (Handle, bool, error) +} 
+ +type Registry interface { + List(ctx context.Context, scope Scope) ([]ToolView, error) + Search(ctx context.Context, scope Scope, query SearchQuery) ([]ToolView, error) + Get(ctx context.Context, scope Scope, id ToolID) (ToolView, error) + Call(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error) +} +``` + +The exact Go shape can change, but the separation should hold. + +## Risks + +If AGH exposes tool invocation before policy and availability are in place, it will create a broader attack surface than today's ACP-delegated tools. + +If extension tools can run in-process, one bad extension can compromise the daemon. + +If the registry only lists resources but does not dispatch, AGH will still lack the "last mile" that motivated the feature. + +If the registry only works through one surface, agents will not be able to manage it consistently. + +If name collisions are postponed, extension/MCP adoption will force a breaking change later. + +## Evidence Index + +- AGH current state: `analysis_agh_current_state.md`. +- Hermes reference: `analysis_hermes.md`. +- Claude Code reference: `analysis_claude-code.md`. +- GoClaw reference: `analysis_goclaw.md`. +- OpenClaw reference: `analysis_openclaw.md`. +- Local Claude Code ideas: `analysis_claude_code_ideas.md`. +- Prior autonomy gaps: `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`. +- Security constraints: `.compozy/tasks/hermes/analysis/analysis_tools_security.md`. diff --git a/.compozy/tasks/tools-registry/qa/peer-review-prompt-round2.md b/.compozy/tasks/tools-registry/qa/peer-review-prompt-round2.md new file mode 100644 index 000000000..a1099f32e --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-prompt-round2.md @@ -0,0 +1,84 @@ +You are an architecture reviewer pressure-testing an AGH TechSpec authored by another LLM. +The spec ships into a greenfield-alpha codebase with zero production users; bias toward +simpler, deletable solutions over compatibility shims. 
+ +CONTEXT FILES TO READ: +- TechSpec: /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md +- ADRs: + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md +- Research: + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_hermes.md + - 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md +- Architecture rules: + - /Users/pedronauck/Dev/compozy/agh/CLAUDE.md + - /Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md +- Lessons: /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/ + +REVIEW FOCUS: +- This is round 2 after the spec was corrected from descriptor-only extension/MCP tools to executable `native_go`, executable `extension_host`, and executable `mcp` backends. +- Pressure-test whether TypeScript and Go extension function authoring is concrete enough for implementation without in-process third-party code. +- Pressure-test whether remote MCP call-through correctly reuses existing MCP config/auth and does not create a parallel token lifecycle. +- Pressure-test whether mutating/destructive/open-world extension/MCP tools are sufficiently gated by source policy, ACP ceiling, approval, session lineage, and hooks. + +YOUR JOB: +1. Read every context file fully before reasoning. +2. Identify BLOCKERS (issues that prevent approval): unsound concurrency, missing migration paths, + under-specified safety invariants, parallel-queue creation, hooks tailing event tables, hidden + coupling to deferred features, security regressions (raw claim_token leakage, unverified-format + identity classification), schema-without-migration, partial-surface completion (CLI/HTTP only, + UDS/docs/codegen later), test-shape violations baked into the plan. +3. Identify NITS (non-blocking improvements): clarity, naming, test-density, observability event + coverage, doc co-ship completeness. +4. Issue a READINESS verdict: READY / BLOCKED / NEEDS_REWORK. + +CONSTRAINTS: +- Greenfield: prefer "delete the old thing" over "preserve compat". +- Hard cuts only: any rename touches code, storage, APIs, CLI, extensions, specs, RFCs, + and .compozy/tasks/* artifacts in the same change. 
+- task_runs is the single durable queue. Reject any parallel queue. +- ClaimNextRun is the only authoritative claim primitive. Reject any peer claimer. +- Manual operator paths converge with autonomous on the same primitives. +- Hooks dispatch at the call site; never tail event tables. +- claim_token (raw) never crosses transport, channel, log, or memory. +- Generated artifacts co-ship with source change in same PR. +- Subagents are read-only. + +OUTPUT FORMAT (strict JSON): +{ + "blockers": [ + { + "id": "B-NNN", + "section": "", + "issue": "", + "rationale": "", + "suggested_fix": "" + } + ], + "nits": [ + { + "id": "N-NNN", + "section": "", + "issue": "", + "suggested_fix": "" + } + ], + "readiness": "READY|BLOCKED|NEEDS_REWORK", + "summary": "" +} + +Do not output anything outside the JSON object. Do not soften criticism. diff --git a/.compozy/tasks/tools-registry/qa/peer-review-prompt.md b/.compozy/tasks/tools-registry/qa/peer-review-prompt.md new file mode 100644 index 000000000..021055c06 --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-prompt.md @@ -0,0 +1,75 @@ +You are an architecture reviewer pressure-testing an AGH TechSpec authored by another LLM. +The spec ships into a greenfield-alpha codebase with zero production users; bias toward +simpler, deletable solutions over compatibility shims. 
+ +CONTEXT FILES TO READ: +- TechSpec: /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md +- ADRs: + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md +- Research: + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_hermes.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md + - /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md +- Architecture rules: + - /Users/pedronauck/Dev/compozy/agh/CLAUDE.md + - /Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md +- Lessons: /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/ + +YOUR JOB: +1. 
Read every context file fully before reasoning. +2. Identify BLOCKERS (issues that prevent approval): unsound concurrency, missing migration paths, + under-specified safety invariants, parallel-queue creation, hooks tailing event tables, hidden + coupling to deferred features, security regressions (raw claim_token leakage, unverified-format + identity classification), schema-without-migration, partial-surface completion (CLI/HTTP only, + UDS/docs/codegen later), test-shape violations baked into the plan. +3. Identify NITS (non-blocking improvements): clarity, naming, test-density, observability event + coverage, doc co-ship completeness. +4. Issue a READINESS verdict: READY / BLOCKED / NEEDS_REWORK. + +CONSTRAINTS: +- Greenfield: prefer "delete the old thing" over "preserve compat". +- Hard cuts only: any rename touches code, storage, APIs, CLI, extensions, specs, RFCs, + and .compozy/tasks/* artifacts in the same change. +- task_runs is the single durable queue. Reject any parallel queue. +- ClaimNextRun is the only authoritative claim primitive. Reject any peer claimer. +- Manual operator paths converge with autonomous on the same primitives. +- Hooks dispatch at the call site; never tail event tables. +- claim_token (raw) never crosses transport, channel, log, or memory. +- Generated artifacts co-ship with source change in same PR. +- Subagents are read-only. + +OUTPUT FORMAT (strict JSON): +{ + "blockers": [ + { + "id": "B-NNN", + "section": "", + "issue": "", + "rationale": "", + "suggested_fix": "" + } + ], + "nits": [ + { + "id": "N-NNN", + "section": "", + "issue": "", + "suggested_fix": "" + } + ], + "readiness": "READY|BLOCKED|NEEDS_REWORK", + "summary": "" +} + +Do not output anything outside the JSON object. Do not soften criticism. 
diff --git a/.compozy/tasks/tools-registry/qa/peer-review-result-round2.err b/.compozy/tasks/tools-registry/qa/peer-review-result-round2.err new file mode 100644 index 000000000..e69de29bb diff --git a/.compozy/tasks/tools-registry/qa/peer-review-result-round2.json b/.compozy/tasks/tools-registry/qa/peer-review-result-round2.json new file mode 100644 index 000000000..28cd4dda7 --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-result-round2.json @@ -0,0 +1,408 @@ +{"type":"run.started","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:35.724535Z","status":"running","usage":{}} +{"type":"session.attached","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:39.688198Z","session":{"acp_session_id":"596f6a7f-444c-4dc6-82c6-c9f17f26bcd1"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:39.688557Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:42.912712Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:43.253712Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01XZAnAYirZ9mppmDzJPQ8X9","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01XZAnAYirZ9mppmDzJPQ8X9","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:44.154947Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01XZAnAYirZ9mppmDzJPQ8X9","blocks":[{"type":"tool_use","id":"toolu_01XZAnAYirZ9mppmDzJPQ8X9","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:44.155097Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01QnnCfEBCxGd9w7icUeQjn2","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01QnnCfEBCxGd9w7icUeQjn2","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:44.871368Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QnnCfEBCxGd9w7icUeQjn2","blocks":[{"type":"tool_use","id":"toolu_01QnnCfEBCxGd9w7icUeQjn2","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:44.871655Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01QEunTAMSBrPGkfzNGPU5sK","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01QEunTAMSBrPGkfzNGPU5sK","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:45.578239Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QEunTAMSBrPGkfzNGPU5sK","blocks":[{"type":"tool_use","id":"toolu_01QEunTAMSBrPGkfzNGPU5sK","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:45.578326Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Wn98aR8jsYueoDzYWZoDY8","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Wn98aR8jsYueoDzYWZoDY8","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:46.230849Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Wn98aR8jsYueoDzYWZoDY8","blocks":[{"type":"tool_use","id":"toolu_01Wn98aR8jsYueoDzYWZoDY8","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:46.231035Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01MBywGZmnoRTjhM2eByZF53","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01MBywGZmnoRTjhM2eByZF53","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:46.709741Z","update":{"kind":"tool_call_started","toolCallId":"toolu_015h7PbqqRrN5mfoqkrVPbLF","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_015h7PbqqRrN5mfoqkrVPbLF","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:46.709871Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01MBywGZmnoRTjhM2eByZF53","blocks":[{"type":"tool_use","id":"toolu_01MBywGZmnoRTjhM2eByZF53","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.194948Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015h7PbqqRrN5mfoqkrVPbLF","blocks":[{"type":"tool_use","id":"toolu_015h7PbqqRrN5mfoqkrVPbLF","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.246102Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.256201Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01MBywGZmnoRTjhM2eByZF53","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.256326Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015h7PbqqRrN5mfoqkrVPbLF","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.256358Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QEunTAMSBrPGkfzNGPU5sK","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.256517Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Wn98aR8jsYueoDzYWZoDY8","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.256763Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QnnCfEBCxGd9w7icUeQjn2","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.259526Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01MBywGZmnoRTjhM2eByZF53","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01MBywGZmnoRTjhM2eByZF53","content":"```\n1\t# ADR-004: MVP Tool Scope\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tThe Tool Registry MVP must prove more than metadata listing. It must demonstrate that AGH-owned tools can be discovered, authorized, invoked through the hosted MCP surface, called through CLI/HTTP/UDS contracts, routed through one dispatch pipeline, and observed.\n10\t\n11\tThe smallest read-only bootstrap toolset would prove catalog mechanics, but it would not prove meaningful autonomy. AGH's product direction requires agents to manage coordination and task flows, so network and task tools should be represented in the first implementation slice.\n12\t\n13\tAt the same time, skill installation is a separate supply-chain surface. It requires stricter install policy, trust tiers, approval flows, and scanner decisions. It should not be bundled into the first registry execution proof unless the supply-chain work is explicitly scoped.\n14\t\n15\t## Decision\n16\t\n17\tThe MVP tool scope includes four executable groups:\n18\t\n19\t1. Built-in `native_go` tools for AGH catalog, skill, network, and bounded task operations.\n20\t2. Installed extension-host tools implemented through TypeScript or Go subprocess SDKs.\n21\t3. Remote/local MCP tools discovered from existing MCP config/resource sources and called through daemon-owned MCP clients.\n22\t4. 
The AGH-hosted MCP exposure proxy that presents the effective session projection for all callable groups.\n23\t\n24\tThe built-in `native_go` scope includes:\n25\t\n26\t- `agh__tool_list`\n27\t- `agh__tool_search`\n28\t- `agh__tool_info`\n29\t- `agh__skill_list`\n30\t- `agh__skill_search`\n31\t- `agh__skill_view`\n32\t- `agh__network_peers`\n33\t- `agh__network_send`\n34\t- `agh__task_list`\n35\t- `agh__task_read`\n36\t- `agh__task_create`\n37\t- `agh__task_child_create`\n38\t- `agh__task_update`\n39\t- `agh__task_cancel`\n40\t- `agh__task_run_list`\n41\t\n42\tClaim/release/complete/fail/run-start task operations are excluded from this MVP because they cross claim-token, lease, spawn, and session lifecycle authority. They require a separate task execution TechSpec.\n43\t\n44\tSkill install/remove/update tools are not included in the MVP native tool scope unless a later decision explicitly adds the required supply-chain and approval work.\n45\t\n46\tThe MVP must also include executable proof fixtures:\n47\t\n48\t- a TypeScript extension defining at least one read-only tool and one mutating tool through `extension.tool(...)`;\n49\t- a Go subprocess extension defining equivalent tools through the public Go extension SDK;\n50\t- an MCP test server with read-only and mutating tools, auth status coverage, and remote call-through.\n51\t\n52\t## Consequences\n53\t\n54\tThe MVP must include both read-only and mutating tools. The registry must model risk, read-only/destructive/open-world flags, permission checks, and policy gates from the first implementation.\n55\t\n56\tNative, extension-host, and MCP tools must use the same registry dispatch path as catalog tools. They must not call around policy, availability, hooks, result budgeting, auth redaction, or telemetry.\n57\t\n58\tThe hosted MCP server must expose only the tool subset authorized for the session. 
Agent-visible discovery must not advertise network/task tools to sessions that lack the required permission atoms.\n59\t\n60\tQA must include real scenario coverage for:\n61\t\n62\t- listing and searching tools,\n63\t- viewing a skill body through `agh__skill_view`,\n64\t- listing peers,\n65\t- sending a network message through `agh__network_send` with permission enforcement,\n66\t- creating/updating or otherwise exercising the bounded task tool set,\n67\t- invoking a TypeScript extension-host tool through CLI/HTTP/UDS/hosted MCP,\n68\t- invoking a Go SDK extension-host tool through CLI/HTTP/UDS/hosted MCP,\n69\t- invoking a remote MCP-backed tool through CLI/HTTP/UDS/hosted MCP,\n70\t- proving unauthorized sessions cannot see or call mutating/destructive tools.\n71\t\n72\t## Rejected Alternatives\n73\t\n74\t### Read-only bootstrap only\n75\t\n76\tThis would be safer and simpler, but it would leave the registry unproven for AGH's coordination and autonomy use cases.\n77\t\n78\t### Bootstrap plus skill install\n79\t\n80\tThis would improve agent self-service, but it introduces supply-chain risk that belongs in a dedicated policy/scanning/install decision.\n81\t\n82\t### Foundation only\n83\t\n84\tThis would create the architecture without proving the agent-first experience that motivated the Tool Registry work.\n85\t\n86\t## Evidence\n87\t\n88\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: recommends a small bootstrap set and identifies network/task tools as later groups.\n89\t- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`: prior gaps identify network and task tools as strategically important agent-callable surfaces.\n90\t- `.compozy/tasks/hermes/analysis/analysis_tools_security.md`: mutating and open-world tools require stronger permission and security gates before broad exposure.\n91\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.259721Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QEunTAMSBrPGkfzNGPU5sK","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01QEunTAMSBrPGkfzNGPU5sK","content":"```\n1\t# ADR-002: Session Tool Exposure Path\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tThe Tool Registry must be agent-manageable, not only an internal daemon API. AGH manages external ACP-compatible runtimes, so it cannot assume direct control over every provider's internal model API tool loop.\n10\t\n11\tThe registry still needs a model-visible path for session-callable tools such as `agh__tool_search`, `agh__skill_view`, `ext__linear__search`, and `mcp__github__create_issue`. Competitor research suggests MCP is the most portable first boundary:\n12\t\n13\t- Claude Code and other runtimes already understand MCP tools.\n14\t- MCP keeps tool exposure protocol-based rather than driver-specific.\n15\t- MCP can be backed by the same registry dispatch path as CLI, HTTP, and UDS.\n16\t- Runtimes without MCP can still reach AGH through CLI/UDS fallback surfaces.\n17\t\n18\t## Decision\n19\t\n20\tThe MVP will expose session-callable registry tools through an AGH-hosted local MCP server plus shared CLI, HTTP, and UDS contracts.\n21\t\n22\tThe daemon will own one registry contract and dispatch pipeline. 
Surfaces call into that same pipeline:\n23\t\n24\t- hosted MCP server for model-visible AGH, extension-host, and MCP-backed tools in runtimes that support MCP,\n25\t- CLI commands for operator and agent fallback use,\n26\t- HTTP API for web/operator clients,\n27\t- UDS API for local trusted clients and internal AGH tools.\n28\t\n29\tDirect driver/ACP injection can be added later as an optimization for runtimes that support it, but it is not the MVP exposure path.\n30\t\n31\t## Consequences\n32\t\n33\tEvery session-callable tool must be representable as an MCP tool without losing policy, availability, hook, telemetry, source provenance, auth redaction, approval, and result-budget behavior.\n34\t\n35\tThe registry contract types must be shared below all surfaces rather than copied into each transport.\n36\t\n37\tSession start should be able to attach the hosted AGH MCP server for agents whose runtime supports MCP. Agents/runtimes without MCP remain supported through CLI/UDS fallback.\n38\t\n39\tThe hosted MCP server is not an execution backend. It is an exposure transport. `tools/list` returns the effective session projection, and every `tools/call` re-enters `internal/tools.Registry.Call`, which resolves `native_go`, `extension_host`, or `mcp` handles and revalidates policy at dispatch time.\n40\t\n41\tThe TechSpec must define how the hosted MCP server is authorized, scoped to the session/workspace, and prevented from bypassing registry dispatch.\n42\t\n43\tLive catalog deltas can be designed as a later driver capability. 
The MVP can refresh the hosted MCP server's tool list and expose search/list tools through the registry.\n44\t\n45\t## Rejected Alternatives\n46\t\n47\t### CLI/UDS only\n48\t\n49\tThis would be simpler and still agent-operable through terminal tools, but it would not provide native tool calls for runtimes with MCP support and would leave the main \"last mile\" gap partially open.\n50\t\n51\t### Direct ACP/driver injection first\n52\t\n53\tThis could be cleaner for a single provider, but it is less portable and would force the first implementation into provider-specific behavior.\n54\t\n55\t### HTTP/UDS only\n56\t\n57\tThis would build the management API but delay the session-visible tool surface, making the foundation less useful to autonomous agents.\n58\t\n59\t## Evidence\n60\t\n61\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: MCP tools are adapted into the same local tool contract and refreshed dynamically.\n62\t- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: MCP is a provider backend for plugin/bundle tools.\n63\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: AGH already resolves MCP sidecars and has CLI/HTTP/UDS-style management surfaces elsewhere.\n64\t- `.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md`: AGH should avoid assuming direct LLM API control while still exposing AGH-owned tools through provider-neutral surfaces.\n65\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.259883Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Wn98aR8jsYueoDzYWZoDY8","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Wn98aR8jsYueoDzYWZoDY8","content":"```\n1\t# ADR-003: Runtime Registry Package Boundary\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tAGH already has `internal/tools`, but it currently defines only 
metadata records and a list-only provider interface. The Tool Registry foundation needs runtime concerns: executable handles, provider adapters, availability, policy projection, dispatch, hooks, telemetry, result budgeting, and hosted MCP exposure.\n10\t\n11\tThe registry also needs to compose with skills for agent-facing discovery operations such as `agh__tool_search`, `agh__skill_list`, and `agh__skill_view`. That creates a package-boundary question: should the runtime registry live in `internal/tools`, in a new broad `internal/catalog`, or in daemon orchestration code?\n12\t\n13\t## Decision\n14\t\n15\t`internal/tools` will own the runtime Tool Registry contracts and execution path.\n16\t\n17\tThe package will evolve from metadata-only records into the home for:\n18\t\n19\t- tool descriptors and source/provenance types,\n20\t- executable handles and providers,\n21\t- availability and reason codes,\n22\t- policy projection for tool views,\n23\t- central `Registry.Call` dispatch,\n24\t- result normalization and result-budget metadata,\n25\t- provider adapters for `native_go`, `extension_host`, `mcp`, and future bridges.\n26\t\n27\tA thin `internal/catalog` facade may compose `internal/tools` and `internal/skills` for cross-domain list/search/view surfaces and for AGH-native tools such as `agh__tool_search` and `agh__skill_view`.\n28\t\n29\tThe daemon composition root will wire providers and dependencies, but it will not own registry logic.\n30\t\n31\t## Consequences\n32\t\n33\tTool execution remains in the tool domain instead of a broad catalog domain. This keeps dispatch, policy, availability, and telemetry testable without booting the daemon.\n34\t\n35\t`internal/catalog` stays small and read-oriented. 
It coordinates cross-domain discovery and progressive disclosure, but does not become a second execution engine.\n36\t\n37\tExisting `internal/tools.Tool` resource compatibility must be handled through clear type splits rather than by adding runtime function fields to resource records. The TechSpec must distinguish cold resource specs from runtime descriptors and handles:\n38\t\n39\t- cold `tool` resources describe desired state and provenance;\n40\t- runtime `Descriptor` values normalize policy, schema, source, backend, and risk metadata;\n41\t- runtime `Handle` values execute through Go function closures, extension subprocess calls, or MCP client calls.\n42\t\n43\tDaemon boot must register providers explicitly, following AGH's composition-root discipline.\n44\t\n45\t`internal/tools` must not import `internal/extension`, `internal/mcp`, `internal/api/*`, `internal/cli`, or `internal/daemon`. Those adapters are injected by daemon wiring behind interfaces defined in `internal/tools`.\n46\t\n47\t## Rejected Alternatives\n48\t\n49\t### New `internal/catalog` owns everything\n50\t\n51\tThis would centralize skills and tools, but it would also mix skill content loading, tool dispatch, extension adapter logic, search, and policy into one large package too early.\n52\t\n53\t### Daemon-owned registry logic\n54\t\n55\tKeeping registry logic inside daemon orchestration would reduce package churn, but it would make dispatch harder to test and would couple tool semantics to boot wiring.\n56\t\n57\t## Evidence\n58\t\n59\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: `internal/tools` is currently metadata-only while daemon boot already owns resource projection.\n60\t- `.compozy/tasks/tools-registry/analysis/analysis_goclaw.md`: GoClaw keeps executable tool contracts and policy close to the tools package.\n61\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: recommends `internal/tools` for runtime contracts and a thin catalog facade for 
cross-skill/tool search.\n62\t- `internal/CLAUDE.md`: AGH favors interfaces where consumed and composition-root wiring over daemon package logic accumulation.\n63\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.260066Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QnnCfEBCxGd9w7icUeQjn2","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01QnnCfEBCxGd9w7icUeQjn2","content":"```\n1\t# ADR-001: Extension Tool Execution Boundary\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Date\n8\t\n9\t2026-04-28\n10\t\n11\t## Context\n12\t\n13\tAGH's Tool Registry must let operators and agents create real tools through the same extension model AGH already uses for hooks, memory backends, bridge adapters, and subprocess-hosted capabilities.\n14\t\n15\tThe previous interpretation of this ADR kept extension tools descriptor-only in the MVP. That is insufficient: it lets AGH list extension tools but does not let a TypeScript or Go extension author define a callable tool. It also conflicts with the current extension runtime, which already supports subprocess JSON-RPC, capability negotiation, Host API grants, health checks, service methods, and TypeScript `Extension.handle(...)` handlers.\n16\t\n17\tThe registry still must preserve daemon safety. Third-party extension code must not run in-process inside the daemon. 
Cold resource records must not persist function pointers or dynamic handler state.\n18\t\n19\t## Decision\n20\t\n21\tThe Tool Registry MVP supports three executable backend classes:\n22\t\n23\t- `native_go`: in-process Go function tools compiled into the daemon and registered by first-party/built-in providers at the daemon composition root.\n24\t- `extension_host`: out-of-process tools implemented by installed extensions through the existing subprocess JSON-RPC extension runtime.\n25\t- `mcp`: remote or local MCP tools called through daemon-owned MCP client adapters that consume existing MCP config and `internal/mcp/auth` redacted credential state.\n26\t\n27\tExtension-host tools are first-class executable tools in the MVP. An extension declares `capabilities.provides = [\"tool.provider\"]`, publishes manifest-authoritative `resources.tools` descriptors, and implements the negotiated `tools/call` service method. The daemon only dispatches to an extension tool when the extension is enabled, active, healthy, negotiated `tool.provider`, implements `tools/call`, and passes registry policy, source grants, availability, hooks, and session lineage checks.\n28\t\n29\tThird-party extension code never runs in the daemon process. Third-party Go tools use a subprocess Go SDK, not Go `plugin`, cgo-loaded shared libraries, reflection injection, or manifest function pointers.\n30\t\n31\t## Alternatives Considered\n32\t\n33\t### Descriptor-only extension tools\n34\t\n35\t- **Description**: Extension manifests publish tool metadata, but calls return unavailable until a later TechSpec implements backend dispatch.\n36\t- **Pros**: Smaller MVP and easier security review.\n37\t- **Cons**: Fails the product requirement that extensions can create tools. 
Leaves TypeScript and Go extension authors with no executable tool path.\n38\t- **Why rejected**: The user explicitly rejected this as too weak, and subagent/codebase research confirmed existing extension runtime can support executable subprocess handlers.\n39\t\n40\t### In-process third-party handlers\n41\t\n42\t- **Description**: Let trusted extensions register Go or JavaScript handlers directly in the daemon process.\n43\t- **Pros**: Low call latency and a simple function-call programming model.\n44\t- **Cons**: Expands daemon compromise risk, creates a plugin ABI, complicates lifecycle isolation, and pressures AGH toward unsafe `plugin`/reflection behavior.\n45\t- **Why rejected**: Built-ins can use `native_go`; third-party extension code must cross a process/protocol boundary.\n46\t\n47\t### TypeScript-only extension tools\n48\t\n49\t- **Description**: Support executable TypeScript extension tools first; defer Go subprocess authoring.\n50\t- **Pros**: Reuses the existing TypeScript SDK quickly.\n51\t- **Cons**: Does not satisfy the requirement that extensions define tools using Go functions or TypeScript.\n52\t- **Why rejected**: The MVP must include a public Go subprocess SDK for tool providers.\n53\t\n54\t## Consequences\n55\t\n56\t### Positive\n57\t\n58\t- Extension tools become real executable capabilities in the MVP.\n59\t- Built-in Go tools and third-party extension tools share one registry, policy path, hosted MCP exposure path, hook path, telemetry path, and result-budget path.\n60\t- The design reuses AGH's existing extension runtime instead of inventing a descriptor-only side channel.\n61\t- Remote MCP tools become governed by AGH policy and observability instead of living in provider-private tool universes.\n62\t\n63\t### Negative\n64\t\n65\t- MVP scope grows: registry execution, extension protocol, TypeScript SDK, Go SDK, MCP client call-through, redaction, and E2E coverage must ship together.\n66\t- Remote MCP call-through adds auth, transport, 
timeout, and result-normalization complexity.\n67\t- Extension descriptor/runtime reconciliation needs strict validation to avoid mismatch bugs.\n68\t\n69\t### Risks\n70\t\n71\t- Misclassified mutating tools could bypass approval expectations. Mitigation: descriptor validation, source grants, policy matrix tests, and dispatch-time revalidation.\n72\t- Extension tools could leak secrets through results or errors. Mitigation: central redaction and result limiting before crossing CLI, HTTP, UDS, MCP, SSE, logs, or events.\n73\t- MCP OAuth tokens could leak through registry views. Mitigation: consume only redacted `internal/mcp/auth` status and never copy token material into descriptors, resources, events, or responses.\n74\t\n75\t## Implementation Notes\n76\t\n77\t- `internal/tools` owns `native_go`, `extension_host`, and `mcp` backend contracts.\n78\t- `internal/daemon` wires first-party Go function providers and injects extension/MCP adapters.\n79\t- `internal/extension/protocol` adds `CapabilityProvideToolProvider = \"tool.provider\"` and `ExtensionServiceMethodToolsCall = \"tools/call\"`.\n80\t- `@agh/extension-sdk` adds `extension.tool(descriptor, handler)`.\n81\t- A new public Go subprocess SDK adds the equivalent Go helper for defining tools with Go functions.\n82\t- Hosted MCP remains the session exposure transport, but every call enters `internal/tools.Registry.Call`.\n83\t\n84\t## References\n85\t\n86\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`\n87\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`\n88\t- `internal/extension/manager.go`\n89\t- `internal/extension/protocol/host_api.go`\n90\t- `sdk/typescript/src/extension.ts`\n91\t- `internal/subprocess/handshake.go`\n92\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.260258Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015h7PbqqRrN5mfoqkrVPbLF","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_015h7PbqqRrN5mfoqkrVPbLF","content":"```\n1\t# ADR-005: ACP Approval Policy Integration\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tAGH already has a system-level ACP tool approval policy exposed in settings and enforced by the ACP tool host:\n10\t\n11\t- `deny-all`\n12\t- `approve-reads`\n13\t- `approve-all`\n14\t\n15\tThe Tool Registry will add per-tool metadata and policy, including read-only, destructive, open-world, source, toolset, session permission atoms, and extension/MCP grants. If this registry policy is designed as a parallel approval system, AGH can produce contradictory states such as:\n16\t\n17\t- system policy says `deny-all`, but a tool-level policy says allow;\n18\t- system policy says `approve-reads`, but a mutating tool claims read-only;\n19\t- system policy says `approve-all`, but a session/agent intentionally narrows permissions;\n20\t- hosted MCP exposes a tool the ACP host would later block.\n21\t\n22\tThe registry policy must integrate with the existing ACP policy rather than bypass it.\n23\t\n24\t## Decision\n25\t\n26\tThe ACP `permissions.mode` policy is the system approval ceiling for session-visible tool execution.\n27\t\n28\tTool Registry policy operates below that ceiling as a more granular filter. It can narrow, classify, require approval, or deny a tool, but it cannot silently grant more authority than the effective system/session ACP policy allows.\n29\t\n30\tThe effective decision order is:\n31\t\n32\t1. Resolve the system/session ACP approval mode.\n33\t2. Resolve agent/session lineage tool permission atoms.\n34\t3. Resolve registry visibility and allow/deny/toolset policy.\n35\t4. 
Resolve source/risk defaults for built-in, extension, MCP, and dynamic tools.\n36\t5. Resolve tool descriptor risk flags: read-only, destructive, open-world, requires interaction.\n37\t6. Run availability checks.\n38\t7. Run pre-call hooks.\n39\t8. Dispatch only if the combined decision is allowed or explicitly approved.\n40\t\n41\t`approve-all` is permissive but not a bypass of explicit denies. It removes automatic ACP prompting for allowed tools, but registry deny rules, unavailable state, session lineage restrictions, hooks, and source grants still apply.\n42\t\n43\t`approve-reads` auto-approves only tools classified as read-only by the registry and allowed by session policy. Mutating, destructive, open-world, network-send, task-write, extension-host write, and MCP write tools require explicit allow by `ToolID`, toolset, or source policy plus approval when the effective approval mode requires it.\n44\t\n45\t`deny-all` denies by default. The registry may still list operator-visible tools with reasons, but session-visible execution requires explicit approval/allowance through the existing ACP approval path or an equivalent local approval surface.\n46\t\n47\tAgent frontmatter and session lineage can lower permissions relative to system policy. They cannot raise permissions above the system approval ceiling.\n48\t\n49\tThe MVP permits mutating, open-world, and destructive extension-host and MCP tools, but only when all gates pass:\n50\t\n51\t1. The descriptor classifies `read_only`, `destructive`, `open_world`, and `requires_interaction` correctly.\n52\t2. The source tier is allowed for the effective workspace/session.\n53\t3. The concrete `ToolID` or expanded toolset is allowed by registry/session policy.\n54\t4. ACP `permissions.mode` does not deny the call.\n55\t5. The approval bridge succeeds when approval is required.\n56\t6. The backend is available, healthy, authorized, and non-conflicted.\n57\t7. Hooks do not deny or narrow the call.\n58\t8. 
Dispatch revalidates all gates immediately before execution.\n59\t\n60\t## Consequences\n61\t\n62\tThe registry must compute an `EffectiveToolDecision` instead of storing a single boolean. The decision should include:\n63\t\n64\t- system approval mode,\n65\t- session/agent policy result,\n66\t- registry policy result,\n67\t- source/risk default result,\n68\t- availability result,\n69\t- hook result,\n70\t- final visibility decision,\n71\t- final execution decision,\n72\t- user/operator-facing reason codes.\n73\t\n74\tThe hosted AGH MCP server must expose only tools allowed by the effective visibility decision for that session. Dispatch must still revalidate the effective execution decision.\n75\t\n76\tTool descriptors must classify read-only vs mutating accurately. A mutating, destructive, or open-world tool mislabeled as read-only is a correctness and security bug.\n77\t\n78\tThe web settings UI text remains true: `approve-all` auto-approves tool calls, but agents and registry policy can lower permissions. The TechSpec should clarify that \"auto-approved\" does not mean \"all registered tools are visible and executable regardless of registry policy.\"\n79\t\n80\t## Rejected Alternatives\n81\t\n82\t### Registry policy bypasses ACP policy\n83\t\n84\tThis would create inconsistent behavior between ACP-native tools, AGH-hosted MCP tools, and CLI/UDS calls. It would also make the existing settings UI misleading.\n85\t\n86\t### Registry policy replaces ACP policy\n87\t\n88\tThis would require redesigning existing ACP permission handling and settings before the Tool Registry can ship. The MVP should integrate with the existing model and extend it.\n89\t\n90\t### Tool-level policy alone controls execution\n91\t\n92\tPer-tool policy is necessary but insufficient. 
Session lineage, system approval mode, hooks, availability, and source grants all affect whether a call is safe and authorized.\n93\t\n94\t## Evidence\n95\t\n96\t- `internal/acp/permission.go:75-132`: ACP policy defaults, path validation, and decisions for `approve-all`, `approve-reads`, and `deny-all`.\n97\t- `internal/acp/tool_host.go:64-83`: local tool host is constructed with an ACP permission mode.\n98\t- `web/src/routes/_app/settings/general.tsx:307-315`: settings UI describes the three tool approval policies.\n99\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: permission should be an ordered pipeline rather than a tool-local boolean.\n100\t- `.compozy/tasks/tools-registry/analysis/analysis_goclaw.md`: runtime grants should be rechecked at execution time.\n101\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: dispatch must recheck availability and authorization and use one central pipeline.\n102\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:47.551299Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01XZAnAYirZ9mppmDzJPQ8X9","toolCallState":"failed","blocks":[{"type":"tool_result","toolUseId":"toolu_01XZAnAYirZ9mppmDzJPQ8X9","content":"```\nFile content (29771 tokens) exceeds maximum allowed tokens (25000). 
Use offset and limit parameters to read specific portions of the file, or search for specific content instead of reading the whole file.\n```","isError":true}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:53.5109Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:53.511081Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01SUNdoe3ozhBDxFb7AgavhT","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01SUNdoe3ozhBDxFb7AgavhT","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:54.447657Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01SUNdoe3ozhBDxFb7AgavhT","blocks":[{"type":"tool_use","id":"toolu_01SUNdoe3ozhBDxFb7AgavhT","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md (1 - 500)","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md","limit":500,"offset":1}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:54.487054Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01RWD15GzguiQDPHd3P6bFou","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01RWD15GzguiQDPHd3P6bFou","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:55.240537Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01RWD15GzguiQDPHd3P6bFou","blocks":[{"type":"tool_use","id":"toolu_01RWD15GzguiQDPHd3P6bFou","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:55.264942Z","update":{"kind":"tool_call_started","toolCallId":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:55.452058Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","blocks":[{"type":"tool_use","id":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:55.463356Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.030605Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","blocks":[{"type":"tool_use","id":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.037771Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01VchKBhxaZKVNeg3o1ZYxza","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01VchKBhxaZKVNeg3o1ZYxza","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.112781Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VchKBhxaZKVNeg3o1ZYxza","blocks":[{"type":"tool_use","id":"toolu_01VchKBhxaZKVNeg3o1ZYxza","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.121073Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01KaD2oCrG6emjUnpDC4EEp1","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01KaD2oCrG6emjUnpDC4EEp1","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.156099Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KaD2oCrG6emjUnpDC4EEp1","blocks":[{"type":"tool_use","id":"toolu_01KaD2oCrG6emjUnpDC4EEp1","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-010-remote-mcp-call-through.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.176451Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.19216Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.192227Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VchKBhxaZKVNeg3o1ZYxza","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.1924Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KaD2oCrG6emjUnpDC4EEp1","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.192524Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01RWD15GzguiQDPHd3P6bFou","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.193501Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.196808Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_014CTiPPQp5gPsdbpT4Wvmtf","content":"```\n1\t# ADR-007: Canonical Tool ID Format\n2\t\n3\t## Status\n4\t\n5\tAccepted.\n6\t\n7\t## Context\n8\t\n9\tAGH needs one stable tool identifier that can be used across the runtime registry, policy rules, CLI, HTTP, UDS, telemetry, hooks, audit logs, and the AGH-hosted MCP surface.\n10\t\n11\tEarlier options considered dotted internal IDs such as `agh.skill.view` plus a separate MCP-safe wire alias such as `agh_skill_view`. 
ACP/MCP compatibility research showed this would work technically, but it creates two strong names for the same tool and increases the chance of policy, audit, telemetry, or dispatch code using the wrong identity.\n12\t\n13\tThe identifier also needs to remain compatible with provider and host constraints. MCP allows dots in tool names, but common model tool/function APIs are stricter and accept letters, digits, underscores, and hyphens with a 64-character limit. AGH should choose a lowest-common-denominator format for callable tool IDs rather than rely on a more permissive protocol layer.\n14\t\n15\tClaude Code uses the `mcp__server__tool` convention for MCP tools. This gives an explicit namespace boundary while staying inside provider-safe characters.\n16\t\n17\t## Decision\n18\t\n19\tAGH will use one canonical public `ToolID` format across every surface:\n20\t\n21\t```text\n22\t\u003csegment\u003e( \"__\" \u003csegment\u003e )*\n23\t```\n24\t\n25\tEach segment must match:\n26\t\n27\t```text\n28\t[a-z][a-z0-9_]*\n29\t```\n30\t\n31\tGlobal constraints:\n32\t\n33\t- maximum length: 64 characters;\n34\t- lowercase ASCII only;\n35\t- digits allowed after the first character of each segment;\n36\t- `_` allowed inside a segment;\n37\t- `__` is reserved exclusively as a segment separator;\n38\t- no dot;\n39\t- no hyphen;\n40\t- no uppercase;\n41\t- no empty segment;\n42\t- no leading or trailing `_` inside a segment if it would create an empty separator ambiguity.\n43\t\n44\tExamples:\n45\t\n46\t```text\n47\tagh__tool_list\n48\tagh__tool_search\n49\tagh__tool_info\n50\tagh__skill_list\n51\tagh__skill_view\n52\tagh__network_peers\n53\tagh__network_send\n54\tagh__task_list\n55\tagh__task_read\n56\text__linear__search\n57\text__linear__create_issue\n58\tmcp__github__create_issue\n59\tmcp__context7__query_docs\n60\t```\n61\t\n62\t`ToolID` is the identity used by:\n63\t\n64\t- registry descriptors;\n65\t- provider registration;\n66\t- policy allow/deny rules;\n67\t- 
toolsets;\n68\t- CLI commands;\n69\t- HTTP and UDS APIs;\n70\t- hooks;\n71\t- telemetry and audit logs;\n72\t- hosted MCP `Tool.name`;\n73\t- dispatch requests.\n74\t\n75\tAGH will not use a second wire alias for the same tool in the MVP. Display titles are UI-only and do not participate in policy, authorization, conflict resolution, or dispatch.\n76\t\n77\tSource/provenance remains structured metadata, not an alternate identity:\n78\t\n79\t```json\n80\t{\n81\t \"id\": \"mcp__github__create_issue\",\n82\t \"source\": {\n83\t \"kind\": \"mcp\",\n84\t \"serverName\": \"github\",\n85\t \"rawToolName\": \"create_issue\"\n86\t }\n87\t}\n88\t```\n89\t\n90\tAGH may show a shorter display title such as `Create Issue`, but the canonical ID remains `mcp__github__create_issue`.\n91\t\n92\t## Collision Rules\n93\t\n94\tRegistration and session projection must fail closed:\n95\t\n96\t- If two providers produce the same `ToolID`, the later registration is rejected or marked `conflicted`.\n97\t- If sanitizing an external MCP/server/extension tool name would collide with an existing `ToolID`, the candidate tool is marked `conflicted` and is not exposed to model-visible surfaces.\n98\t- AGH must not silently truncate, overwrite, or choose \"last writer wins\".\n99\t- Operator surfaces may show conflicted tools with reason codes and provenance.\n100\t- Session/model-visible surfaces expose only non-conflicted callable tools.\n101\t\n102\t## Consequences\n103\t\n104\tPositive:\n105\t\n106\t- One identifier works across registry, wire, policy, telemetry, and dispatch.\n107\t- No dotted-to-wire alias mapping is needed in the MVP.\n108\t- Namespace boundaries remain visible through reserved `__`.\n109\t- The format is compatible with stricter provider tool-name constraints.\n110\t- Policy patterns stay simple, for example `agh__skill_*` and `mcp__github__*`.\n111\t\n112\tTradeoffs:\n113\t\n114\t- Dotted names such as `agh.skill.view` are more visually familiar for namespace trees, but 
they are not provider-safe enough to use as callable IDs.\n115\t- `__` is less aesthetically clean than dots, but it avoids dual identity.\n116\t- Raw external names must be preserved in `SourceRef` for exact provenance and debugging.\n117\t\n118\t## Follow-Ups\n119\t\n120\t- The TechSpec must update all tool examples to this format.\n121\t- The registry validator must enforce the grammar.\n122\t- Extension and MCP adapters must sanitize external names deterministically and report conflicts.\n123\t- Policy matching must treat `__` as an identity segment separator and `_` as a normal segment character.\n124\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.197372Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VchKBhxaZKVNeg3o1ZYxza","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01VchKBhxaZKVNeg3o1ZYxza","content":"```\n1\t# ADR-009: Public Go Extension Tool SDK\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Date\n8\t\n9\t2026-04-28\n10\t\n11\t## Context\n12\t\n13\tThe Tool Registry must support extension authors defining tools using Go functions or TypeScript. AGH already has a TypeScript extension SDK, but Go extension authoring currently relies on direct protocol implementation or internal packages such as `internal/bridgesdk`, which are not public extension-author surfaces.\n14\t\n15\tBuilt-in daemon tools can use in-process Go functions, but third-party Go extension tools must remain out-of-process for safety and lifecycle isolation.\n16\t\n17\t## Decision\n18\t\n19\tThe Tool Registry MVP includes a public Go subprocess extension SDK for tool providers.\n20\t\n21\tThe SDK exposes an authoring API equivalent to TypeScript `extension.tool(descriptor, handler)`, but implemented as a Go subprocess runtime. 
It handles initialize/shutdown, health checks, `tool.provider` negotiation, `tools/call` dispatch, Host API client calls, typed errors, descriptor export for runtime reconciliation, and test harness support.\n22\t\n23\tIn-process `native_go` remains limited to first-party/built-in tools compiled into the daemon and wired by `internal/daemon`. Third-party Go extension tools use the public Go SDK and execute as managed subprocesses.\n24\t\n25\t## Alternatives Considered\n26\t\n27\t### Protocol examples only\n28\t\n29\t- **Description**: Ship Go examples that implement JSON-RPC manually, without a reusable SDK.\n30\t- **Pros**: Smaller implementation.\n31\t- **Cons**: Weak developer experience and inconsistent correctness across extension authors.\n32\t- **Why rejected**: The user explicitly requested Go function authoring, not raw protocol plumbing.\n33\t\n34\t### Go `plugin` or shared-library handlers\n35\t\n36\t- **Description**: Load Go functions into the daemon process dynamically.\n37\t- **Pros**: Natural function-call authoring.\n38\t- **Cons**: Unsafe daemon process boundary, platform friction, ABI issues, and lifecycle complexity.\n39\t- **Why rejected**: Third-party code must stay out-of-process.\n40\t\n41\t### TypeScript-only extension authoring\n42\t\n43\t- **Description**: Support TypeScript tool handlers first and defer Go SDK work.\n44\t- **Pros**: Reuses existing SDK.\n45\t- **Cons**: Does not satisfy Go function authoring requirements.\n46\t- **Why rejected**: Go function authoring is in MVP scope.\n47\t\n48\t## Consequences\n49\t\n50\t### Positive\n51\t\n52\t- Go extension authors get a real function-based authoring path without compromising daemon isolation.\n53\t- TypeScript and Go SDKs share the same runtime protocol and test expectations.\n54\t- E2E tests can prove both language paths through the same registry dispatch pipeline.\n55\t\n56\t### Negative\n57\t\n58\t- MVP scope includes a new SDK package, scaffolding, tests, docs, and generated 
contract parity.\n59\t- Public SDK design must be stable enough for extension authors.\n60\t\n61\t### Risks\n62\t\n63\t- SDK and daemon protocol can drift. Mitigation: generate shared contract types and include cross-SDK conformance tests.\n64\t- Go SDK may accidentally import `internal/*` packages. Mitigation: place it under a public package path and test from an external-package perspective.\n65\t\n66\t## Implementation Notes\n67\t\n68\t- Add a public Go SDK package under `sdk/go` or an equivalent non-internal module path.\n69\t- Add a create-extension template for Go tool provider extensions.\n70\t- Add a Go SDK harness that can load a tool extension, mock Host API calls, and call `tools/call`.\n71\t- Do not use the SDK for daemon built-ins; built-ins use `native_go` providers.\n72\t\n73\t## References\n74\t\n75\t- `internal/subprocess/handshake.go`\n76\t- `internal/extension/manager.go`\n77\t- `sdk/typescript/src/extension.ts`\n78\t- `internal/bridgesdk/runtime.go`\n79\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.197589Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01RWD15GzguiQDPHd3P6bFou","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01RWD15GzguiQDPHd3P6bFou","content":"```\n1\t# ADR-006: Tool Visibility by Surface\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tThe Tool Registry will track more states than \"exists\" or \"does not exist.\" A tool may be registered but disabled, unauthorized, unavailable, unhealthy, missing configuration, missing an MCP backend, blocked by session policy, denied by ACP approval mode, or conflicted by name.\n10\t\n11\tDifferent consumers need different views:\n12\t\n13\t- operators need diagnostics and reason codes to fix configuration and extension problems;\n14\t- agents need a low-noise callable surface that does not invite impossible calls;\n15\t- 
dispatch still needs to revalidate because discovery visibility is not a security boundary.\n16\t\n17\t## Decision\n18\t\n19\tOperator surfaces show unavailable and unauthorized tools with reason codes. Session-visible and model-visible surfaces expose only tools that are visible and callable for the effective session context.\n20\t\n21\tOperator surfaces include:\n22\t\n23\t- CLI,\n24\t- HTTP API,\n25\t- Web UI,\n26\t- privileged UDS/operator views.\n27\t\n28\tSession-visible/model-visible surfaces include:\n29\t\n30\t- the AGH-hosted MCP tool list exposed to an agent session,\n31\t- any future direct ACP/driver tool injection,\n32\t- non-privileged session-scoped UDS catalog views.\n33\t\n34\tThe registry must compute both:\n35\t\n36\t- `OperatorToolView`: includes all registered tools plus state, reason codes, source/provenance, policy diagnostics, conflict diagnostics, and availability details.\n37\t- `SessionToolView`: includes only tools that pass effective visibility and execution preconditions for that session.\n38\t\n39\tDispatch must revalidate the full effective execution decision even when a tool was present in `SessionToolView`.\n40\t\n41\t## Consequences\n42\t\n43\tAgents are not shown tools that they cannot call in the current session. This avoids prompt/tool-call noise and reduces attempts to invoke unavailable tools.\n44\t\n45\tOperators can still debug why a tool is not appearing to an agent, including whether the cause is ACP approval mode, session lineage, agent policy, source grants, extension health, MCP health, missing config, or a conflict.\n46\t\n47\tCLI/HTTP endpoints need an explicit scope or view mode. 
For example:\n48\t\n49\t- operator default: include unavailable tools and reasons;\n50\t- session-scoped query: return the same filtered view that the hosted MCP server would expose.\n51\t\n52\tThe hosted MCP server must use `SessionToolView`, not raw registry contents.\n53\t\n54\t## Rejected Alternatives\n55\t\n56\t### Everyone sees unavailable tools\n57\t\n58\tThis improves agent planning transparency but increases noise and risks inducing models to call tools that the daemon will reject.\n59\t\n60\t### Hide unavailable tools everywhere\n61\t\n62\tThis is clean for fail-closed execution, but it makes operator troubleshooting poor and hides extension/MCP/config problems.\n63\t\n64\t### Configurable per surface in MVP\n65\t\n66\tThis offers maximum flexibility but creates a larger behavior matrix before the registry foundation is stable.\n67\t\n68\t## Evidence\n69\t\n70\t- `.compozy/tasks/tools-registry/analysis/analysis_hermes.md`: availability filtering is useful for model-visible definitions.\n71\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: request-time tool pools are context-specific and filtered before model exposure.\n72\t- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: lifecycle and policy states need diagnostics while agent projections should be policy-filtered.\n73\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: discovery can hide unavailable/unauthorized tools from agents while operator surfaces show reasons, but dispatch still rechecks.\n74\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.197798Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Ndz4p9XpKnf5AoWr5q8Mgs","content":"```\n1\t# ADR-008: Manifest-Authoritative Extension Tool Descriptors\n2\t\n3\t## 
Status\n4\t\n5\tAccepted\n6\t\n7\t## Date\n8\t\n9\t2026-04-28\n10\t\n11\t## Context\n12\t\n13\tExtension tools need both install-time auditability and runtime ergonomics. If descriptors are runtime-only, AGH cannot inspect policy, risk, schemas, or source grants before launching extension code. If descriptors are manifest-only, SDK helpers cannot verify that the running extension actually implements the declared handlers.\n14\t\n15\tAGH already treats `extension.toml` as the install-time contract for resources, capabilities, Host API actions, subprocess launch, security grants, and bridge metadata. The Tool Registry should extend that model rather than create a second descriptor authority.\n16\t\n17\t## Decision\n18\t\n19\tExtension tool descriptors are manifest-authoritative with runtime reconciliation.\n20\t\n21\t`extension.toml` declares every extension-host tool under `resources.tools`. Each declaration includes the canonical `id`, display metadata, input/output schemas, risk flags, toolsets, result budget, and handler binding.\n22\t\n23\tThe extension runtime may also expose descriptors through `provide_tools` or SDK metadata generated by `extension.tool(...)`, but those runtime descriptors are only a reconciliation signal. The daemon marks an extension-host tool executable only when the runtime descriptor matches the manifest-authoritative descriptor for identity, schema, risk flags, and handler binding.\n24\t\n25\tRuntime-only descriptors are rejected in the MVP. 
Manifest descriptors without a matching runtime handler remain operator-visible but unavailable with a deterministic reason code.\n26\t\n27\t## Alternatives Considered\n28\t\n29\t### Runtime-authoritative descriptors\n30\t\n31\t- **Description**: The manifest only declares `tool.provider`; the extension reports all tools after initialize.\n32\t- **Pros**: Flexible and ergonomic for SDK authors.\n33\t- **Cons**: Requires executing extension code before AGH knows tool risk, schemas, or source policy.\n34\t- **Why rejected**: Install-time auditability is required for policy, docs, and operator diagnostics.\n35\t\n36\t### Manifest-only descriptors\n37\t\n38\t- **Description**: The manifest is the only descriptor source; runtime implementation is checked only when a call happens.\n39\t- **Pros**: Simple policy model.\n40\t- **Cons**: Handler mismatch is discovered too late, and SDK helper bugs are harder to detect.\n41\t- **Why rejected**: Runtime reconciliation catches drift before session exposure.\n42\t\n43\t## Consequences\n44\t\n45\t### Positive\n46\t\n47\t- Operators and agents can inspect extension tool risk before extension code runs.\n48\t- SDK helpers can improve authoring ergonomics without becoming the source of truth.\n49\t- Tool install, enable, disable, conflict, and policy diagnostics remain deterministic.\n50\t\n51\t### Negative\n52\t\n53\t- Extension authors must keep manifest descriptors and SDK registration in sync.\n54\t- The daemon must compare runtime descriptors against manifest descriptors and report structured mismatch reasons.\n55\t\n56\t### Risks\n57\t\n58\t- Descriptor drift could make valid tools unavailable. Mitigation: SDK templates generate manifest snippets and tests assert manifest/runtime parity.\n59\t- Manifest fields could become verbose. 
Mitigation: SDK scaffolding and docs should generate the boilerplate.\n60\t\n61\t## Implementation Notes\n62\t\n63\t- Add `id`, `output_schema`, `risk`, `destructive`, `open_world`, `requires_interaction`, `max_result_bytes`, `toolsets`, `tags`, and `handler` fields to extension `ToolConfig`.\n64\t- Add `provide_tools` support to the subprocess protocol or include equivalent runtime descriptor metadata during initialize.\n65\t- Add deterministic mismatch reason codes such as `runtime_descriptor_missing`, `runtime_descriptor_mismatch`, and `handler_missing`.\n66\t- The TypeScript and Go SDKs must include tests that compare generated/runtime descriptors to manifest fixtures.\n67\t\n68\t## References\n69\t\n70\t- `internal/extension/manifest.go`\n71\t- `internal/extension/resource_publication.go`\n72\t- `sdk/typescript/src/extension.ts`\n73\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`\n74\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.198001Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KaD2oCrG6emjUnpDC4EEp1","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01KaD2oCrG6emjUnpDC4EEp1","content":"```\n1\t# ADR-010: Remote MCP Call-Through In MVP\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Date\n8\t\n9\t2026-04-28\n10\t\n11\t## Context\n12\t\n13\tAGH already models MCP server configuration and remote auth. The previous Tool Registry spec treated MCP-backed tools as descriptors with availability diagnostics only. The revised MVP must make MCP tools executable through the same registry dispatch path as built-in and extension-host tools.\n14\t\n15\tRemote MCP call-through must not duplicate AGH's MCP auth model or leak OAuth tokens. 
ACP currently converts MCP servers to stdio-only session entries, so remote MCP execution should happen inside the daemon-owned registry adapter rather than by passing remote MCP config directly through ACP.\n16\t\n17\t## Decision\n18\t\n19\tRemote/local MCP tools are executable in the Tool Registry MVP.\n20\t\n21\tThe daemon owns MCP client adapters that discover/list/call MCP tools from validated MCP configuration and resource sources. The adapters consume existing MCP config, transport, auth metadata, and redacted auth status from `internal/mcp/auth`. Token material remains owned by `internal/mcp/auth` and its `TokenStore`; registry descriptors and results never copy tokens.\n22\t\n23\tHosted MCP remains AGH's session exposure transport. When an agent calls `mcp__...` through the hosted MCP server, the call re-enters `internal/tools.Registry.Call`; the registry then invokes the daemon-owned MCP client adapter after policy, auth, availability, hook, schema, and result-budget checks.\n24\t\n25\t## Alternatives Considered\n26\t\n27\t### Descriptor-only MCP tools\n28\t\n29\t- **Description**: Show MCP tool descriptors and auth diagnostics, but do not call them in MVP.\n30\t- **Pros**: Smaller security and transport surface.\n31\t- **Cons**: Leaves MCP as a second-class source and fails to prove external tool execution.\n32\t- **Why rejected**: The accepted MVP scope includes remote MCP call-through.\n33\t\n34\t### Pass remote MCP servers directly to ACP sessions\n35\t\n36\t- **Description**: Let providers connect directly to remote MCP servers.\n37\t- **Pros**: Less daemon adapter work.\n38\t- **Cons**: ACP conversion is currently stdio-only, policy/audit is provider-dependent, and AGH cannot centrally enforce result redaction or source grants.\n39\t- **Why rejected**: AGH needs one daemon-owned dispatch path.\n40\t\n41\t### Duplicate MCP auth in the registry\n42\t\n43\t- **Description**: Store MCP tokens or OAuth state with tool descriptors.\n44\t- **Pros**: Simple 
adapter lookup.\n45\t- **Cons**: Duplicates credential ownership and increases leak risk.\n46\t- **Why rejected**: `internal/mcp/auth` remains the sole credential owner.\n47\t\n48\t## Consequences\n49\t\n50\t### Positive\n51\t\n52\t- MCP tools become agent-callable through the same policy, visibility, hook, telemetry, and hosted MCP surfaces as built-ins and extension-host tools.\n53\t- Existing MCP auth and settings diagnostics remain authoritative.\n54\t- AGH can enforce a consistent `ToolID` and collision policy for MCP sources.\n55\t\n56\t### Negative\n57\t\n58\t- MVP must implement daemon-side MCP discovery/call clients, transport handling, timeout behavior, auth refresh/error mapping, and redaction tests.\n59\t- MCP adapters add more failure states to availability and session projection.\n60\t\n61\t### Risks\n62\t\n63\t- OAuth tokens could leak through registry output. Mitigation: registry consumes only redacted status and uses narrow `internal/mcp/auth` execution interfaces for bearer material.\n64\t- Remote MCP call latency could block hosted MCP responses. Mitigation: explicit timeouts, cancellation, and structured backend failure errors.\n65\t- External MCP tool names could collide after sanitization. 
Mitigation: fail-closed conflict handling and operator-visible diagnostics.\n66\t\n67\t## Implementation Notes\n68\t\n69\t- Reuse `aghconfig.MCPServer`, `internal/config/mcpjson.go`, `internal/config/mcp_resource.go`, skill MCP resolution, extension MCP resources, and `internal/mcp/auth`.\n70\t- Fix resource cloning paths that currently drop `Transport`, `URL`, or `Auth` before relying on remote MCP diagnostics/calls.\n71\t- Add MCP adapter tests for stdio, HTTP, SSE, auth-required, expired/invalid auth, collision, timeout, cancellation, and redaction.\n72\t- Hosted MCP never receives remote OAuth tokens; it receives only AGH-hosted session projection entries.\n73\t\n74\t## References\n75\t\n76\t- `internal/config/provider.go`\n77\t- `internal/mcp/auth`\n78\t- `internal/store/globaldb/global_db_mcp_auth.go`\n79\t- `internal/acp/client.go`\n80\t- `.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md`\n81\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.500453Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01SUNdoe3ozhBDxFb7AgavhT","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:56.503228Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01SUNdoe3ozhBDxFb7AgavhT","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01SUNdoe3ozhBDxFb7AgavhT","content":"```\n1\t# Tool Registry Foundation TechSpec\n2\t\n3\t## Executive Summary\n4\t\n5\tThis TechSpec designs AGH's Tool Registry as a daemon-owned runtime service, not as a static list of built-in commands. 
The registry will unify tool identity, discovery, availability, policy, execution, hooks, telemetry, extension descriptors, MCP adapters, and session-visible exposure through one central dispatch pipeline.\n6\t\n7\tThere is no existing `_prd.md` for this task. The scope is based on the user request, competitor research under `.compozy/tasks/tools-registry/analysis/`, accepted ADRs under `.compozy/tasks/tools-registry/adrs/`, current AGH code exploration, and the prior autonomous skills/tools registry gap analysis.\n8\t\n9\tThe primary architectural trade-off is to make AGH-native tools visible to sessions through an AGH-hosted local MCP server in the MVP instead of trying to invent an ACP-specific registry. ACP does not define a callable tool registry; it defines session lifecycle, MCP bootstrap, permission callbacks, and tool-call observations. MCP provides the programmatic `Tool.name`, so AGH will expose its canonical `ToolID` directly as the hosted MCP tool name.\n10\t\n11\tThe foundation will support executable native/bundled tools, executable extension-host tools, and executable MCP-backed tools. Built-in AGH tools execute in-process through `native_go` handles compiled into the daemon. Third-party TypeScript and Go extension tools execute out-of-process through the existing extension runtime, a new `tool.provider` capability, `provide_tools` reconciliation, and `tools/call` RPC. MCP-backed tools execute through daemon-owned MCP clients that consume the existing MCP config/auth subsystem. 
Descriptor-only is an unavailable/error state, not the MVP contract for extension or MCP tools.\n12\t\n13\t## MVP Boundary Statement\n14\t\n15\tMVP boundary: implementation steps 1-16 build the Tool Registry foundation, AGH-hosted MCP session exposure, native bootstrap tools, executable TypeScript/Go extension-host tools, executable daemon-owned MCP call-through, shared CLI/HTTP/UDS surfaces, policy/availability enforcement, hooks, observability, docs, SDK updates, and verification. This MVP proves the registry as an executable daemon primitive without replacing every ACP provider-native tool.\n16\t\n17\tPost-MVP work deferred to later TechSpecs:\n18\t\n19\t- direct driver-specific tool injection outside hosted MCP;\n20\t- full shell/browser/file tool replacement for ACP runtimes;\n21\t- remote peer tool execution over AGH Network;\n22\t- provider-specific deferred schema loading such as Anthropic `tool_reference`;\n23\t- broad marketplace signing/trust overhaul;\n24\t- skill install/remove/update tools;\n25\t- bridge SDK executable tool adapters;\n26\t- direct in-process plugin loading for third-party Go or TypeScript code;\n27\t- client-supplied ACP `mcpServers` as session-scoped registry sources.\n28\t\n29\tExplicitly out of scope for this TechSpec:\n30\t\n31\t- in-process third-party extension handlers;\n32\t- silent compatibility aliases for dotted tool IDs;\n33\t- policy bypasses for `approve-all`;\n34\t- partial surface delivery where CLI/HTTP ships without UDS, docs, codegen, and hosted MCP parity;\n35\t- storing matchable ownership or authorization state inside opaque JSON metadata blobs.\n36\t\n37\tBackend delivery boundary:\n38\t\n39\t| Backend kind | MVP delivery | Invocation behavior |\n40\t|---|---|---|\n41\t| `native_go` | Descriptor, availability, policy, and full dispatch through `Registry.Call` | Executable in-process only for daemon-compiled AGH built-ins |\n42\t| `extension_host` | Manifest-authoritative descriptor, runtime reconciliation, 
extension health, source policy, and full dispatch through `Registry.Call` | Executable out-of-process through existing extension subprocess runtime, `tool.provider`, and `tools/call`; TypeScript and Go SDKs wrap handlers as functions |\n43\t| `mcp` | Descriptor discovery, source provenance, health/auth diagnostics, collision handling, source policy, and full dispatch through `Registry.Call` | Executable through daemon-owned MCP clients using existing MCP config and `internal/mcp/auth` status/token interfaces |\n44\t| `subprocess` | Not a public registry backend kind in MVP | Rejected by manifest validation; third-party Go/TypeScript tools use `extension_host`, whose implementation is subprocess-isolated |\n45\t| `bridge` | Reserved post-MVP backend kind | Rejected by MVP validation unless a later bridge TechSpec enables it |\n46\t\n47\tAGH-hosted MCP is different from `mcp` backend tools. Hosted MCP is the session exposure transport for AGH registry tools; `mcp` backend tools are external tools contributed by MCP servers and are executable only through daemon-owned MCP client adapters after the same registry policy, source, approval, hook, and session-lineage gates pass.\n48\t\n49\t## Architectural Boundaries\n50\t\n51\t`internal/daemon` remains the only composition root. It may import and compose `internal/tools`, `internal/skills`, `internal/mcp`, `internal/extension`, `internal/hooks`, `internal/session`, `internal/network`, `internal/task`, `internal/api/*`, and config/resource stores. No package may import `internal/daemon`.\n52\t\n53\tPackage import boundaries:\n54\t\n55\t- `internal/tools` owns `ToolID`, descriptors, backend kinds, providers, handles, registry, policy interfaces, availability, dispatch contracts, and result normalization. 
It must not import `internal/daemon`, `internal/api/*`, `internal/cli`, `internal/extension`, `internal/session`, `internal/network`, or `internal/task`.\n56\t- `internal/catalog`, if added, is a thin composition-facing facade over `internal/tools` and `internal/skills`. It must not own tool dispatch or policy.\n57\t- `internal/extension` may publish manifest-authoritative tool descriptors and expose live out-of-process extension tool invokers through public registry contracts. It must not execute third-party tool handlers in-process and must not import registry internals beyond public `internal/tools` descriptor/provider contracts.\n58\t- `internal/mcp` may adapt external MCP tools, call external MCP servers through daemon-owned clients, and host the AGH MCP stdio proxy. All AGH-owned calls must enter `internal/tools.Registry.Call` through UDS or an injected interface; MCP code must not duplicate dispatch policy.\n59\t- `internal/mcp/auth` already owns remote MCP OAuth 2.1 + PKCE, redacted status, token refresh/logout, and durable token storage through `internal/store/globaldb`. The Tool Registry may consume redacted auth status through a daemon-injected interface, but it must not reimplement OAuth flows, open the MCP auth token store directly, or persist remote MCP token material.\n60\t- `internal/api/core` owns transport-independent handlers. `internal/api/httpapi` and `internal/api/udsapi` only register routes and transport concerns.\n61\t- `internal/cli` calls UDS/HTTP client methods and does not import runtime registry implementations.\n62\t- `internal/hooks` owns typed hook payloads and execution. Hooks dispatch at the registry call site; no code may tail event tables to trigger tool hooks.\n63\t- `internal/store` may persist session lineage permission atoms and events. It must not decide tool policy; it validates and stores normalized atoms.\n64\t- `internal/session` can receive hosted MCP server config and session projections through interfaces. 
It must not implement a parallel tool registry.\n65\t\n66\tBoundaries to update in implementation:\n67\t\n68\t- If `internal/catalog` or a new `internal/mcp` subpackage is added, update `magefile.go` package boundary checks in the same change.\n69\t- Any OpenAPI/contract change must co-ship generated `openapi/agh.json` and `web/src/generated/agh-openapi.d.ts`.\n70\t- Any CLI surface must co-ship HTTP/UDS parity unless the spec explicitly marks the surface CLI-only. This TechSpec has no CLI-only tool surfaces.\n71\t\n72\t## System Architecture\n73\t\n74\t### Component Overview\n75\t\n76\t| Component | Responsibility | Boundary |\n77\t|---|---|---|\n78\t| `internal/tools` runtime registry | Owns canonical `ToolID`, descriptors, backend kinds, providers, handles, availability projection, policy evaluation, dispatch, result normalization, and telemetry hooks | Does not import `daemon`, `api`, or `cli`; composed by `internal/daemon` |\n79\t| Cold `tool` resources | Persist desired-state tool metadata from extensions, bundles, and future dynamic producers | Metadata only; no function pointers or execution handles |\n80\t| Built-in `native_go` provider | Registers AGH-native tools such as tool search, skill view, network peers/send, and bounded task tools | In-process only because it ships inside the daemon binary |\n81\t| Extension-host provider | Converts extension-published tool resources into runtime descriptors, reconciles them with `provide_tools`, and invokes handlers over extension subprocess RPC | Does not execute extension code in-process; fails closed on manifest/runtime mismatch |\n82\t| Extension SDKs | TypeScript and Go helper APIs let extension authors define tools with functions while the runtime remains out-of-process | SDKs generate/reconcile manifest descriptors and register `tools/call` handlers |\n83\t| MCP adapter provider | Normalizes MCP-backed descriptors, health, auth status, source provenance, and executes calls through daemon-owned MCP clients 
| Fails closed on health, auth, policy, approval, hook, or name collision problems |\n84\t| Existing MCP auth runtime | Supplies redacted remote MCP OAuth 2.1 + PKCE status for external MCP diagnostics | Owned by `internal/mcp/auth`; registry must not duplicate OAuth flow, token storage, or token refresh/logout |\n85\t| `internal/catalog` facade | Thin cross-domain list/search/view facade over tools and skills | Optional coordination layer; runtime tool dispatch remains in `internal/tools` |\n86\t| Policy engine | Combines ACP approval mode, session lineage, agent policy, source/risk defaults, registry allow/deny, toolsets, availability, and hooks | Produces structured effective decisions, never a single ambiguous boolean |\n87\t| AGH-hosted MCP proxy | Exposes session-callable AGH tools as MCP tools using canonical `ToolID` names | Runs through `agh tool mcp --session \u003cid\u003e` and proxies to daemon over UDS |\n88\t| API/CLI surfaces | Expose machine-readable list/search/info/invoke/status behavior | Shared contracts in `internal/api/contract`, handlers in `internal/api/core`, HTTP/UDS parity |\n89\t| Hook integration | Runs `tool.pre_call`, `tool.post_call`, and `tool.post_error` around registry dispatch | Hooks can deny, narrow, patch, redact, or annotate, but cannot bypass policy |\n90\t| Observability | Emits durable events and metrics for registration, projection, decisions, calls, failures, conflicts, truncation, and policy denials | Redacts secrets and raw tokens |\n91\t\n92\tData flow:\n93\t\n94\t1. Extensions, built-ins, MCP servers, and future dynamic providers register cold descriptors and, where executable, runtime handles.\n95\t2. `internal/daemon` composes providers into `internal/tools.Registry`.\n96\t3. Registry indexes descriptors by canonical `ToolID`.\n97\t4. Operator surfaces can list all tools with status and reason codes.\n98\t5. 
Session/model-visible surfaces request a session projection and receive only callable tools for that effective context.\n99\t6. Every invocation enters `Registry.Call`, which validates schema, recomputes availability/policy, runs hooks, calls the `native_go`, `extension_host`, or `mcp` handle, normalizes output, persists/observes, and returns a bounded result.\n100\t\n101\t## Implementation Design\n102\t\n103\t### Core Interfaces\n104\t\n105\tThese are final-shape interface contracts for implementation planning. Implementers may add helper types, but registry dispatch must preserve these method responsibilities and must not reintroduce metadata-only runtime providers.\n106\t\n107\t```go\n108\ttype Registry interface {\n109\t\tList(ctx context.Context, scope Scope) ([]ToolView, error)\n110\t\tSearch(ctx context.Context, scope Scope, q SearchQuery) ([]ToolView, error)\n111\t\tGet(ctx context.Context, scope Scope, id ToolID) (ToolView, error)\n112\t\tCall(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error)\n113\t}\n114\t\n115\ttype Handle interface {\n116\t\tDescriptor() Descriptor\n117\t\tAvailability(ctx context.Context, scope Scope) Availability\n118\t\tCall(ctx context.Context, req CallRequest) (ToolResult, error)\n119\t}\n120\t```\n121\t\n122\tSupporting contracts:\n123\t\n124\t```go\n125\ttype BackendKind string\n126\t\n127\tconst (\n128\t\tBackendNativeGo BackendKind = \"native_go\"\n129\t\tBackendExtensionHost BackendKind = \"extension_host\"\n130\t\tBackendMCP BackendKind = \"mcp\"\n131\t\tBackendBridge BackendKind = \"bridge\"\n132\t)\n133\t```\n134\t\n135\t```go\n136\ttype Provider interface {\n137\t\tID() SourceRef\n138\t\tList(ctx context.Context, scope Scope) ([]Descriptor, error)\n139\t\tResolve(ctx context.Context, scope Scope, id ToolID) (Handle, bool, error)\n140\t}\n141\t```\n142\t\n143\t```go\n144\ttype NativeToolFunc func(ctx context.Context, scope Scope, req CallRequest) (ToolResult, 
error)\n145\t```\n146\t\n147\t```go\n148\ttype ExtensionToolInvoker interface {\n149\t\tProvideTools(ctx context.Context, extensionID string) ([]ExtensionToolRuntimeDescriptor, error)\n150\t\tCallTool(ctx context.Context, extensionID string, req ExtensionToolCallRequest) (ToolResult, error)\n151\t}\n152\t```\n153\t\n154\t```go\n155\ttype MCPToolInvoker interface {\n156\t\tListTools(ctx context.Context, source SourceRef) ([]MCPToolDescriptor, error)\n157\t\tCallTool(ctx context.Context, source SourceRef, req MCPToolCallRequest) (ToolResult, error)\n158\t}\n159\t```\n160\t\n161\t```go\n162\ttype MCPAuthStatus struct {\n163\t\tServerName string\n164\t\tStatus string\n165\t\tAuthType string\n166\t\tClientID string\n167\t\tScopes []string\n168\t\tExpiresAt *time.Time\n169\t\tRefreshable bool\n170\t\tTokenPresent bool\n171\t\tDiagnostic string\n172\t}\n173\t\n174\ttype MCPAuthStatusProvider interface {\n175\t\tStatus(ctx context.Context, source SourceRef) (MCPAuthStatus, error)\n176\t}\n177\t\n178\ttype PolicyEvaluator interface {\n179\t\tEvaluate(ctx context.Context, scope Scope, d Descriptor) (EffectiveToolDecision, error)\n180\t}\n181\t```\n182\t\n183\t```go\n184\ttype ResultLimiter interface {\n185\t\tApply(ctx context.Context, d Descriptor, result ToolResult) (ToolResult, error)\n186\t}\n187\t\n188\ttype HookRunner interface {\n189\t\tPreCall(ctx context.Context, call CallRequest) (CallRequest, EffectiveToolDecision, error)\n190\t\tPostCall(ctx context.Context, call CallRequest, result ToolResult) (ToolResult, error)\n191\t\tPostError(ctx context.Context, call CallRequest, err error) error\n192\t}\n193\t```\n194\t\n195\tError conventions:\n196\t\n197\t- `ErrToolNotFound`\n198\t- `ErrToolConflict`\n199\t- `ErrToolUnavailable`\n200\t- `ErrToolDenied`\n201\t- `ErrToolApprovalRequired`\n202\t- `ErrToolInvalidInput`\n203\t- `ErrToolResultTooLarge`\n204\t- `ErrToolBackendFailed`\n205\t\n206\tAll production errors must wrap with `%w` where applicable and must map to 
deterministic API/CLI error codes.\n207\t\n208\t### Data Models\n209\t\n210\t`ToolID`\n211\t\n212\t- Canonical public ID used by registry, policy, CLI, HTTP, UDS, hooks, telemetry, and hosted MCP.\n213\t- Format: `\u003csegment\u003e( \"__\" \u003csegment\u003e )*`\n214\t- Segment regex: `[a-z][a-z0-9_]*`\n215\t- Max length: 64.\n216\t- Lowercase ASCII only.\n217\t- `__` is reserved as namespace separator.\n218\t- No dots, hyphens, uppercase, empty segments, or dual wire aliases.\n219\t\n220\tExamples:\n221\t\n222\t- `agh__tool_list`\n223\t- `agh__tool_search`\n224\t- `agh__tool_info`\n225\t- `agh__skill_list`\n226\t- `agh__skill_search`\n227\t- `agh__skill_view`\n228\t- `agh__network_peers`\n229\t- `agh__network_send`\n230\t- `agh__task_list`\n231\t- `agh__task_read`\n232\t- `mcp__github__create_issue`\n233\t- `ext__linear__search`\n234\t\n235\t`Descriptor`\n236\t\n237\t- `id ToolID`\n238\t- `backend BackendRef`\n239\t- `display_title string`\n240\t- `description string`\n241\t- `input_schema json.RawMessage`\n242\t- `output_schema json.RawMessage`\n243\t- `source SourceRef`\n244\t- `visibility Visibility`\n245\t- `risk RiskClass`\n246\t- `read_only bool`\n247\t- `destructive bool`\n248\t- `open_world bool`\n249\t- `requires_interaction bool`\n250\t- `concurrency_safe bool`\n251\t- `max_result_bytes int64`\n252\t- `toolsets []ToolsetID`\n253\t- `tags []string`\n254\t- `search_hints []string`\n255\t\n256\t`BackendRef`\n257\t\n258\t- `kind BackendKind`\n259\t- `extension_id string`\n260\t- `handler string`\n261\t- `mcp_server string`\n262\t- `mcp_tool string`\n263\t- `native_name string`\n264\t- `requires_capabilities []string`\n265\t\n266\t`ExtensionToolRuntimeDescriptor`\n267\t\n268\t- `id ToolID`\n269\t- `handler string`\n270\t- `input_schema_digest string`\n271\t- `output_schema_digest string`\n272\t- `read_only bool`\n273\t- `risk RiskClass`\n274\t- `capabilities []string`\n275\t\n276\t`SourceRef`\n277\t\n278\t- `kind`: `builtin`, `mcp`, `extension`, 
`dynamic`\n279\t- `owner`: daemon, extension id, MCP server id, bundle id, skill id, or provider id\n280\t- `raw_server_name`\n281\t- `raw_tool_name`\n282\t- `resource_id`\n283\t- `resource_version`\n284\t- `workspace_id`\n285\t- `scope`\n286\t\n287\t`Availability`\n288\t\n289\tStates are composable, not a single boolean:\n290\t\n291\t- `registered`\n292\t- `enabled`\n293\t- `available`\n294\t- `authorized`\n295\t- `executable`\n296\t- `conflicted`\n297\t\n298\tReason codes include:\n299\t\n300\t- `dependency_missing`\n301\t- `backend_unhealthy`\n302\t- `backend_not_executable`\n303\t- `extension_inactive`\n304\t- `extension_runtime_mismatch`\n305\t- `extension_capability_missing`\n306\t- `mcp_unreachable`\n307\t- `mcp_auth_unconfigured`\n308\t- `mcp_auth_required`\n309\t- `mcp_auth_expired`\n310\t- `mcp_auth_invalid`\n311\t- `mcp_auth_refresh_failed`\n312\t- `source_disabled`\n313\t- `policy_denied`\n314\t- `approval_required`\n315\t- `session_denied`\n316\t- `hook_denied`\n317\t- `schema_invalid`\n318\t- `conflicted_id`\n319\t- `conflicted_sanitized_name`\n320\t- `result_budget_exceeded`\n321\t\n322\tMCP-backed descriptors may attach a redacted `MCPAuthStatus` to operator-visible tool views only. The status mirrors the existing `internal/mcp/auth.StatusValue` values (`unconfigured`, `needs_login`, `authenticated`, `expired`, `invalid`) plus registry reason codes, and may include `server_name`, `auth_type`, `client_id`, `scopes`, `expires_at`, `refreshable`, `token_present`, and `diagnostic`. It must never include access tokens, refresh tokens, OAuth authorization codes, PKCE verifiers, client secrets, or hosted MCP bind tokens. 
Session/model-visible projections do not include `MCPAuthStatus`; they hide or deny the tool through `Availability` reason codes.\n323\t\n324\t`EffectiveToolDecision`\n325\t\n326\t- `visible_to_operator bool`\n327\t- `visible_to_session bool`\n328\t- `callable bool`\n329\t- `approval_required bool`\n330\t- `system_permission_mode`\n331\t- `session_policy_result`\n332\t- `agent_policy_result`\n333\t- `registry_policy_result`\n334\t- `source_policy_result`\n335\t- `availability_result`\n336\t- `hook_result`\n337\t- `reason_codes []string`\n338\t\n339\t`ToolResult`\n340\t\n341\t- `content []ToolContent`\n342\t- `structured any`\n343\t- `preview string`\n344\t- `artifacts []ArtifactRef`\n345\t- `metadata map[string]any`\n346\t- `redactions []Redaction`\n347\t- `truncated bool`\n348\t- `bytes int64`\n349\t- `duration_ms int64`\n350\t\n351\t`Toolset`\n352\t\n353\tToolsets are separate named resources/config entries, not overloaded as tools. They use a typed `ToolsetID`, include exact `ToolID`s and patterns, and may include other toolsets recursively. Policy fields must distinguish `tools` and `toolsets` to avoid ambiguity.\n354\t\n355\t`ToolsetID` uses the same grammar as `ToolID`: lowercase ASCII segments separated by reserved `__`, maximum 64 characters, and no dots, hyphens, uppercase, or empty segments.\n356\t\n357\t`dynamic` source kind is reserved because the existing source enum already has it, but MVP has no dynamic producer and no dynamic validation surface. 
Dynamic tools remain unavailable unless a later TechSpec defines their producer, policy, and provenance model.\n358\t\n359\t### Data-Model Field Rationale\n360\t\n361\t| Field or key | Shape | Purpose | Storage decision |\n362\t|---|---|---|---|\n363\t| `ToolID` | string, provider-safe `__`-segmented id | Single policy/dispatch/audit identity across registry, CLI, HTTP, UDS, hooks, telemetry, and hosted MCP | Typed field, never inferred from display title |\n364\t| `Descriptor.backend` | structured `BackendRef` | Binds the descriptor to the only allowed executable backend path | Typed struct; dispatch never infers backend from source prefix or metadata |\n365\t| `Descriptor.source` | structured `SourceRef` | Preserve raw external provenance without making prefixes the only source of truth | Typed struct; raw names are subfields, not alternate IDs |\n366\t| `Descriptor.input_schema` | JSON Schema object | Validate call input before dispatch | JSON schema payload is appropriate because schema content is externally structured and opaque to AGH queries |\n367\t| `Descriptor.output_schema` | JSON Schema object | Optional structured output contract | JSON schema payload is appropriate for the same reason as input schema |\n368\t| `Descriptor.visibility` | enum | Separate internal/operator/session/model surfaces | Typed enum, queryable and policy-relevant |\n369\t| `Descriptor.risk` | enum | Classify read/mutate/network/open-world behavior | Typed enum, never free-text metadata |\n370\t| `Descriptor.read_only` | bool | Drives `approve-reads` and safety checks | Typed bool; misclassification is a security bug |\n371\t| `Descriptor.destructive` | bool | Forces stricter policy and approval behavior | Typed bool, not metadata |\n372\t| `Descriptor.open_world` | bool | Marks tools that can reach arbitrary external state | Typed bool, not metadata |\n373\t| `Descriptor.requires_interaction` | bool | Distinguishes autonomous-safe calls from interactive calls | Typed bool, not 
metadata |\n374\t| `Descriptor.max_result_bytes` | int64 | Enforces result budget consistently across surfaces | Typed numeric field with config default |\n375\t| `Descriptor.toolsets` | `[]ToolsetID` | Supports recursive named bundles without overloading individual tools | Typed list; expanded to concrete `ToolID`s for lineage |\n376\t| `MCPAuthStatus` | redacted status object | Lets operator surfaces explain remote MCP login/expiry without exposing credentials | Derived from `internal/mcp/auth`; never persisted by the registry |\n377\t| `EffectiveToolDecision.reason_codes` | `[]string` enum values | Operator diagnostics and deterministic error contracts | Typed enum strings; no prose-only decisions |\n378\t| `[tools].enabled` | bool | Global registry execution switch | Config key with validation/defaults |\n379\t| `[tools].hosted_mcp_enabled` | bool | Allows session exposure through AGH-hosted MCP | Config key with validation/defaults |\n380\t| `[tools].default_max_result_bytes` | int64 | Default output cap when descriptor is silent | Config key with validation/defaults |\n381\t| `[tools.policy].external_default` | enum | Default executable policy for extension/MCP/dynamic tools | Config key, not hidden in metadata |\n382\t| `agent.tools` | `[]ToolID/pattern` | Allow concrete tools/patterns for an agent | Frontmatter/config field resolved to concrete lineage atoms |\n383\t| `agent.toolsets` | `[]ToolsetID` | Allow named bundles for an agent | Frontmatter/config field expanded before session lineage |\n384\t| `agent.deny_tools` | `[]ToolID/pattern` | Explicit narrowing layer for an agent | Frontmatter/config field, not runtime-only state |\n385\t| `extension.resources.tools.*.backend` | structured backend metadata | Declares the manifest-authoritative runtime binding for `extension_host` or `mcp` execution | Extension manifest fields, not in-process function pointers; daemon rejects mismatched runtime descriptors |\n386\t| `extension.resources.tools.*.handler` | 
string | Names the extension SDK handler used by `tools/call` | Manifest field validated against `provide_tools`; not executable by itself |\n387\t| `ExtensionToolRuntimeDescriptor` | redacted runtime descriptor | Lets daemon confirm a running extension provides exactly the manifest-declared handler/schema/risk shape | Runtime-only reconciliation result; not persisted as source of truth |\n388\t| `MCPToolHandle` | daemon-owned client handle | Calls remote MCP tools without exposing remote credentials to descriptors or sessions | Runtime-only handle injected by `internal/mcp`; token material stays behind `internal/mcp/auth` |\n389\t\n390\tNo new SQLite columns are required for the MVP registry foundation. Existing session lineage stores concrete tool permission atoms; implementation should validate those atoms as canonical `ToolID`s. Existing remote MCP OAuth tokens already live in `globaldb.mcp_auth_tokens` through `mcpauth.TokenStore`; registry work must not add token fields to tool descriptors, resources, events, or metadata. Hosted MCP bind tokens are separate ephemeral session secrets and must not be stored in `mcp_auth_tokens`; if a later design needs durable hosted-proxy token state, it must add a separate typed table with its own lifecycle and redaction tests. 
If a later task needs durable queryable tool-call history beyond append-only events, it must add a typed side table such as `tool_calls` rather than placing queryable call state in a session metadata JSON blob.\n391\t\n392\t### Side-Table vs JSON Decisions\n393\t\n394\t| Domain state | Decision | Rationale |\n395\t|---|---|---|\n396\t| Tool descriptors | Typed resource specs plus runtime descriptors | Descriptors are matchable by id/source/risk/visibility and must not live as opaque metadata blobs |\n397\t| Toolsets | Typed config/resource records | Toolset membership affects policy and lineage; it must be queryable and expandable deterministically |\n398\t| Tool call events | Append-only event payloads for MVP; typed side table only if queryable history is required | Events are the operational ledger; indexed call history would be matchable state and must not be hidden in JSON metadata |\n399\t| Source provenance | Structured `SourceRef` fields | Raw MCP/extension names are needed for debugging and collision handling |\n400\t| Input/output schemas | JSON Schema blobs | Schema contents are inherently opaque external contracts and are not AGH ownership state |\n401\t| Tool result structured payload | JSON payload plus typed envelope fields | Result body can be arbitrary, but status, bytes, truncation, redaction, and tool id are typed envelope fields |\n402\t| Policy decisions | Typed `EffectiveToolDecision` | Authorization is matchable and auditable; it cannot be a JSON bag |\n403\t| Availability reasons | Typed reason-code list | Operator diagnostics and tests need deterministic matching |\n404\t\n405\t### API Endpoints\n406\t\n407\tAll endpoints are implemented once in `internal/api/core` and registered by HTTP and UDS transports.\n408\t\n409\t| Method | Path | Description |\n410\t|---|---|---|\n411\t| `GET` | `/api/tools` | List operator-visible tools with availability/policy reason codes |\n412\t| `POST` | `/api/tools/search` | Search tools by id, title, 
description, source, tags, and toolsets |\n413\t| `GET` | `/api/tools/{id}` | Return descriptor, availability, policy view, schema, and source provenance |\n414\t| `POST` | `/api/tools/{id}/invoke` | Invoke a tool through registry dispatch |\n415\t| `GET` | `/api/sessions/{id}/tools` | Return session/model-visible callable projection |\n416\t| `POST` | `/api/sessions/{id}/tools/search` | Search only within effective session-callable projection |\n417\t| `GET` | `/api/toolsets` | List named toolsets and expansion status |\n418\t| `GET` | `/api/toolsets/{id}` | Inspect one toolset expansion and conflicts |\n419\t\n420\tInvoke request:\n421\t\n422\t```json\n423\t{\n424\t \"session_id\": \"sess_...\",\n425\t \"workspace_id\": \"ws_...\",\n426\t \"input\": {},\n427\t \"approval_token\": \"optional-local-approval-reference\"\n428\t}\n429\t```\n430\t\n431\t`approval_token` is an opaque local approval reference issued by the daemon approval surface for CLI/HTTP/UDS calls. AGH stores only a hash, never logs or emits the raw value, redacts it from SSE/events/errors, scopes it to one tool decision, and treats it as separate from `claim_token`. 
Hosted MCP does not accept client-supplied `approval_token`; it uses the Hosted MCP Approval Bridge below.\n432\t\n433\tInvoke response:\n434\t\n435\t```json\n436\t{\n437\t \"tool_id\": \"agh__skill_view\",\n438\t \"status\": \"completed\",\n439\t \"result\": {},\n440\t \"truncated\": false,\n441\t \"duration_ms\": 23,\n442\t \"events\": []\n443\t}\n444\t```\n445\t\n446\tStatus codes:\n447\t\n448\t- `200` completed/listed.\n449\t- `202` approval required or async dispatch accepted, only if the tool is explicitly async.\n450\t- `400` invalid `ToolID`, invalid schema input, or malformed request.\n451\t- `403` denied by ACP ceiling, session lineage, registry policy, source policy, or hook.\n452\t- `404` not found or hidden from caller context.\n453\t- `409` conflicted canonical ID or sanitized external name.\n454\t- `422` registered but unavailable or not executable.\n455\t- `500` internal daemon error.\n456\t- `502` backend adapter failure.\n457\t\n458\tCLI parity:\n459\t\n460\t- `agh tool list -o json`\n461\t- `agh tool search \u003cquery\u003e -o json`\n462\t- `agh tool info \u003ctool-id\u003e -o json`\n463\t- `agh tool invoke \u003ctool-id\u003e --input \u003cjson\u003e -o json`\n464\t- `agh toolsets list -o json`\n465\t- `agh toolsets info \u003ctoolset-id\u003e -o json`\n466\t- `agh tool mcp --session \u003csession-id\u003e` for the hosted MCP stdio proxy\n467\t\n468\t## Integration Points\n469\t\n470\t### ACP\n471\t\n472\tACP does not impose a callable tool registry pattern. 
AGH must not use ACP `ToolCall.title` as a policy identity or dispatch key.\n473\t\n474\tAGH will integrate with ACP by:\n475\t\n476\t- passing the hosted AGH MCP server in ACP `mcpServers` during session creation/load when the selected agent supports MCP;\n477\t- preserving ACP tool-call observations as session events keyed by `toolCallId`;\n478\t- mapping AGH-owned tool calls back into ACP lifecycle updates where relevant;\n479\t- treating ACP `ToolKind` as risk/display metadata, not identity;\n480\t- keeping `permissions.mode` as the system/session approval ceiling.\n481\t\n482\tCurrent-state caveat: `internal/acp.toSDKMCPServers` currently emits stdio-only `acpsdk.McpServer` values. MVP registry work must keep hosted AGH MCP as a stdio-only injected server and must not imply remote MCP HTTP/SSE ACP parity until a later implementation adds tested HTTP/SSE conversion, redacted Authorization/header handling, and provider capability checks.\n483\t\n484\t### Hosted MCP\n485\t\n486\tMVP exposure path:\n487\t\n488\t```text\n489\tagent session -\u003e ACP mcpServers -\u003e agh tool mcp --session \u003cid\u003e -\u003e UDS -\u003e daemon Tool Registry\n490\t```\n491\t\n492\tThe hosted MCP server lists only session-callable tools. It exposes MCP `Tool.name` equal to AGH canonical `ToolID`. It does not expose unavailable, unauthorized, or conflicted tools to the model-visible surface.\n493\t\n494\tHosted MCP authentication:\n495\t\n496\t- On session creation/load, the daemon mints a single-use session-bound `hosted_mcp_bind_token`. The daemon stores only the token hash and associates it with `session_id`, `workspace_id`, expiry, and proxy state.\n497\t- The raw `hosted_mcp_bind_token` is passed to `agh tool mcp` through a reserved private process environment variable controlled by the spawned MCP server configuration, never through argv and never persisted in config, resource stores, session metadata, lineage, events, or tool descriptors. 
AGH redaction treats this token like `claim_token`.\n498\t- At startup, `agh tool mcp` performs a UDS bind RPC with `session_id` and the raw `hosted_mcp_bind_token` before it can list or call tools.\n499\t- The daemon compares the `hosted_mcp_bind_token` hash, binds the UDS connection to exactly one session/workspace projection, and rejects any later client-supplied `session_id` or `workspace_id`.\n500\t- The `hosted_mcp_bind_token` is invalidated on first successful bind, session end, proxy disconnect, or TTL expiry, whichever happens first.\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:59.783351Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:19:59.783617Z","update":{"kind":"tool_call_started","toolCallId":"toolu_018RiWsgpUs13TLq5PZWUU1H","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_018RiWsgpUs13TLq5PZWUU1H","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:00.003429Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_018RiWsgpUs13TLq5PZWUU1H","blocks":[{"type":"tool_use","id":"toolu_018RiWsgpUs13TLq5PZWUU1H","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md (500 - 999)","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md","limit":500,"offset":500}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:00.056227Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:00.323379Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_018RiWsgpUs13TLq5PZWUU1H","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:00.335614Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_018RiWsgpUs13TLq5PZWUU1H","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_018RiWsgpUs13TLq5PZWUU1H","content":"```\n500\t- The `hosted_mcp_bind_token` is invalidated on first successful bind, session end, proxy disconnect, or TTL expiry, whichever happens first.\n501\t- A foreign process calling `agh tool mcp --session \u003cid\u003e` without a valid `hosted_mcp_bind_token` receives a deterministic permission error and no tool projection.\n502\t- Redaction tests must cover the reserved bind-token env key/value across ACP payload diagnostics, process diagnostics, crash bundles, logs, SSE/events, settings output, tool results, and MCP responses.\n503\t\n504\tHosted MCP approval bridge:\n505\t\n506\t- Hosted MCP projections include only tools that are callable without a new approval prompt or tools whose session has a live daemon-mediated approval channel.\n507\t- When `EffectiveToolDecision.approval_required=true` and ACP `session/request_permission` is available, `Registry.Call` issues the ACP permission request and blocks the MCP `tools/call` response until approved, denied, or timed out.\n508\t- When no approval channel is available, hosted MCP hides the tool from `tools/list` if that can be determined during projection. 
If a call still reaches dispatch, it returns `ErrToolApprovalRequired` with reason codes `approval_required` and `approval_unreachable`.\n509\t- Hosted MCP cannot satisfy approval using client-supplied arguments. CLI/HTTP/UDS may use `approval_token`; hosted MCP must use the daemon approval bridge.\n510\t\n511\tHosted MCP lifecycle:\n512\t\n513\t- The stdio proxy is spawned by the ACP runtime from AGH-provided `mcpServers` config and is scoped to one AGH session.\n514\t- The proxy exits when stdio closes, when the session stops, or when the bind token expires before successful bind.\n515\t- On ACP `session/load`, the daemon mints a fresh bind token and provides a fresh hosted MCP entry for that resumed session.\n516\t- The proxy never accepts a client-supplied workspace id. The daemon derives workspace id from the bound session at projection time and dispatch time.\n517\t\n518\t### Existing MCP Config And Auth\n519\t\n520\tAGH already has an MCP server configuration and remote-auth subsystem. The Tool Registry must consume those surfaces instead of defining a parallel MCP model:\n521\t\n522\t- `internal/config/provider.go` defines `MCPServer`, `MCPServerTransport` (`stdio`, `http`, `sse`), and `MCPAuthConfig` for OAuth 2.1 + PKCE metadata/client settings. 
Token material is explicitly outside config.\n523\t- `internal/config/mcpjson.go` loads `mcp.json` sidecars using `mcpServers` or `mcp_servers`.\n524\t- `internal/config/mcp_resource.go` validates `mcp_server` desired-state resources.\n525\t- `internal/mcp/auth` owns metadata discovery, PKCE state, authorization-code exchange, refresh, redacted status, logout/revocation, and `StatusValue`.\n526\t- `internal/store/globaldb/global_db_mcp_auth.go` persists remote MCP OAuth tokens behind `mcpauth.TokenStore` with encryption/redaction boundaries.\n527\t- `internal/cli/mcp_auth.go` provides the existing agent-operable commands `agh mcp auth login`, `agh mcp auth status`, and `agh mcp auth logout`.\n528\t- `internal/settings` and `internal/api/contract/settings.go` already expose redacted MCP auth status for settings surfaces.\n529\t\n530\tThe hosted MCP bind token described above is not a remote MCP OAuth token. It is a daemon-minted session credential for AGH's local stdio proxy. Remote MCP OAuth tokens remain owned by `internal/mcp/auth` and `globaldb`. 
Registry descriptors, events, tool results, and MCP proxy arguments must never mix those credentials or reuse one lifecycle for the other.\n531\t\n532\tExternal MCP-backed tool availability must derive auth diagnostics from the existing auth service:\n533\t\n534\t| `internal/mcp/auth.StatusValue` | Registry reason code | Session projection behavior |\n535\t|---|---|---|\n536\t| `unconfigured` | `mcp_auth_unconfigured` | Hide external MCP tools unless the server is public and executable support exists |\n537\t| `needs_login` | `mcp_auth_required` | Hide from model-visible projection; operator view points to `agh mcp auth login \u003cserver\u003e` |\n538\t| `authenticated` | none | Auth does not block availability; calls may proceed only through the daemon-owned MCP adapter after registry policy passes |\n539\t| `expired` | `mcp_auth_expired` | Hide from model-visible projection; operator view points to `agh mcp auth status --refresh \u003cserver\u003e` |\n540\t| `invalid` | `mcp_auth_invalid` | Hide from model-visible projection; operator view points to logout/login repair |\n541\t\n542\tImplementation correction required before the registry consumes MCP resource catalogs: current `internal/daemon/tool_mcp_resources.go` clones MCP server records through `cloneDaemonMCPServer`, which preserves only `Name`, `Command`, `Args`, and `Env`. 
Registry work that depends on remote MCP resources must update that clone path and its tests to preserve `Transport`, `URL`, and `Auth`; otherwise remote MCP auth metadata will be silently dropped from tool diagnostics.\n543\t\n544\t### Extensions\n545\t\n546\tExtension-installed tools are possible and should be first-class.\n547\t\n548\tCurrent foundation already has:\n549\t\n550\t- `extension.toml` `resources.tools`\n551\t- `resources.publish.families = [\"tools\"]`\n552\t- resource projection into daemon tool records\n553\t- extension health/status infrastructure\n554\t- Host API capability checks\n555\t\n556\tThis TechSpec extends that by adding manifest-authoritative backend metadata, runtime reconciliation, and executable out-of-process handlers:\n557\t\n558\t```toml\n559\t[resources.tools.search]\n560\tid = \"ext__linear__search\"\n561\tdescription = \"Search Linear issues\"\n562\tread_only = true\n563\tbackend.kind = \"extension_host\"\n564\tbackend.handler = \"search\"\n565\ttoolsets = [\"linear__read\"]\n566\t```\n567\t\n568\tTypeScript extensions define the matching runtime handler through `@agh/extension-sdk`:\n569\t\n570\t```ts\n571\textension.tool(\"search\", {\n572\t readOnly: true,\n573\t inputSchema: z.object({ query: z.string() }),\n574\t}, async ({ input, context }) =\u003e {\n575\t return { content: [{ type: \"text\", text: await searchLinear(input.query, context) }] };\n576\t});\n577\t```\n578\t\n579\tGo extensions define the same handler through the public Go extension SDK:\n580\t\n581\t```go\n582\text.Tool(\"search\", aghsdk.ToolOptions{\n583\t\tReadOnly: true,\n584\t\tInputSchema: searchInputSchema,\n585\t}, func(ctx context.Context, req aghsdk.ToolRequest[SearchInput]) (aghsdk.ToolResult, error) {\n586\t\treturn searchLinear(ctx, req.Input)\n587\t})\n588\t```\n589\t\n590\tExtensions must not freely impersonate `agh__*` or another extension namespace. Raw manifest names remain in `SourceRef`. 
`extension.toml` is the source of truth; SDK registration is runtime proof that the live extension process implements the manifest-declared handler and compatible schemas.\n591\t\n592\tExecution boundaries:\n593\t\n594\t- `native_go`: full MVP dispatch through in-process daemon code compiled with AGH.\n595\t- `extension_host`: full MVP dispatch through out-of-process extension runtime, `tool.provider`, `provide_tools`, and `tools/call`.\n596\t- `mcp`: full MVP dispatch through daemon-owned MCP client adapters using existing MCP config/auth.\n597\t- `subprocess`: not a separate public backend kind; TypeScript and Go extension SDKs use the existing subprocess runtime behind `extension_host`.\n598\t- `bridge`: future bridge adapter, rejected by MVP validation unless a later TechSpec enables it.\n599\t\n600\tNo in-process third-party extension handlers in MVP. A Go function authored by an extension runs in the extension's subprocess binary through the Go SDK, not inside the daemon.\n601\t\n602\t### MCP Sources\n603\t\n604\tMCP-backed tools use:\n605\t\n606\t```text\n607\tmcp__\u003cserver\u003e__\u003ctool\u003e\n608\t```\n609\t\n610\tThe registry must preserve raw server/tool names in `SourceRef`. 
Sanitization collisions fail closed and mark the candidate tool `conflicted`.\n611\t\n612\tAGH-managed MCP sources in MVP are the existing validated projections, not raw file scans:\n613\t\n614\t- top-level and workspace MCP config plus global/workspace `mcp.json` sidecars;\n615\t- provider and agent MCP server declarations;\n616\t- skill MCP declarations resolved by `internal/skills.MCPResolver`, including the existing `allowed_marketplace_mcp` trust gate;\n617\t- extension `resources.mcp_servers` records resolved by `internal/extension/resource_publication.go`;\n618\t- future client-supplied ACP `mcpServers` only if a later TechSpec makes them session-scoped tool sources with explicit source trust and collision handling.\n619\t\n620\tTop-level, provider, agent, and `mcp.json` declarations use `aghconfig.MCPServer` and may carry remote `transport`, `url`, and `auth` fields. Current skill and extension MCP declarations are stdio subprocess declarations with `name`, `command`, `args`, and `env`; registry work must not infer remote OAuth support from them until their manifests are explicitly extended.\n621\t\n622\tExternal MCP descriptor discovery must consume existing `aghconfig.MCPServer` config/resource projections and `internal/mcp/auth` redacted status. It must not read raw config files directly, bypass strict `mcp.json` decoding, bypass skill sidecar symlink hardening, bypass marketplace MCP consent, or bypass extension resource grants.\n623\t\n624\tExternal MCP-backed tools are executable in the MVP only through daemon-owned MCP client adapters. They are session-callable when descriptor discovery succeeds, the source is explicitly allowed, existing MCP auth status is usable, the registry policy/approval/session/hook gates pass, and dispatch can call the remote MCP server without exposing token material. 
Operator surfaces still show unavailable MCP tools with deterministic diagnostics when health, auth, source, policy, collision, or schema checks fail.\n625\t\n626\t### Hooks\n627\t\n628\tExisting tool hook concepts become part of central registry dispatch:\n629\t\n630\t- `tool.pre_call`\n631\t- `tool.post_call`\n632\t- `tool.post_error`\n633\t\n634\tPayloads should use canonical `tool_id`. Existing `tool_name` / `tool_namespace` usage should be replaced in the registry path to avoid dual identity.\n635\t\n636\t### Skills\n637\t\n638\tBuilt-in skill tools prove progressive disclosure:\n639\t\n640\t- `agh__skill_list`\n641\t- `agh__skill_search`\n642\t- `agh__skill_view`\n643\t\n644\tThese call into `internal/skills.Registry`, respect workspace overlays, reuse content verification, and enforce result budgets. Install/remove/update tools are intentionally out of MVP unless supply-chain policy/scanning is expanded.\n645\t\n646\t`agh__skill_view` applies registry result budgeting. If content exceeds the descriptor limit, the result returns `truncated=true`, a typed `next_offset`, and an artifact/reference strategy rather than silently dropping content.\n647\t\n648\t### Network And Tasks\n649\t\n650\tMVP network tools:\n651\t\n652\t| ToolID | Read-only | Destructive | Open-world | Authority route |\n653\t|---|---:|---:|---:|---|\n654\t| `agh__network_peers` | true | false | false | Existing network peer/list service |\n655\t| `agh__network_send` | false | false | true | Existing network send service with channel/session policy checks |\n656\t\n657\tMVP task tools:\n658\t\n659\t| ToolID | Read-only | Destructive | Open-world | Authority route |\n660\t|---|---:|---:|---:|---|\n661\t| `agh__task_list` | true | false | false | `task.Service.ListTasks` |\n662\t| `agh__task_read` | true | false | false | `task.Service.GetTask` |\n663\t| `agh__task_create` | false | false | false | `task.Service.CreateTask` |\n664\t| `agh__task_child_create` | false | false | false | 
`task.Service.CreateChildTask` |\n665\t| `agh__task_update` | false | false | false | `task.Service.UpdateTask` |\n666\t| `agh__task_cancel` | false | true | false | `task.Service.CancelTask` |\n667\t| `agh__task_run_list` | true | false | false | `task.Service.ListTaskRuns` |\n668\t\n669\tExcluded task tools:\n670\t\n671\t- `agh__task_claim`\n672\t- `agh__task_release`\n673\t- `agh__task_complete`\n674\t- `agh__task_fail`\n675\t- `agh__task_run_start`\n676\t- `agh__task_run_complete`\n677\t- `agh__task_run_cancel`\n678\t\n679\tThose excluded tools cross claim/lease/session lifecycle authority. They require a separate task-execution TechSpec because `task.Service.ClaimNextRun`, `Spawn`, and session manager terminal-state transitions are authoritative primitives and must not be wrapped by generic agent-callable tools.\n680\t\n681\tAll network and task tools must route through existing network/task services and existing authorization rules. Mutating tools must not be classified as read-only.\n682\t\n683\t## Extensibility Integration Plan\n684\t\n685\t### Extension Manifests\n686\t\n687\tUpdate extension manifest tool declarations with runtime metadata:\n688\t\n689\t- `id`\n690\t- `backend.kind`\n691\t- `backend.handler`\n692\t- `backend.server`\n693\t- `backend.tool`\n694\t- `requires_env`\n695\t- `required_capabilities`\n696\t- `risk`\n697\t- `destructive`\n698\t- `open_world`\n699\t- `requires_interaction`\n700\t- `max_result_bytes`\n701\t- `toolsets`\n702\t- `tags`\n703\t- `visibility`\n704\t\n705\tExtension-published descriptors are installed through existing resource publication, then normalized by the registry provider. For `extension_host` tools, the extension process must advertise `tool.provider`, expose `provide_tools`, and implement `tools/call`. 
The daemon marks the tool executable only when the runtime descriptor matches the manifest-authoritative `id`, handler name, schema digests, and risk flags.\n706\t\n707\tExtension-published MCP servers currently use `resources.mcp_servers` with stdio-only `command`, `args`, and `env` fields through `internal/extension/resource_publication.go`. This TechSpec does not add remote OAuth fields to extension MCP server declarations in MVP. If a future extension wants to publish remote authenticated MCP servers, that future TechSpec must extend the extension manifest schema to mirror `aghconfig.MCPServer` transport/auth fields and reuse `internal/mcp/auth`; it must not introduce extension-local token storage.\n708\t\n709\tFor `mcp` extension tool descriptors, `backend.server` must resolve to an existing authorized MCP server source in the same extension/config scope, and `backend.tool` must match a discovered MCP tool. Missing, unauthorized, unhealthy, unauthenticated, or conflicted backend servers keep the tool operator-visible but unavailable with deterministic reason codes. A missing or unauthorized backend server cannot make the tool session-callable.\n710\t\n711\tFor `extension_host` descriptors, `backend.handler` must match a runtime handler exposed by the extension SDK. Missing handlers, schema digest mismatches, risk flag mismatches, inactive extensions, or missing `tool.provider` grants keep the tool operator-visible but unavailable with `extension_runtime_mismatch`, `extension_capability_missing`, or `extension_inactive`.\n712\t\n713\t### Hooks\n714\t\n715\tAdd or update hook payload schemas to include:\n716\t\n717\t- `tool_id`\n718\t- `display_title`\n719\t- `source`\n720\t- `risk`\n721\t- `read_only`\n722\t- `destructive`\n723\t- `open_world`\n724\t- `session_id`\n725\t- `workspace_id`\n726\t- `decision`\n727\t- `reason_codes`\n728\t- `input_digest`\n729\t- `result_digest`\n730\t\n731\tHooks can deny or patch only through typed return contracts. 
They cannot raise permissions above ACP/session policy.\n732\t\n733\t### Skills, Tools, Resources, Bundles\n734\t\n735\tAdd toolsets as named resources/config entries:\n736\t\n737\t- built-in `agh__bootstrap`\n738\t- built-in `agh__catalog`\n739\t- built-in `agh__coordination`\n740\t- built-in `agh__tasks`\n741\t- extension-provided toolsets such as `linear__read`\n742\t\n743\tBundles may include toolsets in the future, but must expand to concrete `ToolID`s during session projection to preserve lineage narrowing.\n744\t\n745\tSkill MCP sidecars (`mcp.json`) remain MCP server declarations, not executable registry tools by themselves. Current skill MCP declarations carry only `name`, `command`, `args`, and `env`; they are stdio declarations. The registry may use those declarations as external MCP descriptor sources only after preserving skill sidecar symlink hardening, applying the existing skill trust gate (`allowed_marketplace_mcp`), and adding remote auth diagnostics from `internal/mcp/auth` only when the underlying source is an auth-capable `aghconfig.MCPServer`.\n746\t\n747\t### Bridge SDKs\n748\t\n749\tNo direct bridge SDK execution in MVP. The registry design reserves a `bridge` backend kind, but the adapter is not required until bridge-managed tools need execution.\n750\t\n751\t### AGH Network\n752\t\n753\tNo remote peer tool execution in MVP. 
Peer discovery may later advertise loaded toolsets or tool summaries, but remote dispatch requires a separate trust and authorization design.\n754\t\n755\t### Docs For Extension Authors\n756\t\n757\tAdd docs covering:\n758\t\n759\t- canonical `ToolID` rules;\n760\t- extension tool manifest shape;\n761\t- TypeScript `extension.tool(...)` authoring;\n762\t- Go SDK function-based tool authoring;\n763\t- backend kinds;\n764\t- why third-party handlers are out-of-process;\n765\t- manifest/runtime reconciliation failures and how to debug `provide_tools`;\n766\t- result budgets and redaction;\n767\t- availability reason codes;\n768\t- how to debug conflicted/unavailable tools;\n769\t- CLI/HTTP/UDS management paths.\n770\t\n771\t## Agent Manageability Plan\n772\t\n773\tAgents must be able to inspect and operate the registry without the web UI.\n774\t\n775\tCLI:\n776\t\n777\t- `agh tool list -o json`\n778\t- `agh tool search \u003cquery\u003e -o json`\n779\t- `agh tool info \u003ctool-id\u003e -o json`\n780\t- `agh tool invoke \u003ctool-id\u003e --input \u003cjson\u003e -o json`\n781\t- `agh toolsets list -o json`\n782\t- `agh toolsets info \u003ctoolset-id\u003e -o json`\n783\t- `agh tool mcp --session \u003cid\u003e`\n784\t- Existing remote MCP auth commands remain the management path for external MCP credentials: `agh mcp auth login \u003cserver\u003e -o json`, `agh mcp auth status [server] -o json`, and `agh mcp auth logout \u003cserver\u003e -o json`.\n785\t\n786\tHTTP and UDS parity:\n787\t\n788\t- Same contract types.\n789\t- Same reason codes.\n790\t- Same redaction.\n791\t- Same policy decisions.\n792\t- UDS is the preferred local machine path for CLI and hosted MCP proxy.\n793\t- Existing MCP settings endpoints remain the management path for server config/status: `GET /api/settings/mcp-servers`, `PUT /api/settings/mcp-servers/:name`, and `DELETE /api/settings/mcp-servers/:name` over HTTP and UDS.\n794\t- Tool registry operator views may embed or link redacted 
settings `auth_status`; they must not create duplicate MCP auth commands or expose token material.\n795\t\n796\tDiscovery behavior:\n797\t\n798\t- Operator surfaces show all registered tools, including unavailable/unauthorized/conflicted entries.\n799\t- Session/model-visible surfaces show only callable tools for that effective session.\n800\t- Dispatch always recomputes policy and availability even if discovery already hid unsafe tools.\n801\t\n802\tDeterministic errors:\n803\t\n804\t- Errors include `code`, `message`, `tool_id`, `reason_codes`, and redacted structured details.\n805\t- Policy errors must identify the denying layer: `system_permission_mode`, `session_lineage`, `agent_policy`, `registry_policy`, `source_policy`, `availability`, or `hook`.\n806\t- MCP auth errors identify the MCP server name and redacted status/reason code, never token material. Tool registry surfaces may recommend the existing `agh mcp auth ...` repair command, but they do not start OAuth login/logout flows in MVP.\n807\t\n808\tE2E manageability checks:\n809\t\n810\t- CLI list/search/info matches HTTP and UDS for the same workspace/session.\n811\t- Hosted MCP `tools/list` equals `GET /api/sessions/{id}/tools`.\n812\t- Denied tools are visible in operator list but absent from session projection.\n813\t- Extension-installed tool descriptor appears after install and disappears after disable/remove.\n814\t\n815\t## Config Lifecycle\n816\t\n817\t### Global `config.toml`\n818\t\n819\tAdd:\n820\t\n821\t```toml\n822\t[tools]\n823\tenabled = true\n824\thosted_mcp_enabled = true\n825\tdefault_max_result_bytes = 262144\n826\t\n827\t[tools.policy]\n828\texternal_default = \"disabled\"\n829\t```\n830\t\n831\tSemantics:\n832\t\n833\t- `tools.enabled=false` disables AGH-owned registry dispatch and hosted MCP exposure, but operator diagnostics can still show static resources where safe.\n834\t- `hosted_mcp_enabled=true` allows AGH to inject/offer the local hosted MCP proxy for sessions.\n835\t- 
`default_max_result_bytes` applies when a descriptor does not specify a smaller limit.\n836\t- `external_default=\"disabled\"` means extension/MCP/dynamic executable tools are registered and operator-visible, but not session-callable until enabled by explicit tool, toolset, source-tier, or agent grants. Built-in AGH bootstrap tools remain enabled by default subject to ACP/session policy.\n837\t\n838\tAllowed `external_default` values:\n839\t\n840\t- `disabled`\n841\t- `ask`\n842\t- `enabled`\n843\t\n844\tMVP default is `disabled`.\n845\t\n846\tMutating, destructive, open-world, or interaction-requiring extension/MCP tools cannot become callable through `approve-reads`. They require explicit policy grants by `ToolID`, toolset, source, or agent plus the effective ACP/session ceiling, approval bridge when required, session lineage, and hook revalidation.\n847\t\n848\t### Existing MCP Config And Auth Lifecycle\n849\t\n850\tNo new `config.toml` keys are added for remote MCP OAuth tokens. Existing MCP lifecycle remains authoritative:\n851\t\n852\t- MCP server definitions continue to come from top-level `[mcp_servers]`, provider `[providers.\u003cname\u003e.mcp_servers]`, agent-local `mcp_servers`, global/workspace `mcp.json`, skill sidecars, and extension `resources.mcp_servers`.\n853\t- Remote MCP auth configuration continues to use `MCPAuthConfig` fields (`type`, metadata/issuer/authorization/token/revocation URLs, `client_id`, `client_secret_env`, `scopes`) on remote MCP servers.\n854\t- Access tokens and refresh tokens continue to live only in the `internal/mcp/auth` token store backed by `internal/store/globaldb`; they are not copied into registry config, session lineage, tool descriptors, events, or extension manifests.\n855\t- The registry may read redacted MCP auth status to produce operator diagnostics and availability reason codes, but cannot mutate auth state. 
Login, refresh, and logout remain `agh mcp auth ...` operations in MVP.\n856\t- The hosted MCP session bind token is ephemeral process/session state, not `config.toml` state and not part of the MCP OAuth token store.\n857\t\n858\t### Agent Definitions\n859\t\n860\tKeep the existing `tools` field but harden its meaning:\n861\t\n862\t- `tools`: exact canonical `ToolID`s or approved wildcard patterns.\n863\t- `toolsets`: named toolset IDs.\n864\t- `deny_tools`: exact IDs or patterns that always narrow permissions.\n865\t\n866\tSession lineage should persist concrete resolved `ToolID` atoms, not broad unresolved wildcard patterns. Child session permissions must remain subsets of parent session permissions.\n867\t\n868\tInvalid existing lineage atoms reject session spawn/load with a typed validation error. Greenfield posture applies: AGH does not silently normalize old atoms, and local databases that predate this TechSpec require a fresh `AGH_HOME` rather than compatibility migration shims.\n869\t\n870\t### Tool Pattern Grammar\n871\t\n872\tAllowed policy pattern forms:\n873\t\n874\t- exact canonical IDs, for example `agh__skill_view`;\n875\t- namespace-prefix wildcards ending in `*`, for example `agh__skill_*` or `mcp__github__*`;\n876\t- toolset IDs in `toolsets`, never in `tools`.\n877\t\n878\tDisallowed forms:\n879\t\n880\t- regular expressions;\n881\t- suffix wildcards such as `*__search`;\n882\t- mid-segment wildcards such as `agh__*__view`;\n883\t- uppercase, dots, hyphens, or empty segments;\n884\t- wildcard forms that would match across a reserved `__` boundary ambiguously.\n885\t\n886\tPattern matching runs against canonical `ToolID` only. 
Display titles, raw MCP tool names, and extension manifest names do not participate in policy matching.\n887\t\n888\t### Validation\n889\t\n890\tConfig validation must reject:\n891\t\n892\t- invalid `ToolID` patterns;\n893\t- unknown toolset IDs when a config is resolved in a concrete workspace;\n894\t- `__` misuse;\n895\t- extension attempts to publish under reserved `agh__*`;\n896\t- global defaults that would expose external tools without source policy support;\n897\t- result byte limits below zero or above a daemon maximum.\n898\t\n899\t### Docs And Generated Surfaces\n900\t\n901\tUpdate:\n902\t\n903\t- CLI docs for `agh tool` and `agh toolsets`;\n904\t- existing MCP auth CLI docs when registry diagnostics reference `agh mcp auth ...`;\n905\t- settings docs for `permissions.mode` to clarify ceiling behavior;\n906\t- settings MCP server docs for redacted `auth_status` reuse in tool diagnostics;\n907\t- extension author docs;\n908\t- site docs for Tool Registry architecture;\n909\t- OpenAPI contract and generated web types.\n910\t\n911\t## Impact Analysis\n912\t\n913\t| Component | Impact Type | Description and Risk | Required Action |\n914\t|---|---|---|---|\n915\t| `internal/tools` | Modified/new | Becomes runtime registry owner, not just metadata definitions | Add `ToolID`, descriptors, providers, registry, policy, dispatch |\n916\t| `internal/config` | Modified/consumed | Existing `MCPServer` transport/auth config is the source of truth for MCP resources | Preserve `transport`, `url`, and `auth`; do not move OAuth config under `[tools]` |\n917\t| `internal/resources` | Modified | Cold tool resource remains desired state but must carry canonical ID/source metadata | Update codecs, validators, tests |\n918\t| `internal/extension` | Modified | Extension tools gain backend metadata, manifest/runtime reconciliation, and executable out-of-process invocation | Extend manifest types, protocol capabilities, `provide_tools`, `tools/call`, validation, lifecycle, and 
publication tests |\n919\t| `internal/mcp` | Modified/new | Hosted MCP proxy exposes registry tools; MCP adapter normalizes and executes external tools | Add MCP list/call bridge through UDS/registry and daemon-owned remote MCP client call-through |\n920\t| `internal/mcp/auth` | Consumed | Existing remote MCP OAuth/PKCE status drives external MCP availability diagnostics | Inject redacted status provider; do not duplicate token store or OAuth flows |\n921\t| `internal/acp` | Modified | Session creation/load must include hosted AGH MCP where applicable; permission mode becomes registry ceiling; current MCP conversion is stdio-only | Wire session projection, keep hosted MCP stdio-only in MVP, and avoid implying remote HTTP/SSE ACP parity |\n922\t| `internal/store` | Modified | Session lineage `Tools` atoms become canonical resolved `ToolID`s | Validate IDs and preserve subset checks |\n923\t| `internal/hooks` | Modified | Tool hook payloads should use canonical `tool_id` | Update payloads, matchers, docs, tests |\n924\t| `internal/api/contract` | New/modified | Shared DTOs for tools/toolsets/calls/errors | Add contract types and codegen |\n925\t| `internal/api/core` | New/modified | Transport-independent tool handlers | Implement list/search/info/invoke/session projection |\n926\t| `internal/api/httpapi` | Modified | Register HTTP routes | Thin transport registration only |\n927\t| `internal/api/udsapi` | Modified | Register UDS routes | Thin transport registration only |\n928\t| `internal/cli` | Modified/consumed | Agent-manageable `agh tool` and `agh toolsets` commands; existing `agh mcp auth` remains the MCP credential path | Add structured output and UDS client methods; link diagnostics to existing auth commands |\n929\t| `internal/settings` | Consumed | Existing MCP server list includes redacted `auth_status` | Reuse status shape for operator diagnostics; no duplicate settings status model |\n930\t| `internal/skills` | Modified | Skill list/search/view tools 
call into skills registry; skill MCP sidecars may inform external MCP descriptor sources | Preserve skill sidecar symlink hardening and `allowed_marketplace_mcp` trust filtering |\n931\t| `internal/network` | Modified | Network tools call peers/send through existing service | Ensure mutating calls enforce policy |\n932\t| `internal/task` | Modified | Bounded task tools call task service | Keep TaskManager authority model intact |\n933\t| `sdk/typescript` | Modified | Extension authors define tools using TypeScript functions | Add `extension.tool(...)`, schema digesting, `provide_tools`, and `tools/call` handler support |\n934\t| `sdk/go` | New | Extension authors define tools using Go functions in subprocess extensions | Add public Go extension SDK mirroring TypeScript tool-provider APIs |\n935\t| `sdk/create-extension` | Modified | Templates should scaffold executable tool providers | Add TypeScript and Go tool-extension templates plus manifest examples |\n936\t| `web/` | Modified | Settings/help surfaces may display registry policy state, tool diagnostics, and existing redacted MCP `auth_status` | Only render truthful daemon-backed status; no invented remote login controls |\n937\t| `packages/site` | Modified | Public docs for registry, extension tools, approval ceiling | Add docs and CLI reference updates |\n938\t| `.compozy/tasks/tools-registry/*` | New | Research, ADRs, final TechSpec | Keep analysis paths referenced in later tasks |\n939\t\n940\t## Test Strategy\n941\t\n942\t### Unit Tests\n943\t\n944\tTest:\n945\t\n946\t- `ToolID` validation, parsing, wildcard matching, and collision rejection.\n947\t- External name sanitization and fail-closed collision behavior.\n948\t- `Descriptor` validation and schema size limits.\n949\t- Availability state transitions and reason-code composition.\n950\t- Policy matrix across `deny-all`, `approve-reads`, and `approve-all`.\n951\t- Source defaults for built-in, extension, MCP, and dynamic tools.\n952\t- Agent 
allow/deny/toolset expansion.\n953\t- Session lineage concrete `ToolID` subset validation.\n954\t- Dispatch pipeline ordering.\n955\t- JSON schema input validation.\n956\t- Result truncation and redaction.\n957\t- Hook deny/patch/result behavior.\n958\t- Extension manifest backend validation.\n959\t- MCP auth status mapping from `internal/mcp/auth.StatusValue` to registry availability reason codes.\n960\t- MCP server resource cloning/projection preserves `Transport`, `URL`, and `Auth` when remote MCP resources flow into registry diagnostics.\n961\t\n962\tMocks are acceptable for provider I/O boundaries, but policy/dispatch correctness must be tested with real registry instances.\n963\t\n964\t### Integration Tests\n965\t\n966\tTest:\n967\t\n968\t- Extension manifest declares a tool and it appears in operator registry projection.\n969\t- Disabling/removing an extension removes or marks the tool unavailable.\n970\t- TypeScript extension declares a manifest-authoritative `extension_host` tool, registers a matching SDK function, and dispatch succeeds through `Registry.Call`.\n971\t- Go extension declares a manifest-authoritative `extension_host` tool, registers a matching SDK function, and dispatch succeeds through `Registry.Call`.\n972\t- Extension runtime descriptor mismatches for handler, schema digest, risk flags, or missing `tool.provider` grant keep the tool operator-visible and session-hidden with deterministic reason codes.\n973\t- MCP-backed extension tool resolves to an authorized MCP source and dispatch succeeds through the daemon-owned MCP adapter when auth, source policy, approval, hooks, and session lineage pass.\n974\t- MCP-backed extension tool remains operator-visible but unavailable when its backend server is missing, unauthorized, unhealthy, unauthenticated, or conflicted.\n975\t- Remote MCP server with `needs_login`, `expired`, and `invalid` auth statuses appears only in operator diagnostics with redacted `MCPAuthStatus` and is hidden from 
session/model projections.\n976\t- Existing `agh mcp auth status --refresh \u003cserver\u003e -o json` changes operator diagnostics without requiring a registry-owned OAuth flow.\n977\t- `agh tool info \u003cmcp-tool\u003e` and `GET /api/tools/{id}` show redacted MCP auth diagnostics that match `/api/settings/mcp-servers` `auth_status` for the same server.\n978\t- Remote OAuth token values never appear in tool CLI/API/UDS/MCP responses, SSE payloads, event payloads, logs, or process diagnostics.\n979\t- Hosted MCP bind tokens never appear in ACP diagnostics, settings output, process/crash output, logs, SSE/events, tool results, or MCP responses.\n980\t- Remote MCP configs are not converted to blank stdio ACP servers; hosted-session injection remains the AGH stdio proxy while remote MCP calls happen inside the daemon MCP adapter.\n981\t- Built-in `agh__skill_view` calls real skills registry content.\n982\t- CLI/HTTP/UDS list/search/info parity.\n983\t- `agh tool mcp --session \u003cid\u003e` `tools/list` matches session projection.\n984\t- Hosted MCP rejects a proxy bind without the session-bound token.\n985\t- Hosted MCP derives workspace id from session id and rejects client-supplied workspace context.\n986\t- Hosted MCP routes approval-required calls through ACP `session/request_permission` when available and fails closed with `approval_unreachable` when unavailable.\n987\t- `approve-reads` exposes read-only tools but blocks mutating tools without approval.\n988\t- Mutating, destructive, and open-world extension/MCP tools execute only with explicit `ToolID`/toolset/source/agent grants plus ACP ceiling, approval bridge, session lineage, and hook revalidation.\n989\t- `approve-all` does not bypass explicit registry denies or session lineage narrowing.\n990\t- `deny-all` blocks execution while operator surfaces still show diagnostics.\n991\t- The concrete MVP task tools call only their listed `task.Service` methods; excluded claim/complete/release tools are 
absent.\n992\t- Hooks can deny pre-call and redact post-call output.\n993\t- Conflicted tool IDs are operator-visible and session-hidden.\n994\t- Result budget truncation is identical across CLI, HTTP, UDS, and MCP.\n995\t\n996\t### E2E Tests\n997\t\n998\tUse the AGH runtime E2E harness:\n999\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:02.80173Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:02.802205Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:03.968435Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","blocks":[{"type":"tool_use","id":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md (1000 - 1499)","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md","limit":500,"offset":1000}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:04.111453Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:04.113982Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:04.115906Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01KLiXoXdmWTBkiT8JzTsRmT","content":"```\n1000\t- Start daemon with isolated `AGH_HOME`.\n1001\t- Create a session with hosted AGH MCP enabled.\n1002\t- Verify the agent session receives registry-backed MCP tools.\n1003\t- Invoke a safe built-in tool through the hosted MCP path.\n1004\t- Verify events, telemetry, CLI status, HTTP status, and UDS status agree.\n1005\t- Install a test extension publishing a tool descriptor and MCP backend metadata.\n1006\t- Install TypeScript and Go test extensions publishing executable `extension_host` tools.\n1007\t- Verify operator diagnostics, runtime reconciliation, session visibility, successful invocation, disable/remove lifecycle, and conflict handling.\n1008\t- Configure a local MCP test server and verify descriptor discovery plus a real `tools/call` through the daemon MCP adapter.\n1009\t- Configure a remote OAuth-backed MCP server in isolated `AGH_HOME`, exercise `agh mcp auth login/status/logout` against a local OAuth test server, and verify registry tool diagnostics never expose access tokens, refresh tokens, authorization codes, PKCE verifiers, or hosted MCP bind tokens.\n1010\t\n1011\tRun full `make verify` before completing implementation tasks.\n1012\t\n1013\t## Implementation Steps\n1014\t\n1015\t### Build Order\n1016\t\n1017\tOrdered implementation sequence respecting dependencies:\n1018\t\n1019\t1. 
Add canonical `ToolID`, descriptor, backend kind, source, availability, result, and error contracts in `internal/tools` - no dependencies.\n1020\t2. Replace metadata-only provider semantics with executable runtime provider/handle interfaces - depends on step 1.\n1021\t3. Implement registry indexing, collision detection, MCP auth diagnostic mapping, and operator/session projections - depends on steps 1-2 and existing `internal/mcp/auth`.\n1022\t4. Implement policy evaluator with ACP ceiling, agent policy, session lineage, source defaults, allow/deny, and toolsets - depends on step 3.\n1023\t5. Implement dispatch pipeline with schema validation, availability recheck, hooks, budgets, handle call, normalization, and telemetry seams - depends on steps 3-4.\n1024\t6. Add built-in provider for bootstrap AGH tools: `agh__tool_*`, `agh__skill_*`, `agh__network_*`, and only the enumerated MVP task tools - depends on step 5.\n1025\t7. Add extension manifest backend metadata, manifest-authoritative validation, and runtime reconciliation contracts - depends on steps 1-3 and existing extension resources.\n1026\t8. Add extension protocol capability `tool.provider`, `provide_tools`, and `tools/call` invocation through the existing subprocess manager - depends on steps 5 and 7.\n1027\t9. Add TypeScript SDK `extension.tool(...)`, schema digesting, and handler registration - depends on step 8.\n1028\t10. Add public Go extension SDK with function-based tool helpers equivalent to TypeScript SDK - depends on step 8.\n1029\t11. Add daemon-owned MCP descriptor discovery and `tools/call` call-through adapter using existing MCP config/auth and token redaction boundaries - depends on steps 3-5 and existing `internal/mcp/auth`.\n1030\t12. Add hosted MCP stdio proxy command `agh tool mcp --session`, session-bound token bind, approval bridge, and existing MCP resource/auth preservation - depends on steps 3-6 and 11.\n1031\t13. 
Add API contract DTOs and `internal/api/core` handlers - depends on steps 3-6, 8, and 11.\n1032\t14. Wire HTTP, UDS, CLI commands, and UDS client methods - depends on step 13.\n1033\t15. Wire hooks and canonical `tool_id` payload updates end-to-end, including typed hook payloads, matchers, fixture builders, extension-author docs, and no dual identity mid-PR - depends on step 5.\n1034\t16. Add config lifecycle, validation, generated docs, CLI docs, site docs, integration/E2E coverage, and run `make verify` - depends on all prior steps.\n1035\t\n1036\t### Technical Dependencies\n1037\t\n1038\tBlocking dependencies that must be resolved before implementation:\n1039\t\n1040\t- Existing ACP `permissions.mode` behavior in `internal/acp/permission.go`.\n1041\t- Existing session lineage permission atoms in `internal/store/session_lineage.go`.\n1042\t- Existing extension resource publication and `resources.publish.families = [\"tools\"]`.\n1043\t- Existing extension subprocess lifecycle, JSON-RPC `process.Call`, Host API capability checks, and TypeScript `Extension.handle(...)` handler pattern.\n1044\t- Existing hooks payload system.\n1045\t- Existing MCP server configuration/resource lifecycle in `internal/config/provider.go`, `internal/config/mcpjson.go`, `internal/config/mcp_resource.go`, `internal/skills/mcp.go`, and `internal/extension/resource_publication.go`.\n1046\t- Existing MCP auth lifecycle in `internal/mcp/auth`, `internal/store/globaldb/global_db_mcp_auth.go`, `internal/cli/mcp_auth.go`, `internal/settings`, and `internal/daemon/settings.go`.\n1047\t- Existing skills registry APIs.\n1048\t- Existing task/network services.\n1049\t- OpenAPI/codegen path for web contract updates.\n1050\t\n1051\t### Safety Invariants\n1052\t\n1053\t1. Every AGH-owned tool call enters `internal/tools.Registry.Call`; CLI, HTTP, UDS, hosted MCP, extension, and session paths cannot bypass the registry dispatch pipeline.\n1054\t2. 
Dispatch recomputes availability and `EffectiveToolDecision` at call time, even when discovery already filtered the tool.\n1055\t3. ACP `permissions.mode` is always a ceiling. Registry, source, agent, session, and hook policy can narrow authority but cannot raise it above the effective ACP/session mode.\n1056\t4. `approve-all` skips approval prompts only for otherwise allowed tools; it does not bypass explicit denies, source grants, session lineage, conflicts, unavailable backends, or hooks.\n1057\t5. `approve-reads` applies only to registry-classified read-only tools. Mutating, destructive, open-world, network-send, and task-write tools cannot inherit read approval by display title or ACP kind.\n1058\t6. Session lineage stores concrete canonical `ToolID` atoms after toolset expansion; child sessions can only receive a subset of parent concrete atoms.\n1059\t7. Tool ID collisions fail closed. A conflicted tool is operator-visible with reason codes and absent from session/model-visible projections.\n1060\t8. Extension-installed tools can become executable in MVP only when the manifest-authoritative descriptor, source policy, extension health, runtime `provide_tools` descriptor, and `tools/call` handler all agree.\n1061\t9. Third-party extension tool handlers never run in-process in the daemon during MVP; TypeScript and Go function handlers run inside supervised extension subprocesses.\n1062\t10. Hooks dispatch at the call site and cannot tail event tables, spawn parallel queues, or mutate durable ownership state outside typed hook contracts.\n1063\t11. Tool result limiting and redaction run before results cross CLI, HTTP, UDS, MCP, SSE, logs, memory, or event payloads.\n1064\t12. Raw `claim_token`, MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings never appear in tool inputs/outputs persisted or emitted by AGH-owned surfaces.\n1065\t13. Hosted MCP `tools/list` is a projection of `GET /api/sessions/{id}/tools`; divergence is a test failure.\n1066\t14. 
Operator-visible diagnostics are not security boundaries. Hidden session projection plus dispatch-time revalidation is the security boundary.\n1067\t15. Toolsets are expanded deterministically and cycle-checked before session projection; recursive expansion cannot happen lazily during dispatch.\n1068\t16. Hosted MCP cannot bind to a session without a daemon-minted, single-use, session-bound proxy token. The raw token is redacted like `claim_token`, never logged, never accepted from client tool input, and invalidated on bind/session end/disconnect/TTL.\n1069\t17. Hosted MCP `tools/call` cannot pass an `approval_required` decision unless the daemon completes an ACP/session-mediated approval equivalent to CLI/HTTP/UDS approval semantics.\n1070\t18. No `agh__task_*` tool may bypass `task.Service.ClaimNextRun`, `Spawn`, session manager terminal-state authority, or task lifecycle authority. Claim/release/complete/fail/run-start operations are excluded from MVP tools.\n1071\t19. External `extension_host` and `mcp` backend tools are executable only through their registered runtime handles; missing handlers, missing MCP clients, missing capabilities, source denies, auth failures, or runtime mismatches fail closed before user code or remote tools run.\n1072\t20. Remote MCP OAuth/PKCE credentials are owned only by `internal/mcp/auth` and its `TokenStore`; the registry may consume redacted status and call through a narrow `internal/mcp/auth`-owned interface, but cannot persist, log, refresh, revoke, or copy access/refresh tokens. Raw tokens stay out of descriptors, resources, events, API responses, CLI output, MCP responses, and tool results.\n1073\t21. Hosted MCP bind tokens and remote MCP OAuth tokens have separate issuers, storage, lifetimes, redaction labels, and failure codes. 
A `hosted_mcp_bind_token` must never satisfy a remote MCP server auth check, and a remote MCP OAuth token must never bind an AGH hosted MCP proxy.\n1074\t\n1075\t## Monitoring and Observability\n1076\t\n1077\tEvents:\n1078\t\n1079\t- `tool.registry.refresh_started`\n1080\t- `tool.registry.refresh_completed`\n1081\t- `tool.registered`\n1082\t- `tool.updated`\n1083\t- `tool.removed`\n1084\t- `tool.conflicted`\n1085\t- `tool.availability_changed`\n1086\t- `tool.policy_evaluated`\n1087\t- `tool.call_started`\n1088\t- `tool.call_completed`\n1089\t- `tool.call_failed`\n1090\t- `tool.call_denied`\n1091\t- `tool.result_truncated`\n1092\t\n1093\tRequired fields:\n1094\t\n1095\t- `tool_id`\n1096\t- `display_title`\n1097\t- `source_kind`\n1098\t- `source_owner`\n1099\t- `workspace_id`\n1100\t- `session_id`\n1101\t- `parent_session_id`\n1102\t- `root_session_id`\n1103\t- `agent_name`\n1104\t- `risk`\n1105\t- `read_only`\n1106\t- `destructive`\n1107\t- `open_world`\n1108\t- `approval_mode`\n1109\t- `decision`\n1110\t- `reason_codes`\n1111\t- `duration_ms`\n1112\t- `result_bytes`\n1113\t- `truncated`\n1114\t- `correlation_id`\n1115\t\n1116\tMetrics:\n1117\t\n1118\t- registered tools by source kind\n1119\t- conflicted tools by source kind\n1120\t- available/callable tools by session\n1121\t- calls started/completed/failed/denied\n1122\t- approval-required counts\n1123\t- result truncation counts\n1124\t- backend latency by source kind\n1125\t- hook-denied counts\n1126\t- policy-denied counts\n1127\t\n1128\tRedaction:\n1129\t\n1130\t- Never log raw claim tokens, MCP auth tokens, OAuth codes, PKCE verifiers, secret bindings, or full tool payloads when marked sensitive.\n1131\t- Use digests for large inputs/results.\n1132\t- Preserve enough metadata to debug policy and availability without exposing secrets.\n1133\t\n1134\t## Technical Considerations\n1135\t\n1136\t### Key Decisions\n1137\t\n1138\tDecision: Tool Registry is an AGH daemon runtime service, not an ACP 
registry.\n1139\t\n1140\tRationale: ACP has no callable tool registry and relies on MCP for tool discovery.\n1141\t\n1142\tTrade-off: AGH must maintain its own registry semantics and expose them through MCP/session surfaces.\n1143\t\n1144\tDecision: Use one canonical `ToolID` with `__` namespace separators.\n1145\t\n1146\tRationale: Avoid dotted/internal plus wire alias ambiguity and stay compatible with stricter provider naming limits.\n1147\t\n1148\tTrade-off: `agh__skill_view` is less visually elegant than `agh.skill.view`, but it prevents dual identity bugs.\n1149\t\n1150\tDecision: Built-ins execute in-process; TypeScript and Go extension tools execute out-of-process through `extension_host`.\n1151\t\n1152\tRationale: Daemon safety and extension isolation matter more than plugin convenience.\n1153\t\n1154\tTrade-off: Extension tool latency and setup are higher, but failure containment is better and extension authors still get function-based APIs.\n1155\t\n1156\tDecision: Remote MCP-backed tools execute in MVP through daemon-owned MCP clients.\n1157\t\n1158\tRationale: MCP config/auth already exists and the registry must not become useful only for descriptors.\n1159\t\n1160\tTrade-off: The MVP must implement stricter auth redaction and adapter tests, but operators get one coherent tool model.\n1161\t\n1162\tDecision: ACP `permissions.mode` is the approval ceiling.\n1163\t\n1164\tRationale: Avoid contradictory policy layers and keep existing settings truthful.\n1165\t\n1166\tTrade-off: Registry policy must compute effective decisions rather than simple allow/deny flags.\n1167\t\n1168\tDecision: Operator and session projections differ.\n1169\t\n1170\tRationale: Operators need diagnostics; models should see only callable tools.\n1171\t\n1172\tTrade-off: More projection logic, but less model confusion and fewer unsafe calls.\n1173\t\n1174\tDecision: Keep cold resource records separate from runtime handles.\n1175\t\n1176\tRationale: Resource/projector system is good 
for installed metadata; executable dispatch needs function/protocol handles and live health.\n1177\t\n1178\tTrade-off: More types, but cleaner lifecycle and safer extension support.\n1179\t\n1180\t### Known Risks\n1181\t\n1182\tRisk: Mutating tools mislabeled as read-only.\n1183\t\n1184\tMitigation: Descriptor validation, review tests, policy matrix tests, and hook/audit visibility.\n1185\t\n1186\tRisk: External tool name collisions force late breaking changes.\n1187\t\n1188\tMitigation: Enforce canonical `ToolID` grammar and fail-closed collision handling in MVP.\n1189\t\n1190\tRisk: Hosted MCP path diverges from CLI/HTTP/UDS behavior.\n1191\t\n1192\tMitigation: Hosted MCP proxies through UDS into the same registry dispatch path.\n1193\t\n1194\tRisk: Extension tools become visible before safe runtime execution is confirmed.\n1195\t\n1196\tMitigation: Operator-visible only until manifest/runtime reconciliation, availability, source policy, and backend handle all pass.\n1197\t\n1198\tRisk: `approve-all` is misunderstood as \"execute everything.\"\n1199\t\n1200\tMitigation: Docs and UI copy must clarify it auto-approves otherwise allowed calls; it does not bypass explicit denies, source grants, availability, lineage, or hooks.\n1201\t\n1202\tRisk: Toolsets create ambiguous policy language.\n1203\t\n1204\tMitigation: Keep `tools` and `toolsets` as separate typed fields; expand toolsets to concrete `ToolID`s for session lineage.\n1205\t\n1206\tRisk: Result payloads leak secrets or overwhelm context.\n1207\t\n1208\tMitigation: Central result limiter, redaction metadata, digesting, and output budgets.\n1209\t\n1210\t### Delete Targets\n1211\t\n1212\tBecause AGH is greenfield alpha, the implementation should hard-cut ambiguous old tool concepts instead of adding compatibility bridges:\n1213\t\n1214\t- Replace metadata-only `ToolProvider.Tools(ctx)` as the runtime extension point with provider/descriptor/handle contracts.\n1215\t- Remove descriptor-only MVP wording and 
`backend_not_implemented` behavior for `extension_host` and `mcp` tools.\n1216\t- Remove any public standalone `subprocess` backend in favor of `extension_host` subprocess isolation.\n1217\t- Replace `internal/tools.Tool.Name` as a registry identity with canonical `ToolID` in new public contracts.\n1218\t- Remove any new public use of dotted tool IDs or separate MCP wire aliases.\n1219\t- Replace hook policy identity based on `tool_name` + `tool_namespace` with canonical `tool_id` for registry-owned tool calls.\n1220\t- Update docs, tests, CLI/API examples, and task artifacts that refer to dotted IDs or dual aliasing.\n1221\t\n1222\t## Architecture Decision Records\n1223\t\n1224\t- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - extension tools are manifest-first, executable, and out-of-process in MVP.\n1225\t- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - expose AGH registry tools through hosted local MCP plus shared CLI/HTTP/UDS.\n1226\t- [ADR-003: Runtime Registry Package Boundary](adrs/adr-003-runtime-registry-package-boundary.md) - `internal/tools` owns runtime registry and dispatch; `internal/catalog` remains thin.\n1227\t- [ADR-004: MVP Native Tool Scope](adrs/adr-004-mvp-native-tool-scope.md) - bootstrap catalog/skill tools plus selected network/task tools.\n1228\t- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - ACP approval mode is the system/session ceiling.\n1229\t- [ADR-006: Tool Visibility By Surface](adrs/adr-006-tool-visibility-by-surface.md) - operator projections show diagnostics; model projections show callable tools only.\n1230\t- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - one provider-safe `ToolID` using reserved `__` namespace separators.\n1231\t- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - 
`extension.toml` is source of truth and runtime descriptors reconcile against it.\n1232\t- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - Go extensions get function-based subprocess SDK APIs equivalent to TypeScript.\n1233\t- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - remote MCP tools are executable in MVP through daemon-owned MCP adapters.\n1234\t\n1235\t## Nits\n1236\t\n1237\tPeer review round 1 nits and disposition:\n1238\t\n1239\t- `N-001` ToolsetID grammar: addressed in Data Models by sharing the `ToolID` grammar.\n1240\t- `N-002` Tool pattern grammar: addressed in Config Lifecycle with explicit allowed/disallowed pattern forms.\n1241\t- `N-003` `approval_token` semantics: addressed in API Endpoints and Hosted MCP Approval Bridge.\n1242\t- `N-004` `dynamic` source kind: addressed in Data Models as reserved with no MVP producer.\n1243\t- `N-005` hosted MCP lifecycle: addressed in Hosted MCP lifecycle.\n1244\t- `N-006` `agh__skill_view` result budget: addressed in Integration Points / Skills.\n1245\t- `N-007` hook identity migration co-ship: addressed in Implementation Steps step 11.\n1246\t- `N-008` invalid existing session lineage atoms: addressed in Config Lifecycle / Agent Definitions.\n1247\t- `N-009` hosted MCP workspace resolution: addressed in Hosted MCP lifecycle/authentication.\n1248\t- `N-010` MVP tool risk classification: addressed in Network And Tasks tables.\n1249\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:06.476288Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:06.851593Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01J7dbQWG4JXfkmGchYVHw2G","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01J7dbQWG4JXfkmGchYVHw2G","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.096469Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01J7dbQWG4JXfkmGchYVHw2G","blocks":[{"type":"tool_use","id":"toolu_01J7dbQWG4JXfkmGchYVHw2G","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.096673Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.677447Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","blocks":[{"type":"tool_use","id":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.72652Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.737301Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01J7dbQWG4JXfkmGchYVHw2G","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.73753Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.738938Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01J7dbQWG4JXfkmGchYVHw2G","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01J7dbQWG4JXfkmGchYVHw2G","content":"```\n1\t# Tool Registry Synthesis and Proposed Direction\n2\t\n3\t## Executive Summary\n4\t\n5\tAGH should build a Tool Registry as a daemon-owned runtime service that composes tool metadata, availability, policy, execution, hooks, telemetry, and extension adapters.\n6\t\n7\tThe foundation should not be \"add many built-in tools.\" The foundation should be:\n8\t\n9\t1. a first-class runtime tool contract,\n10\t2. a registry that aggregates built-ins, MCP, extensions, and dynamic providers,\n11\t3. a policy/availability projection for each agent/session,\n12\t4. a single dispatch pipeline,\n13\t5. 
an extension-safe execution boundary,\n14\t6. agent-manageable CLI/HTTP/UDS/session surfaces,\n15\t7. toolsets/bundles comparable to skills.\n16\t\n17\tThe ACP inventory found `.resources/openfang` present, but with no meaningful ACP evidence.\n18\t\n19\t## Recommended Architecture\n20\t\n21\t### Accepted decisions so far\n22\t\n23\t- Extension tool execution boundary: manifest-first descriptors with out-of-process execution only for extension tools in the MVP.\n24\t- Session exposure path: AGH-hosted local MCP server plus shared CLI/HTTP/UDS contracts.\n25\t- Package boundary: `internal/tools` owns runtime registry contracts and dispatch; a thin `internal/catalog` facade composes tools and skills for cross-domain discovery.\n26\t- MVP native tool scope: bootstrap catalog/skill tools plus network and bounded task tools (`agh__tool_*`, `agh__skill_*`, `agh__network_peers`, `agh__network_send`, `agh__task_*`).\n27\t- Policy integration: existing ACP `permissions.mode` is the system/session approval ceiling; registry policy is a granular layer below it and cannot silently grant more authority than ACP policy allows.\n28\t- Visibility by surface: operator surfaces show unavailable/unauthorized/conflicted tools with reason codes; session/model-visible surfaces expose only tools callable in that effective session context.\n29\t- Naming/collision policy: one canonical public `ToolID` uses provider-safe lower snake segments separated by reserved `__`, for example `agh__skill_view` and `mcp__github__create_issue`; this is captured in ADR-007.\n30\t\n31\t### ACP compatibility finding\n32\t\n33\tACP does not define a durable callable-tool registry. It defines session lifecycle, `mcpServers` bootstrap fields, client authority callbacks, permission requests, and observable tool-call events. 
ACP `ToolCall` records have `toolCallId`, human-readable `title`, coarse `kind`, status, locations, raw input/output, and content, but no programmatic tool `name` equivalent to MCP `Tool.name`.\n34\t\n35\tThis means the Tool Registry should remain an AGH daemon/runtime service. Session exposure should use the accepted AGH-hosted MCP path, where AGH exposes the canonical `ToolID` directly as the hosted MCP `Tool.name`.\n36\t\n37\tAccepted identity format:\n38\t\n39\t- `ToolID`: stable provider-safe id with reserved `__` namespace separators, such as `agh__skill_view`.\n40\t- `DisplayTitle`: human-readable and non-unique.\n41\t- `SourceRef`: structured provenance, not inferred only from prefixes.\n42\t\n43\tCollision handling must be fail-closed. Canonical ID collisions are provider registration errors or operator diagnostics. Sanitized external-name collisions make the affected tools unavailable to that session until resolved. Display title collisions are allowed because titles are not policy identities.\n44\t\n45\t### 1. 
Split descriptor, runtime handle, and resource record\n46\t\n47\tKeep the existing `internal/tools.Tool` resource shape as the cold catalog/desired-state record, but introduce a runtime contract with separate types:\n48\t\n49\t- `ToolID`: stable provider-safe id such as `agh__skill_view`, `mcp__github__create_issue`, `ext__linear__search`.\n50\t- `Descriptor`: identity, description, input schema, optional output schema, read-only/destructive/open-world/concurrency metadata, source/provenance, visibility, tags, owner, result budget.\n51\t- `Handle`: descriptor plus `Availability(ctx, ToolContext)` and `Call(ctx, ToolCall)` for executable tools.\n52\t- `Provider`: contributes descriptors/handles and can refresh.\n53\t- `Registry`: owns provider registration, indexing, listing, search, policy projection, and dispatch.\n54\t- `ToolResult`: structured output, preview, artifacts, redactions, bytes, display title, metadata.\n55\t\n56\tThis avoids overloading the desired-state resource with function pointers while still allowing resource records to feed the runtime registry.\n57\t\n58\t### 2. Use manifest-first extension tools\n59\t\n60\tExtension manifests should continue to declare tool metadata statically. Add enough metadata to connect the declaration to a backend:\n61\t\n62\t- backend kind: `mcp`, `extension_host`, `subprocess`, or `builtin` where appropriate;\n63\t- namespace/owner;\n64\t- visibility;\n65\t- risk class;\n66\t- required config/env/capabilities;\n67\t- optional toolset memberships.\n68\t\n69\tThe registry can list these tools without executing extension code. A tool becomes executable only when its backend adapter is healthy, authorized, and has a callable handle.\n70\t\n71\tRecommendation for MVP: no in-process third-party handlers. Built-in Go tools can register in-process. Extension tools should execute through MCP or an extension sidecar/Host API adapter.\n72\t\n73\t### 3. 
Make availability a state machine, not a boolean\n74\t\n75\tUse explicit status:\n76\t\n77\t- `registered`: descriptor exists.\n78\t- `enabled`: operator/session policy has not disabled it.\n79\t- `available`: dependencies are present and backend is healthy.\n80\t- `authorized`: caller policy permits visibility/use.\n81\t- `executable`: there is a live handle for dispatch.\n82\t- `conflicted`: id/name collision requires resolution.\n83\t\n84\tDiscovery can hide unavailable/unauthorized tools from agents while operator surfaces show reasons. Dispatch must recheck availability and authorization.\n85\t\n86\tThe registry should expose separate operator and session projections. The operator projection includes diagnostics, source/provenance, policy reasons, availability reasons, and conflicts. The session projection powers hosted MCP and future driver injection and includes only tools that pass effective visibility/execution gates for that session.\n87\t\n88\t### 4. Centralize dispatch\n89\t\n90\tEvery AGH-owned tool call should pass through:\n91\t\n92\t1. resolve tool id/alias in context,\n93\t2. validate input against schema,\n94\t3. compute availability,\n95\t4. evaluate policy and session permission atoms,\n96\t5. run `tool.pre_call` hooks,\n97\t6. enforce concurrency/rate/result budgets,\n98\t7. call provider adapter,\n99\t8. normalize result,\n100\t9. redact/truncate/persist output,\n101\t10. run `tool.post_call` or `tool.post_error` hooks,\n102\t11. emit telemetry.\n103\t\n104\tNo CLI, HTTP, UDS, MCP, extension, or session path should bypass this pipeline.\n105\t\n106\t### 5. 
Model policy as overlays\n107\t\n108\tUse one policy engine that combines:\n109\t\n110\t- system/session ACP `permissions.mode`,\n111\t- daemon defaults,\n112\t- workspace config,\n113\t- extension grants,\n114\t- agent definition,\n115\t- session lineage `SessionPermissionPolicy.Tools`,\n116\t- skill/command scoped grants where relevant,\n117\t- explicit allow/deny patterns,\n118\t- named toolsets,\n119\t- risk defaults.\n120\t\n121\tToolsets should be recursive resources/config entries. This copies Hermes' strongest idea while fitting AGH's resource model.\n122\t\n123\tThe registry must not create a second approval system that contradicts ACP. `approve-all` removes automatic approval prompts for otherwise allowed tools, but explicit registry denies, source grants, session lineage restrictions, availability failures, and hooks still apply. `approve-reads` auto-approves only registry-classified read-only tools. `deny-all` denies execution by default and requires an explicit approval path.\n124\t\n125\t### 6. Provide a small bootstrap native toolset\n126\t\n127\tThe TechSpec should not enumerate every future AGH tool. It should require a small proving set:\n128\t\n129\t- `agh__tool_list`\n130\t- `agh__tool_search`\n131\t- `agh__tool_info`\n132\t- `agh__skill_list`\n133\t- `agh__skill_view`\n134\t\n135\tOptional later groups:\n136\t\n137\t- `agh__skill_install`\n138\t- `agh__network_peers`\n139\t- `agh__network_send`\n140\t- `agh__task_*`\n141\t- `agh__extension_*`\n142\t\n143\tThe bootstrap set proves discovery, schema loading, skill body loading, policy, result budget, and telemetry without overcommitting the whole daemon.\n144\t\n145\t### 7. 
Expose agent-manageable surfaces\n146\t\n147\tThe registry should have shared contract types used by:\n148\t\n149\t- CLI: `agh tool list/search/info/invoke`.\n150\t- HTTP: `/api/tools`, `/api/tools/{id}`, `/api/tools/{id}/invoke`.\n151\t- UDS: same operations for local agents and internal tools.\n152\t- Session-visible tool surface: either an AGH-hosted MCP server, driver-specific ACP tool injection where possible, or a fallback where agents can use `agh` CLI/UDS through their runtime.\n153\t\n154\tThe TechSpec should pick one MVP path and keep the others as contract-compatible surfaces.\n155\t\n156\t### 8. Treat Tool Search as provider-neutral\n157\t\n158\tClaude Code's `tool_reference` mechanism is useful but not portable. AGH should implement registry search as a normal catalog operation first:\n159\t\n160\t- search over name, namespace, description, tags, source, toolset, and search hints;\n161\t- return metadata first;\n162\t- load schema/details on demand;\n163\t- optionally persist discovered state per session later.\n164\t\n165\tDriver-specific schema-on-demand integration can be a future enhancement.\n166\t\n167\t### 9. Reuse existing AGH infrastructure\n168\t\n169\tBuild on:\n170\t\n171\t- `internal/tools` for contracts and registry,\n172\t- `internal/resources` for desired-state records,\n173\t- `internal/extension` manifest publication,\n174\t- `internal/hooks` for pre/post/error dispatch gates,\n175\t- `internal/store.SessionPermissionPolicy` for lineage constraints,\n176\t- `internal/skills.Registry` for skill listing/content,\n177\t- `internal/toolruntime` for subprocess ownership if extension tools need process handles,\n178\t- `internal/api/contract` for shared HTTP/UDS payloads.\n179\t\n180\tAvoid a large generic `internal/catalog` at first unless it only coordinates cross-domain search. 
The runtime tool registry belongs in or near `internal/tools`; a catalog facade can compose tools and skills for `agh__tool_*` / `agh__skill_*`.\n181\t\n182\t## Proposed MVP Scope\n183\t\n184\t### In scope\n185\t\n186\t- Runtime tool registry contract and central dispatch pipeline.\n187\t- Built-in provider for `agh__tool_list`, `agh__tool_search`, `agh__tool_info`, `agh__skill_list`, `agh__skill_search`, `agh__skill_view`, `agh__network_peers`, `agh__network_send`, and a bounded `agh__task_*` set.\n188\t- Resource-backed descriptors from existing `tool` records.\n189\t- Extension manifest backend metadata for future executable extension tools.\n190\t- MCP adapter design, even if full MCP call-through is deferred.\n191\t- Context-specific list/search/info APIs.\n192\t- Tool policy with allow/deny patterns and named toolsets.\n193\t- Availability model and reason codes.\n194\t- Hook integration for pre/post/error.\n195\t- Telemetry events for list/search/info/call and failures.\n196\t- CLI/HTTP/UDS contract surfaces.\n197\t\n198\t### Out of scope for MVP\n199\t\n200\t- Full shell/browser/file tool replacement for ACP runtimes.\n201\t- Provider-specific Anthropic `tool_reference` integration.\n202\t- In-process third-party extension handlers.\n203\t- Large catalog of AGH-native tools beyond the selected catalog/skill/network/task set.\n204\t- Skill install/remove/update tools unless explicitly paired with supply-chain policy/scanning work.\n205\t- Network peer remote tool execution.\n206\t- Marketplace signing/trust overhaul, except for explicit risk hooks needed by extension tools.\n207\t\n208\t## Critical Decisions Before TechSpec\n209\t\n210\t1. Extension execution boundary: out-of-process only, metadata-only first, or trusted in-process handlers.\n211\t2. Session exposure path: hosted MCP, direct ACP extension, CLI/UDS fallback, or all in phases.\n212\t3. 
Package boundary: runtime registry in `internal/tools` with a catalog facade, or a new `internal/catalog` owning tools and skills together.\n213\t4. MVP tool set: only list/search/info/view, or include mutating install/network/task tools.\n214\t5. Policy defaults: external tools disabled, ask, or visible-but-not-callable until granted.\n215\t6. Availability visibility: hide unavailable tools from agents, show unavailable tools with reasons, or configurable by surface.\n216\t7. Naming/collision policy: accepted in ADR-007. Use one canonical provider-safe `ToolID` with reserved `__` namespace separators, display-only title, structured provenance, and no shadowing or silent sanitized-name collisions.\n217\t\n218\t## Competitor Pattern Matrix\n219\t\n220\t| Pattern | Hermes | Claude Code | GoClaw | OpenClaw | AGH Recommendation |\n221\t|---|---:|---:|---:|---:|---|\n222\t| Single normalized tool contract | Yes | Yes | Yes | Yes | Required |\n223\t| Runtime executable registry | Yes | Distributed | Yes | Yes | Required |\n224\t| Manifest-first extension discovery | Partial | Plugin metadata | Partial | Strong | Required |\n225\t| MCP as adapter | Yes | Strong | Yes | Strong | Required |\n226\t| Availability gating | Strong discovery | `isEnabled` + MCP state | Policy/lazy checks | Lifecycle state | Required at discovery and dispatch |\n227\t| Central dispatch | Mostly | Strong | Mostly | Gateway + adapters | Required with no bypass |\n228\t| Toolsets/bundles | Strong | Policy lists | Groups | Policy groups | Required |\n229\t| Deferred search | Partial | Strong | Search helpers | Partial | Provider-neutral MVP |\n230\t| Concurrency metadata | Partial | Strong | Partial | Partial | Required metadata, scheduling can evolve |\n231\t| Extension in-process handlers | Yes | No native direct tools | Some | Plugin API | Avoid for MVP |\n232\t\n233\t## Implementation Shape to Explore in TechSpec\n234\t\n235\t```go\n236\ttype Descriptor struct {\n237\t ID ToolID\n238\t 
DisplayName string\n239\t Description string\n240\t InputSchema json.RawMessage\n241\t OutputSchema json.RawMessage\n242\t Source SourceRef\n243\t Visibility Visibility\n244\t Risk RiskClass\n245\t ReadOnly bool\n246\t Destructive bool\n247\t OpenWorld bool\n248\t ConcurrencySafe bool\n249\t MaxResultBytes int64\n250\t Toolsets []string\n251\t Tags []string\n252\t}\n253\t\n254\ttype Handle interface {\n255\t Descriptor() Descriptor\n256\t Availability(ctx context.Context, call ToolContext) Availability\n257\t Call(ctx context.Context, call ToolCall) (ToolResult, error)\n258\t}\n259\t\n260\ttype Provider interface {\n261\t ID() string\n262\t ListTools(ctx context.Context) ([]Descriptor, error)\n263\t Resolve(ctx context.Context, id ToolID) (Handle, bool, error)\n264\t}\n265\t\n266\ttype Registry interface {\n267\t List(ctx context.Context, scope Scope) ([]ToolView, error)\n268\t Search(ctx context.Context, scope Scope, query SearchQuery) ([]ToolView, error)\n269\t Get(ctx context.Context, scope Scope, id ToolID) (ToolView, error)\n270\t Call(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error)\n271\t}\n272\t```\n273\t\n274\tThe exact Go shape can change, but the separation should hold.\n275\t\n276\t## Risks\n277\t\n278\tIf AGH exposes tool invocation before policy and availability are in place, it will create a broader attack surface than today's ACP-delegated tools.\n279\t\n280\tIf extension tools can run in-process, one bad extension can compromise the daemon.\n281\t\n282\tIf the registry only lists resources but does not dispatch, AGH will still lack the \"last mile\" that motivated the feature.\n283\t\n284\tIf the registry only works through one surface, agents will not be able to manage it consistently.\n285\t\n286\tIf name collisions are postponed, extension/MCP adoption will force a breaking change later.\n287\t\n288\t## Evidence Index\n289\t\n290\t- AGH current state: `analysis_agh_current_state.md`.\n291\t- Hermes reference: 
`analysis_hermes.md`.\n292\t- Claude Code reference: `analysis_claude-code.md`.\n293\t- GoClaw reference: `analysis_goclaw.md`.\n294\t- OpenClaw reference: `analysis_openclaw.md`.\n295\t- Local Claude Code ideas: `analysis_claude_code_ideas.md`.\n296\t- Prior autonomy gaps: `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`.\n297\t- Security constraints: `.compozy/tasks/hermes/analysis/analysis_tools_security.md`.\n298\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:08.739351Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01A8QJgqi9Y6FRXFsCtnyf7a","content":"```\n1\t# AGH Current State: Tool Registry Foundation\n2\t\n3\t## Overview\n4\t\n5\tAGH already has the cold side of a tool catalog, but not the runtime side.\n6\t`internal/tools` defines a canonical `tool` resource shape and the daemon projects tool records through the generic resources system. Extensions can publish static tool metadata from `extension.toml`. Sessions can also carry concrete permission atoms for tools in lineage metadata.\n7\t\n8\tWhat is missing is the executable registry: a central service that can answer which tools exist for a specific agent/session, whether each tool is available now, whether the caller is allowed to use it, and how to dispatch the call through one uniform pipeline.\n9\t\n10\tThis matters because AGH's product premise is agent-first manageability. 
A tool registry is incomplete if it only helps internal Go code list metadata or if each ACP runtime owns a private tool universe that AGH cannot inspect, govern, or extend.\n11\t\n12\t## Existing Mechanisms\n13\t\n14\t### Tool resource metadata\n15\t\n16\t`internal/tools/tool.go` defines `ToolSource` values for `builtin`, `mcp`, `extension`, and `dynamic`, plus a small `Tool` record:\n17\t\n18\t- `Name`\n19\t- `Description`\n20\t- `InputSchema`\n21\t- `ReadOnly`\n22\t- `Source`\n23\t\n24\tThe only provider interface is:\n25\t\n26\t```go\n27\ttype ToolProvider interface {\n28\t Tools(ctx context.Context) ([]Tool, error)\n29\t}\n30\t```\n31\t\n32\tThere is no `Call`, `Availability`, `CheckPermission`, `Aliases`, `Namespace`, `IsConcurrencySafe`, `IsDestructive`, `MaxResultBytes`, `Owner`, `Visibility`, or provenance-rich source metadata.\n33\t\n34\t### Desired-state resources\n35\t\n36\t`internal/tools/resource.go` defines `ToolResourceKind = \"tool\"` and validates tool records as JSON-object specs with a maximum size of 256 KiB. This is a good base for persisted inventory, desired-state reconciliation, and extension-published tool metadata.\n37\t\n38\tThe daemon already has a generic `resourceCatalog[T]` in `internal/daemon/tool_mcp_resources.go` and a `newToolProjector` that projects reconciled `tool` records into a daemon-local snapshot. This catalog is descriptive and revisioned, but it is not executable.\n39\t\n40\t### Extension-published tools\n41\t\n42\t`internal/extension/manifest.go` already lets extensions declare:\n43\t\n44\t```go\n45\ttype ResourcesConfig struct {\n46\t Tools map[string]ToolConfig `toml:\"tools,omitempty\" json:\"tools,omitempty\"`\n47\t MCPServers map[string]MCPServerConfig `toml:\"mcp_servers,omitempty\" json:\"mcp_servers,omitempty\"`\n48\t}\n49\t```\n50\t\n51\t`ToolConfig` carries description, input schema, and read-only status. 
`ResolveManifestToolResources` converts these manifest entries into `toolspkg.Tool` records with `Source = ToolSourceExtension`. The daemon syncer publishes them into the resource graph with source keys like `extension/\u003cname\u003e/tool/\u003ctool\u003e`.\n52\t\n53\tThis is close to OpenClaw's manifest-first model, but AGH currently stops at metadata. A manifest-declared extension tool is not callable unless some separate ACP/MCP/runtime surface happens to expose it.\n54\t\n55\t### MCP resources\n56\t\n57\tThe same extension manifest can declare MCP servers, and the daemon sync path resolves them into desired-state MCP server resources. Skills can also declare MCP sidecars. This gives AGH a strong candidate adapter for extension tools: manifest tools may be backed by an MCP server, an extension sidecar Host API endpoint, or native AGH built-ins, but all should normalize into one registry contract.\n58\t\n59\t### Session permission atoms\n60\t\n61\t`internal/store/session_lineage.go` defines `SessionPermissionPolicy` with concrete atoms:\n62\t\n63\t- `Tools`\n64\t- `Skills`\n65\t- `MCPServers`\n66\t- `WorkspacePaths`\n67\t- `NetworkChannels`\n68\t- `SandboxProfiles`\n69\t\n70\t`internal/session/spawn.go` validates child permissions as a subset of parent permissions. This is an important base for runtime tool policy because it is already persisted with session lineage and already participates in spawn delegation.\n71\t\n72\tThe current agent definition has a flat `Tools []string` field in `internal/config/agent.go`. It lacks allow/deny overlays, named toolsets, namespace patterns, visibility tiers, and risk classes.\n73\t\n74\t### Hooks around tool calls\n75\t\n76\t`internal/hooks/payloads.go` already defines `ToolPreCallPayload`, `ToolPostCallPayload`, `ToolPostErrorPayload`, `ToolCallPatch`, and `ToolResultPatch`. 
This is the right policy extension point for a centralized dispatch pipeline:\n77\t\n78\t- pre-call hooks can deny or mutate input\n79\t- post-call hooks can redact or mutate output\n80\t- post-error hooks can classify or recover failures\n81\t\n82\tThe current gap is that AGH does not have a single dispatch pipeline that all AGH-owned tools must pass through.\n83\t\n84\t### Skills registry contrast\n85\t\n86\t`internal/skills.Registry` is much more mature than tools. It has global snapshots, workspace overlays, content loading, verification, install provenance, and `GlobalVersion()` for invalidation. Skills are injected as a static prompt catalog at session start, while tool resources are not exposed as a session-callable registry.\n87\t\n88\tThe Tool Registry should copy the skills registry's useful properties where they fit: global/workspace overlays, versioned snapshots, resource projection, progressive disclosure, and explicit content/schema loading.\n89\t\n90\t## Gaps\n91\t\n92\t1. No central runtime registry that owns executable tool handles.\n93\t2. No agent-facing discovery API for tools.\n94\t3. No native AGH tools such as `agh__tool_search`, `agh__skill_view`, or `agh__network_send`.\n95\t4. No availability model for env vars, binaries, MCP health, extension health, workspace scope, or policy state.\n96\t5. No central permission pipeline for AGH-owned tools.\n97\t6. No namespace or structured provenance model, so duplicate names would be ambiguous.\n98\t7. No toolsets or bundles comparable to skills/capabilities.\n99\t8. No direct extension execution boundary for manifest-declared tools.\n100\t9. No consistent way to expose tools over CLI, HTTP, UDS, and session-visible agent surfaces.\n101\t10. No usage telemetry by tool or skill.\n102\t11. No result-size budget, persistence policy, redaction, or output mapping at registry level.\n103\t12. 
The `dynamic` source enum exists but has no producer.\n104\t\n105\t## Relevant Code Paths\n106\t\n107\t- `internal/tools/tool.go:14-136`: tool source enum, metadata-only `Tool`, and list-only `ToolProvider`.\n108\t- `internal/tools/resource.go:13-61`: `ToolResourceKind` codec and JSON schema validation.\n109\t- `internal/daemon/tool_mcp_resources.go:20-122`: generic daemon `resourceCatalog` and `newToolProjector`.\n110\t- `internal/daemon/tool_mcp_resources.go:620-640`: extension manifest tools are projected into desired-state resources.\n111\t- `internal/extension/manifest.go:55-62`: extension resources can include tools and MCP servers.\n112\t- `internal/extension/manifest.go:154-160`: extension `ToolConfig` is static metadata only.\n113\t- `internal/extension/resource_publication.go:13-31`: manifest tool declarations become `toolspkg.Tool` records.\n114\t- `internal/config/agent.go:14-23`: `AgentDef.Tools []string` is flat.\n115\t- `internal/store/session_lineage.go:31-39`: session lineage has concrete `Tools` permission atoms.\n116\t- `internal/session/interfaces.go:244-251`: `AgentDriver` has no catalog-delta or AGH tool injection extension.\n117\t- `internal/hooks/payloads.go:520-568`: tool pre/post/error payloads already exist.\n118\t- `internal/skills/registry.go:100-103`: skills expose a global version suitable for catalog delta detection.\n119\t- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`: prior autonomy gap analysis with G1-G12 and P1-P9 proposals.\n120\t- `.compozy/tasks/hermes/analysis/analysis_tools_security.md`: security gaps relevant once AGH exposes agent-callable tools.\n121\t\n122\t## Design Constraints for the TechSpec\n123\t\n124\tThe Tool Registry should be a foundation, not a pile of built-in commands. 
It should define the contracts, policy path, extension boundary, and surfaces first, then add a small bootstrap set of native AGH tools to prove the system.\n125\t\n126\tAGH should avoid copying in-process plugin patterns from Python/TypeScript systems. Third-party executable tools should cross a process/protocol boundary: MCP, extension sidecar Host API, subprocess adapter, or future bridge SDK. Built-in Go tools can register in-process because they are part of the daemon binary.\n127\t\n128\tThe cold `tool` resource should remain valuable as catalog metadata and desired state, but executable dispatch must be modeled separately. A manifest-declared tool can be installed and discoverable while still being unavailable until its backend is healthy and authorized.\n129\t\n130\tAvailability and authorization must both be rechecked at dispatch time. Hiding unavailable tools from discovery is useful, but it is not a security boundary.\n131\t\n132\t## Open Questions\n133\t\n134\t1. Should extension tools be executable in the MVP, or should MVP only make them discoverable with an explicit unavailable state?\n135\t2. If executable, should extension tools be allowed only through MCP/sidecar boundaries, or should trusted bundled extensions get in-process handlers?\n136\t3. Should the first AGH-native tool surface be injected into ACP sessions, exposed as an MCP server hosted by AGH, or exposed only through CLI/UDS/HTTP for drivers to call indirectly?\n137\t4. Should `internal/catalog` coordinate tools and skills, or should `internal/tools` own runtime tools while a thinner catalog/search service composes skills and tools?\n138\t5. 
Which visibility tiers are needed for MVP: internal, CLI/HTTP, agent-visible, model-visible, deferred-discoverable, extension-private?\n139\t\n140\t## Evidence\n141\t\n142\t- `internal/tools/tool.go:91-136`: current `Tool` and `ToolProvider` are descriptive and list-only.\n143\t- `internal/tools/resource.go:13-61`: tool resource codec validates metadata records.\n144\t- `internal/extension/manifest.go:55-62`: extension manifests can publish `resources.tools`.\n145\t- `internal/extension/resource_publication.go:13-31`: extension manifest tools become static tool resources.\n146\t- `internal/daemon/tool_mcp_resources.go:620-640`: daemon sync publishes extension tools and MCP servers into resource desired state.\n147\t- `internal/store/session_lineage.go:31-39`: session permission policy already includes `Tools`.\n148\t- `internal/hooks/payloads.go:520-568`: tool lifecycle hooks are already typed.\n149\t- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md:1-220`: prior gap analysis identifies no runtime tool registry, no discovery API, no availability, and no agent-facing skill/tool call surface.\n150\t- `.compozy/tasks/hermes/analysis/analysis_tools_security.md:1-140`: security analysis warns that URL-capable, command-capable, MCP, and skill-install surfaces require stronger guardrails before broad exposure.\n151\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:11.219852Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:11.22042Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:11.800575Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","blocks":[{"type":"tool_use","id":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:11.844863Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:11.848497Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:11.850369Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01WjHW6RPDMKChnrb6MGQ6ua","content":"```\n1\t# Internal Backend (Go)\n2\t\n3\tThe Go runtime — `internal/*` packages composed by `internal/daemon`, plus the API transports under `internal/api/*`. ACP subprocess management, SQLite persistence, HTTP/SSE + UDS APIs, autonomy kernel, AGH Network. Entry binary lives in `cmd/agh`.\n4\t\n5\tRepo-wide rules (Critical Rules, Workflow, Build, Commits, Skill Dispatch, Memory \u0026 Skills RFC, CI/Release) live in the **root `CLAUDE.md`**. 
This file owns architecture, package boundaries, autonomy contracts, security invariants, and `internal/`-specific debugging/forensics.\n6\t\n7\t## Architecture\n8\t\n9\t### Principles\n10\t\n11\t- **Designed for incremental extension** — new capabilities arrive as new packages wired into `daemon/`, without modifying existing packages. Small interfaces + dependency injection. Every capability plan decides which extension points, hooks, capabilities, tools/resources, bundles, registries, bridge SDKs, and docs must be added, updated, or removed.\n12\t- **Pragmatic Flat with Discipline** — packages under `internal/`, API transports grouped under `api/`, no domain/infra split, no event bus.\n13\t- **`daemon/` is the sole composition root** — the only package that imports all others. Reconciliation logic running at boot belongs to composition root and is not \"legacy support\".\n14\t- **No package imports `daemon/`, `api/`, or `cli/`** — dependencies flow downward only.\n15\t- **Interfaces defined where consumed** (Go-style) — `session/` defines `AgentDriver`, `acp/` implements it.\n16\t- **Direct function calls through interfaces** — no event bus, no reflection-based routing, no NATS as inter-package coordination. NATS is permitted **only** inside `internal/network` as the embedded wire transport for the AGH Network protocol; daemon packages communicate via interfaces and the Notifier pattern, never by publishing to subjects.\n17\t- **Notifier pattern for fan-out** — typed interface for observability and SSE, not a generic bus.\n18\t- **No back-pointers between packages** — inject callbacks or interfaces.\n19\t- **Functional options for constructors** — `NewManager(opts ...Option)`.\n20\t- **Maps for \u003c10 items** — no registry interfaces for small collections.\n21\t- **File-level organization** within packages — sub-packages only when complexity justifies it.\n22\t- **CI-enforceable boundaries** — `mage Boundaries` rules prevent import cycles. 
Update `magefile.go` Boundaries() in the same commit that introduces a new `internal/api/*` subpackage.\n23\t- **`internal/api/core` is the canonical handler home.** REST/UDS endpoints exist as shared `BaseHandlers` methods; HTTP and UDS only choose registration and authentication. No transport-duplicated parsing/validation.\n24\t- **Authoritative primitives are exclusive.** When a primitive owns a state transition (`task.Service.ClaimNextRun`, `Spawn`, `EnsureMigration`), no peer package may replicate it. Wake/observe/sweep are allowed; claim/own is not. The mechanical scheduler does not call `ClaimNextRun`.\n25\t- **Hooks are typed dispatch, not an event bus.** Dispatch at the call site that owns the state transition. Never tail event/log tables to fire hooks. Hooks may deny/narrow/annotate but cannot bypass safety primitives (claim tokens, leases, TTL, lineage, spawn caps, permission narrowing).\n26\t- **Agent-manageable by default.** User-visible runtime capabilities must expose stable machine-readable control surfaces for agents: CLI verbs with `-o json`/`-o jsonl` where relevant, HTTP/UDS parity when state crosses the daemon boundary, discoverable status/config output, and docs that describe the agent path. UI-only manageability is incomplete.\n27\t- **No partial-surface completions.** Any change touching a public surface closes the loop end-to-end in one pass: contract → HTTP handler → UDS handler → CLI client → CLI command → extension/config/docs surfaces → tests → docs.\n28\t\n29\t### Concurrency\n30\t\n31\tGeneric Go concurrency patterns (goroutine ownership, channels vs mutexes, `select`/`ctx.Done()` discipline, no `time.Sleep` in orchestration) live in `agh-code-guidelines`. 
Architectural invariants below are load-bearing for design decisions:\n32\t\n33\t- **Goroutines spawned by `internal/session/manager_*.go` MUST be tracked by Manager-owned WaitGroup and joined in Manager shutdown.** Never put goroutine-owned channels in a struct field that another goroutine mutates — use a per-run handle.\n34\t- **Detached execution lifetime.** Any work that outlives an HTTP/UDS request — prompts, network channel sends, automation jobs — MUST detach via `context.WithoutCancel(ctx)`. Never tie execution lifetime to request lifetime. Expose explicit cancel endpoints (e.g., `POST /api/sessions/:id/prompt/cancel`).\n35\t- **`context.WithoutCancel` does NOT preserve deadlines.** Re-attach a deadline if needed.\n36\t- **Subprocess managed-stop** must respect `ctx.Done()` between Shutdown and Wait. Wrap `proc.Wait()` in `select { case \u003c-proc.Done(): case \u003c-ctx.Done(): }`.\n37\t- **Process-group supervision parity.** Unix uses process groups; Windows uses forced-exit fallback. Always cross-build with `GOOS=windows GOARCH=amd64 go build` before claiming subprocess work complete. Centralize signaling helpers in `internal/procutil`.\n38\t\n39\t### Runtime\n40\t\n41\t- Single-binary and local-first. Sidecars or external control planes require a written techspec.\n42\t- Keep execution paths deterministic and observable.\n43\t- **Daemon runs in background by default.** No daemon should require a foreground terminal.\n44\t- **`compozy exec` is headless.** `--format text` returns a single string; `--format json` returns a stream of valid JSON objects; the TUI is opt-in via `--tui`. 
`exec` does not persist artifacts to `.compozy/runs/` unless `--persist` is given.\n45\t- **Agent operations must not depend on the web UI.** If agents need to inspect, configure, start, stop, approve, claim, release, or repair a capability, the spec must provide a CLI/HTTP/UDS path with structured output and deterministic errors.\n46\t\n47\t### Observability\n48\t\n49\t- Every domain operation emits a canonical event with correlation keys (`workspace_id`, `session_id`, `parent_session_id`, `root_session_id`, `agent_name`, `task_id`, `run_id`, `claim_token_hash`, `lease_until`, `workflow_id`, `coordinator_session_id`, `scheduler_reason`, `hook_event`, `hook_name`, `spawn_depth`, `actor_kind`, `actor_id`, `release_reason`).\n50\t- Cover with a coverage matrix test that fails if any required lifecycle path doesn't emit its canonical event.\n51\t- Append-only event store (`runtime.db`) is the canonical operational ledger; session DBs are projections, not authority.\n52\t- Live broadcasters publish only after durable append; reconnect/replay uses `after_seq`.\n53\t\n54\t## Security Invariants\n55\t\n56\t- **`claim_token` redaction is non-negotiable.** Raw `claim_token` (`agh_claim_*`), MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings MUST NEVER appear in logs, status APIs, settings views, error payloads, channel messages, SSE, web UI, or memory. Use hash forms (`claim_token_hash`) over the wire. Network layer rejects raw `claim_token` in metadata.\n57\t- **Symlink escape hardening.** Skill sidecars, skill files, managed-extension dependency copies, and bundle install paths MUST verify resolved targets remain inside approved roots. Use `EvalSymlinks` + path-prefix check, not naive joins. 
Handle macOS `/private/var/folders` quirk (canonicalize source root before containment check).\n58\t- **Path security helpers.** Filesystem helpers resolving user-controlled or agent-controlled paths use the `sanitizePathKey` + `realpathDeepestExisting` pattern (defenses against null-byte, URL-encoded traversal, Unicode normalization, symlink-escape).\n59\t- **Identity proof-stripping defense.** In any signed-message processing path (AGH Network v1), an identity in verified format (`nickname@fingerprint`) without valid `proof` MUST classify as `rejected`, not `unverified`.\n60\t- **External-call timeouts.** Outbound HTTP/network calls MUST use a client with an explicit timeout. `http.DefaultClient` is forbidden in production code paths.\n61\t- **Load-time security scan.** Every non-bundled skill is scanned via `internal/skills.VerifyContent` on every load (not just install). Critical findings block; warning findings log; info findings log silently. Bundled skills are exempt because `go:embed` provides immutability.\n62\t\n63\t## Package Layout\n64\t\n65\t| Path | Responsibility |\n66\t| ------------------------------- | ----------------------------------------------------------------------------- |\n67\t| `cmd/agh` | Main entry point, CLI binary |\n68\t| `internal/config` | TOML loading, validation, merge, home paths, agent def parsing |\n69\t| `internal/acp` | ACP client: subprocess spawn, JSON-RPC over stdio |\n70\t| `internal/agentidentity` | Caller-identity inference from `AGH_SESSION_ID`/`AGH_AGENT` |\n71\t| `internal/automation` | Cron, webhook, and scheduled triggers; durable scheduler state |\n72\t| `internal/bridges` | External messaging adapters (Slack, Telegram, etc.) 
|\n73\t| `internal/bridgesdk` | Bridge SDK / contract types |\n74\t| `internal/bundles` | Bundle activation projector |\n75\t| `internal/cli` | Cobra commands |\n76\t| `internal/codegen` | OpenAPI → TS generator helpers |\n77\t| `internal/coordinator` | Coordinator-agent bootstrap and lifecycle |\n78\t| `internal/daemon` | Composition root, lock, boot, shutdown |\n79\t| `internal/diagnostics` | Diagnostics + health probes |\n80\t| `internal/e2elane` | E2E lane harness wiring |\n81\t| `internal/sandbox` | Sandbox profile resolution and provider runtime |\n82\t| `internal/extension` | Extension manifest, registry, host API, install runtime |\n83\t| `internal/extensiontest` | Extension test harness |\n84\t| `internal/filesnap` | File snapshot utilities |\n85\t| `internal/fileutil` | Shared filesystem helpers |\n86\t| `internal/frontmatter` | YAML frontmatter parsing |\n87\t| `internal/hooks` | Typed hook taxonomy + dispatch |\n88\t| `internal/logger` | Structured logging (slog) |\n89\t| `internal/mcp` | MCP server lifecycle / sidecars |\n90\t| `internal/memory` | Persistent dual-scope memory (global + workspace + agent), provenance, recall |\n91\t| `internal/memory/consolidation` | Dream consolidation runtime (Time → Sessions → Lock gate cascade) |\n92\t| `internal/network` | AGH Network channels/peers/wire, NATS profile |\n93\t| `internal/observe` | Event recording, health metrics, query engine |\n94\t| `internal/procutil` | Process utilities, process-group signaling, Windows fallback |\n95\t| `internal/registry` | Skill/agent/capability registry helpers |\n96\t| `internal/resources` | Resource projector / codec / validate |\n97\t| `internal/retry` | Retry primitives |\n98\t| `internal/scheduler` | Mechanical scheduler (idle registry, wakeups, sweep, recovery) |\n99\t| `internal/session` | Session lifecycle, Manager, state machine |\n100\t| `internal/settings` | Settings overlay/projection |\n101\t| `internal/situation` | Situation surface providers 
(`/agent/context`) |\n102\t| `internal/skills` | Skills catalog, loader, `VerifyContent`, MCP/hook decl, provenance |\n103\t| `internal/skills/bundled` | Bundled skill definitions |\n104\t| `internal/sse` | Shared SSE helpers |\n105\t| `internal/store` | SQLite shared helpers, migrations registry, validation |\n106\t| `internal/store/globaldb` | Global catalog (`agh.db`): sessions, metadata |\n107\t| `internal/store/sessiondb` | Per-session event store (`events.db`) |\n108\t| `internal/subprocess` | Subprocess signaling primitives |\n109\t| `internal/task` | Task domain, `task_runs` ownership, `ClaimNextRun` |\n110\t| `internal/testutil` | Shared test helpers |\n111\t| `internal/api/contract` | Shared daemon/CLI/HTTP contract types |\n112\t| `internal/api/core` | Shared handler types (`BaseHandlers`), error mapping, SSE helpers |\n113\t| `internal/api/httpapi` | HTTP/SSE server (Gin) for web UI |\n114\t| `internal/api/udsapi` | UDS server for CLI IPC |\n115\t| `internal/api/testutil` | Test helpers for the API layer |\n116\t| `internal/toolruntime` | Tool process registry + interrupts |\n117\t| `internal/tools` | Tool definitions and dispatch |\n118\t| `internal/transcript` | Canonical replay message assembly from persisted events |\n119\t| `internal/version` | Build metadata |\n120\t| `internal/workref` | Work reference helpers |\n121\t| `internal/workspace` | Workspace resolver and entity management |\n122\t\n123\t## Memory \u0026 Skills Runtime (RFC-backed)\n124\t\n125\t- **Five-layer skill/memory/agent precedence**: Bundled → Marketplace → User → Additional → Workspace, with agent-local overriding all. Higher precedence wins on collision; an audit trail logs every shadow.\n126\t- **Memory taxonomy**: `user | feedback | project | reference` types; scopes `agent | workspace | global`. Default write scope declared per agent in `memory.scope`.\n127\t- **Memory consolidation gates**: Time → Sessions → Lock cascade ordered by computational cost. 
Default gates: 24h, 5 touched sessions, file-lock. Never replace gates with naive heuristics.\n128\t- **Lifecycle hooks** (`on_session_created`, `on_session_stopped`) execute in hierarchy precedence then alphabetical order; configurable timeout (default 5s); fail-open semantics (errors logged, never block); JSON over stdin.\n129\t\n130\t## Forensic Bug Fixes\n131\t\n132\t- **Bug-fix plans open with confirmed reproduction** (timestamp, command, observed evidence) BEFORE listing changes. \"I think\" or \"probably\" is forbidden at the top of a fix plan.\n133\t- **Inactive metadata repair must distinguish startup-pending from crashed.** Sessions in `m.pending` are still starting, not failed.\n134\t- **Stale ACP session ids must be classified, not propagated.** Convert `Resource not found` to fresh-start fallback.\n135\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:14.677022Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:14.677345Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:14.940305Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","blocks":[{"type":"tool_use","id":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/","input":{"prompt":"List lessons learned files"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/","description":"List lessons learned 
files"}},{"type":"tool_result","toolUseId":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","content":"List lessons learned files"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:15.036534Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:15.109045Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:15.110071Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01HpsPgkVJvrHE5m6G6hVKeK","content":"```console\nL-001-detached-prompt-lifetime.md\nL-002-tparallel-vs-tsetenv.md\nL-003-task-runs-single-queue.md\nL-004-manual-equals-peer.md\nL-005-authoritative-primitive-exclusivity.md\nL-006-greenfield-delete-not-adapt.md\nL-007-e2e-follows-runtime-contract.md\nL-008-schema-migrations-mandatory.md\nL-009-concurrent-worktree-deadlock.md\nL-010-model-name-validation.md\nL-011-fraco-test-coverage-pushback.md\nL-012-techspec-prose-only-rework.md\nL-013-prd-must-not-name-implementation.md\nL-014-sandbox-vocabulary-drift.md\nREADME.md\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:17.394954Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:17.395691Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:17.828685Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","blocks":[{"type":"tool_use","id":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-008-schema-migrations-mandatory.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-008-schema-migrations-mandatory.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-008-schema-migrations-mandatory.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:17.829996Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.727954Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","blocks":[{"type":"tool_use","id":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-005-authoritative-primitive-exclusivity.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-005-authoritative-primitive-exclusivity.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-005-authoritative-primitive-exclusivity.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.728488Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.905133Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","blocks":[{"type":"tool_use","id":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-007-e2e-follows-runtime-contract.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-007-e2e-follows-runtime-contract.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-007-e2e-follows-runtime-contract.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.979018Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.981492Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.981538Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.981633Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.982909Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Pjn3tMb6LJr4kAp7PKxgJC","content":"```\n1\t# L-005 — Authoritative primitives are exclusive\n2\t\n3\t**Class:** Architecture\n4\t**Date discovered:** 2026-04-25 (autonomy ADR-004); reinforced 2026-04-26 (autonomy task_11)\n5\t**Evidence sources:** ADR-004 + task_11 memory + global_runs analysis\n6\t\n7\t## Context\n8\t\n9\tThe mechanical scheduler in `internal/scheduler` was tempted to claim runs directly during sweeps and recoveries. That would have collapsed the agent-pull model into a daemon-push model and given two components authority over the same state transition.\n10\t\n11\t## Root cause\n12\t\n13\tWhen two components can perform the same authoritative state transition (claim, spawn, migrate, narrow), they will eventually disagree. Either you serialize them (introducing locks, latency, and complexity), or you accept two sources of truth (introducing races and recovery bugs). The clean answer is to pick one authority per transition and force everything else to _observe and notify_ rather than _act_.\n14\t\n15\t## Rule\n16\t\n17\t\u003e When an authoritative primitive owns a state transition (`task.Service.ClaimNextRun`, `Spawn`, `EnsureMigration`), no peer package may replicate the transition. 
Wake/observe/sweep are allowed; claim/own is not.\n18\t\n19\t## Examples (canonical authorities in AGH)\n20\t\n21\t| Transition | Authority | Allowed peers |\n22\t| ----------------------------- | ------------------------------------ | -------------------------------------------------------------- |\n23\t| Claim a `task_run` | `task.Service.ClaimNextRun` | `internal/scheduler` may wake idle agents; never claims itself |\n24\t| Spawn a child session | Daemon-managed safe-spawn API | Coordinator submits requests; the daemon decides |\n25\t| Apply a schema migration | `internal/store` migrations registry | `EnsureSchema` is forbidden for column changes |\n26\t| Mutate session terminal state | Session manager | Channels, hooks, observability emit events but cannot mutate |\n27\t| Approve / publish a task | Operator + manual API surface | Coordinators receive enqueues; do not auto-approve |\n28\t\n29\t## Operationalization\n30\t\n31\t- **Scheduler can wake and sweep, but cannot claim.** `internal/scheduler` issues `scheduler.wake.count`/`no_match`/`lease_sweep.count`/`error` metrics — never `task.run.claim.success`.\n32\t- **Hooks can deny/narrow/annotate but cannot bypass safety invariants** (claim tokens, leases, TTL, lineage, spawn caps, permission narrowing).\n33\t- **Coordination channels are NEVER an ownership/status authority.** Channel `status`/`result` messages cannot mutate ownership/terminal state.\n34\t\n35\t## Anti-pattern\n36\t\n37\t- Adding a \"scheduler.\\*\" hook taxonomy that lets external code claim runs.\n38\t- Letting the coordinator bypass `ClaimNextRun` through a \"fast-path\" for trusted runs.\n39\t- Allowing the network layer to write terminal state via channel messages.\n40\t\n41\t## Source\n42\t\n43\t- `.compozy/tasks/autonomous/adrs/adr-004.md` — Split Semantic vs Mechanical Scheduling\n44\t- `.compozy/tasks/autonomous/adrs/adr-009.md` — Hooks First-Class (forbids hook-as-claim)\n45\t- `.compozy/tasks/autonomous/memory/task_11.md` — scheduler 
scope-creep avoided\n46\t- `../analysis/analysis_global_runs.md` lesson L4\n47\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.983322Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01BhfqNY2BxxSx9t73tf5Rxy","content":"```\n1\t# L-007 — E2E harness regressions follow runtime contract changes\n2\t\n3\t**Class:** Testing\n4\t**Date discovered:** 2026-04-26 (autonomy task_18 QA)\n5\t**Evidence sources:** task_18 BUG-001/002/003 + global_runs\n6\t\n7\t## Context\n8\t\n9\tThe autonomy MVP `make verify` passed. Real-scenario QA via `real-scenario-qa` then surfaced three Playwright/E2E regressions:\n10\t\n11\t- **BUG-001** — workspace onboarding race in the web E2E `TC-AUTO-015` test; absent shared workspace-onboarding wait helper.\n12\t- **BUG-002** — `acpmock` exact-match canonicalization for situation-augmented prompts. Task 04 added a situation-context augmenter that changed the rendered prompt; the deterministic ACP mock fixture matcher still expected the pre-Task-04 shape.\n13\t- **BUG-003** — Tasks browser E2E asserting an empty Agents-panel state; manual-first publish actually rendered an active run.\n14\t\n15\tAll three were rooted in tests written against an _older_ runtime contract. None was a production bug.\n16\t\n17\t## Root cause\n18\t\n19\tWhen a runtime contract changes — a new prompt augmenter, a different fixture canonicalization, a new manual-first UI state — the deterministic test infrastructure (acpmock fixtures, Playwright selectors, browser fixtures) embeds the _old_ contract. `make verify` passes because tests still hit their old expectations. 
Real-scenario QA exposes the drift.\n20\t\n21\t## Rule\n22\t\n23\t\u003e When a runtime contract changes (prompt augmenter, situation context, fixture format, manual-first UI state), the E2E mock and matchers ship in the same PR. Do not let the test infrastructure encode a stale contract.\n24\t\n25\t## Operationalization\n26\t\n27\t- For ACP fixture work: replace fragile string-matching with structured prompt metadata. acpmock uses typed metadata, not rendered prompt substrings.\n28\t- For Playwright E2E: add shared wait helpers (`web/e2e/fixtures/selectors.ts`) for workspace onboarding, session creation, manual-first publish. New runtime states require helper updates in the same PR.\n29\t- Real-scenario QA is the canonical regression net. `make verify` is necessary but not sufficient.\n30\t- E2E regressions surfaced in the QA pass are NOT production bugs unless they reveal divergent runtime behavior. Fix the test infrastructure, not the runtime.\n31\t\n32\t## Anti-pattern\n33\t\n34\t- Adding a `time.Sleep(2 * time.Second)` to \"stabilize\" a flaky Playwright spec.\n35\t- Loosening an acpmock matcher to substring instead of metadata.\n36\t- Skipping E2E in the QA pass because \"the unit tests cover it.\"\n37\t\n38\t## Source\n39\t\n40\t- `.compozy/tasks/autonomous/memory/task_18.md`\n41\t- `.compozy/tasks/autonomous/qa/verification-report.md`\n42\t- `.compozy/tasks/autonomous/qa/issues/BUG-001.md`, `BUG-002.md`, `BUG-003.md`\n43\t- `.codex/plans/2026-04-17-e2e-confidence-hardening.md` — root-cause plan for the structured-metadata switch\n44\t- `../analysis/analysis_compozy_tasks.md` (task_18 findings), `../analysis/analysis_global_runs.md` (autonomy QA section)\n45\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:18.983665Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01PTWVEr9i6LSeu3R8kU3iLt","content":"```\n1\t# L-008 — Schema migrations are required even on fresh DBs\n2\t\n3\t**Class:** Persistence\n4\t**Date discovered:** 2026-04-25 (Hermes BUG-002, Critical)\n5\t**Evidence sources:** Hermes BUG-002 + multiple Hermes/autonomy review issues\n6\t\n7\t## Context\n8\t\n9\tThe Hermes track widened the `memory_operation_log` table to add `scope`, `workspace_root`, `filename` columns. The change worked on fresh installs because `internal/memory/catalog.go` boots through `storepkg.EnsureSchema`, which created the new shape. But existing databases kept the old five-column table — `EnsureSchema` skipped existing tables — and `agh memory write` failed on upgrade with `no such column: scope`.\n10\t\n11\tCodeRabbit flagged it as Critical. The fix: schema migration v6 added through the deterministic migrations runner.\n12\t\n13\t## Root cause\n14\t\n15\t`EnsureSchema`-style boot reconciliation has a fundamental gap: it creates tables that don't exist but does not mutate tables that do. Any column/index/constraint addition needs a real migration; a migration is required _even when fresh installs already work_, because upgrade is a first-class scenario in alpha.\n16\t\n17\tA second contributor: AGH had two schema paths — the global DB used a real migrations registry, while the catalog DB used `EnsureSchema`. Drift between these paths produced the bug.\n18\t\n19\t## Rule\n20\t\n21\t\u003e Any change to a SQLite column, index, or constraint MUST add a versioned migration in the migrations registry. `EnsureSchema`-style boot reconciliation is forbidden for column changes. 
Test fresh-DB AND reopen-after-restart paths.\n22\t\n23\t## Operationalization\n24\t\n25\t- **One schema migration primitive shared by all SQLite databases** (`agh.db`, `events.db`, catalog DBs, automation scheduler state, memory operation log).\n26\t- **Numbered migrations** are recorded in `schema_migrations`. Idempotent. Wrapped in transactions.\n27\t- **Test the upgrade path:** every schema change must include a `Test*FreshDB` test AND a `Test*ReopenAfterRestart` test.\n28\t- **`SQLite recovery code paths must rename or remove `-wal`and`-shm`companions, not only the`.db` file.** (Cross-cutting hygiene: see refac-v2 issue 001 Critical WAL recovery bug.)\n29\t- **Schema-version bump** lives in the migrations registry — do not encode it in code constants.\n30\t\n31\t## Allowed exception\n32\t\n33\tIn greenfield alpha, a hard-cut rename + table rewrite without compat migration is allowed when:\n34\t\n35\t1. The change is documented in the techspec's \"Delete Targets\" section.\n36\t2. All callers of the old shape are deleted in the same change.\n37\t3. 
Per-developer wipe of local SQLite is acceptable cost.\n38\t\n39\t## Anti-pattern\n40\t\n41\t- `CREATE TABLE IF NOT EXISTS new_columns ...` then expecting the table to grow.\n42\t- Schema-version constants in Go code that don't match the migrations registry.\n43\t- Tests that only cover fresh-DB.\n44\t\n45\t## Source\n46\t\n47\t- `.codex/ledger/2026-04-25-MEMORY-hermes-qa-execution.md` (BUG-002)\n48\t- `.compozy/tasks/hermes/reviews-001/issue_020.md` (Critical)\n49\t- `.compozy/tasks/refac-v2/reviews-001/issue_001.md` (WAL/SHM Critical)\n50\t- `.compozy/tasks/autonomous/memory/task_07.md` (claim/lease schema v7)\n51\t- `../analysis/analysis_global_runs.md` lesson L1, `../analysis/analysis_local_runs.md` lesson LL-2\n52\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:21.626599Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:21.627079Z","update":{"kind":"tool_call_started","toolCallId":"toolu_0112CmNmAFZfaPdk9dqcebdf","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_0112CmNmAFZfaPdk9dqcebdf","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:22.764454Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01SU8EcukH2hZCYRQBTxXY41","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01SU8EcukH2hZCYRQBTxXY41","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:22.764682Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_0112CmNmAFZfaPdk9dqcebdf","blocks":[{"type":"tool_use","id":"toolu_0112CmNmAFZfaPdk9dqcebdf","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-006-greenfield-delete-not-adapt.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-006-greenfield-delete-not-adapt.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-006-greenfield-delete-not-adapt.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:23.57704Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01SU8EcukH2hZCYRQBTxXY41","blocks":[{"type":"tool_use","id":"toolu_01SU8EcukH2hZCYRQBTxXY41","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-012-techspec-prose-only-rework.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-012-techspec-prose-only-rework.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-012-techspec-prose-only-rework.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:23.577324Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01WXjoBnAyz5bSG4okWg22eC","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01WXjoBnAyz5bSG4okWg22eC","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.514406Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WXjoBnAyz5bSG4okWg22eC","blocks":[{"type":"tool_use","id":"toolu_01WXjoBnAyz5bSG4okWg22eC","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.541581Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.545928Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01SU8EcukH2hZCYRQBTxXY41","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.54621Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_0112CmNmAFZfaPdk9dqcebdf","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.54644Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WXjoBnAyz5bSG4okWg22eC","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.547911Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01SU8EcukH2hZCYRQBTxXY41","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01SU8EcukH2hZCYRQBTxXY41","content":"```\n1\t# L-012 — TechSpec 
without Go interface signatures triggers heavy review rework\n2\t\n3\t**Class:** Spec authoring\n4\t**Date discovered:** 2026-04-25 / 2026-04-26 (autonomy techspec vs. release-adjustments comparison)\n5\t**Evidence sources:** `../analysis/analysis_compozy_tasks.md` §PRD/TechSpec Quality Patterns\n6\t\n7\t## Context\n8\t\n9\tTwo TechSpecs from the same period delivered radically different review trajectories:\n10\t\n11\t- **Autonomy techspec** (`.compozy/tasks/autonomous/_techspec.md`, 706 lines): MVP boundary at top, listed Architectural Boundaries, Go interface signatures pasted as code blocks (`ClaimCriteria`, `ClaimedRun`, `TaskClaimer`, `SpawnOpts`, `PermissionNarrower`), data-model fields with rationale, side-table-vs-JSON decisions explicit, lease invariants enumerated as a numbered list. Eighteen tasks executed cleanly with **one** review round.\n12\t- **Release-adjustments / qa-review** (no `_techspec.md`, just review-only directories): unresolved review queues persisted across multiple PRs. Tasks 07-09 of autonomy that touched contract-laden interfaces had **exactly one** round of fixes because the techspec gave the implementer no contract ambiguity.\n13\t\n14\tThe differentiator was not length — it was concreteness. Specs that paste signatures, list fields with rationale, and enumerate invariants leave nothing to interpretation. Specs that describe the same mechanics in prose force the implementer to invent shapes that reviewers then reject.\n15\t\n16\t## Root cause\n17\t\n18\tProse-only descriptions produce N implementations, where N is the number of agents that read the spec. Reviewers then converge each implementation toward the implicit intent through review rounds — that is the rework. 
Code blocks (interface signatures, struct fields, SQL DDL, enum values) are unambiguous; reviewers either approve or reject specific tokens, and the spec author resolves the ambiguity once instead of N times.\n19\t\n20\t## Rule\n21\t\n22\t\u003e A TechSpec is not ready for review until it carries the **six quality markers**:\n23\t\u003e\n24\t\u003e 1. MVP boundary statement at top.\n25\t\u003e 2. Architectural Boundaries section.\n26\t\u003e 3. Concrete Go interface signatures pasted as code blocks (not prose).\n27\t\u003e 4. Data-model field rationale (purpose + shape per new column / frontmatter field / config key).\n28\t\u003e 5. Side-table-vs-JSON decision stated for every new domain entity.\n29\t\u003e 6. Lease / safety invariants as a numbered list.\n30\t\u003e\n31\t\u003e Specs without these markers are pre-rejected — they will need multiple review rounds.\n32\t\n33\t## Operationalization\n34\t\n35\t`cy-spec-peer-review` invokes Opus with a six-marker checklist embedded in the prompt. 
`cy-spec-preflight` blocks `cy-create-techspec` from completing until the six markers are present.\n36\t\n37\tWhen a spec is missing a marker, fix the spec — do not start tasks against the gap.\n38\t\n39\t## Anti-patterns\n40\t\n41\t- \"The function will accept the relevant config and return the result.\" (no signature)\n42\t- \"Add a column for ownership tracking.\" (no rationale, no name, no type)\n43\t- \"Choose the appropriate storage shape.\" (no decision)\n44\t- \"Ensure the lease is held safely.\" (no invariants)\n45\t- \"We'll figure out the interface during implementation.\" (defers the ambiguity)\n46\t\n47\t## Source\n48\t\n49\t- `.compozy/tasks/autonomous/_techspec.md` — the cleanest exemplar\n50\t- `.compozy/tasks/autonomous/adrs/adr-003.md` (lease invariants numbered)\n51\t- `.compozy/tasks/autonomous/adrs/adr-006.md` (safe spawn caps numbered)\n52\t- `.compozy/tasks/release-adjustments/` and `.compozy/tasks/qa-review/` — review-only dirs without TechSpecs, persistent unresolved review queues\n53\t- `../analysis/analysis_compozy_tasks.md` §\"Markers of 'good enough to execute'\" and §\"Markers of trouble\"\n54\t- `docs/_memory/_synthesis.md` skill candidate S-M9 `agh-techspec-quality-gate`\n55\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.548325Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_0112CmNmAFZfaPdk9dqcebdf","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_0112CmNmAFZfaPdk9dqcebdf","content":"```\n1\t# L-006 — Greenfield + zero-legacy means _delete_, not _adapt_\n2\t\n3\t**Class:** Project posture\n4\t**Date discovered:** 2026-04-17 (harness TechSpec review, Portuguese-language reviewer)\n5\t**Evidence sources:** Harness review + `remove-legacy-alpha.md` standing directive + multiple ADRs\n6\t\n7\t## Context\n8\t\n9\tThe harness TechSpec proposed migrating an `inputAugmenter` callback to a 
`TurnAugmenter` pipeline. The spec did not say whether the old callback was deleted, kept as an adapter, or coexisting. The reviewer (in Portuguese) flagged this directly: _\"política zero-legacy exige declarar 'delete'\"_ — the zero-legacy policy _requires_ the spec to declare what is deleted.\n10\t\n11\tThis is a stronger application of the CLAUDE.md \"Greenfield Alpha — Zero Legacy Tolerance\" rule: it's not enough to _allow_ deletion; specs must _enumerate_ what is deleted.\n12\t\n13\t## Root cause\n14\t\n15\tWhen a spec says \"we are migrating to X\" without naming the delete-target, agents default to keeping both. Compatibility shims, adapters, and \"preserve old behavior\" branches accumulate as technical debt. Greenfield discipline only works if every breaking-change spec explicitly names what disappears.\n16\t\n17\t## Rule\n18\t\n19\t\u003e Every breaking-change techspec MUST explicitly name its delete targets. \"Delete the old thing\" is not a default; it is a checklist item that must be enumerated.\n20\t\n21\t## Operationalization\n22\t\n23\tIn every TechSpec that changes a public surface (or any meaningful internal contract), include a section like:\n24\t\n25\t```markdown\n26\t## Delete Targets\n27\t\n28\t- `internal/foo.OldType` (replaced by `internal/foo.NewType` in step 3)\n29\t- `pkg/bar.LegacyAdapter` (no callers after migration; remove in step 5)\n30\t- TOML key `[old.section]` (renamed; no backward alias)\n31\t- HTTP endpoint `/v0/old/path` (replaced by `/v1/new/path`; no redirect)\n32\t```\n33\t\n34\tRenames sweep code, storage, APIs, CLI, extensions, specs, RFCs, AND `.compozy/tasks/*` artifacts in the same change. No aliases, no dual fields, no migration code.\n35\t\n36\t## Allowed exception (single-pass repair)\n37\t\n38\tWhen the cost of \"delete the old thing\" is \"every developer rebuilds their local SQLite,\" in-place ALTER + one-shot repair is allowed if and only if:\n39\t\n40\t1. Repair is bounded to a single boot.\n41\t2. 
Strict semantics resume immediately after repair.\n42\t3. The exception is documented in an ADR.\n43\t\n44\tReference: `session-driver-override/adrs/adr-005.md`.\n45\t\n46\t## Source\n47\t\n48\t- `.codex/plans/remove-legacy-alpha.md` (standing directive)\n49\t- `.codex/sessions/2026/04/17/.../exec-20260417-232547-929722000/turns/0001/response.txt` (harness review, Portuguese)\n50\t- `.compozy/tasks/autonomous/adrs/adr-001.md`\n51\t- Multiple `network-rename`, `assistant-ui-hard-cut`, `workspace-menu-hardcut` plans in `.codex/plans/`\n52\t- `../analysis/analysis_local_runs.md` lesson LL-1\n53\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:24.549639Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WXjoBnAyz5bSG4okWg22eC","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01WXjoBnAyz5bSG4okWg22eC","content":"```\n1\t# Analysis: ACP Tool Registry Compatibility\n2\t\n3\t## Scope\n4\t\n5\tThis analysis answers whether ACP imposes a tool registry pattern that AGH must follow, and which `.resources/*` projects materially use ACP in ways that affect the Tool Registry TechSpec. The research combines official ACP/MCP documentation with read-only subagent passes over `.resources/rayclaw`, `.resources/harnss`, `.resources/acpx`, `.resources/openclaw`, `.resources/opencode`, and an inventory pass across every top-level `.resources/*` project.\n6\t\n7\t## Executive Conclusion\n8\t\n9\tACP does not define a durable, programmatic tool registry for callable tools. ACP defines session lifecycle, prompt streaming, client authority callbacks, permission requests, MCP server bootstrap fields, and observable tool-call events. 
Those tool-call events carry `toolCallId`, `title`, `kind`, `status`, locations, raw input, raw output, and content, but they do not carry a stable `name` field equivalent to MCP `Tool.name`.\n10\t\n11\tTherefore, AGH should not model its Tool Registry as an ACP registry, and should not use ACP `title` as a durable policy or collision key.\n12\t\n13\tFor session-visible AGH tools, the strongest compatibility path remains the accepted MVP path: an AGH-hosted MCP server backed by the daemon Tool Registry. MCP supplies the externally callable `Tool.name`; ACP supplies the way ACP-compatible agents receive `mcpServers`, report tool execution, and request permission.\n14\t\n15\tThe practical design correction is:\n16\t\n17\t- AGH should use one canonical provider-safe `ToolID` everywhere, using reserved double-underscore namespace separators, for example `agh__skill_view`, `mcp__github__create_issue`, or `ext__linear__search`.\n18\t- The same `ToolID` should be the hosted MCP `Tool.name`; AGH should not introduce a second wire alias in the MVP.\n19\t- ACP `title` is display-only and event-only.\n20\t- ACP `ToolKind` is a risk/display hint, not registry identity.\n21\t- `permissions.mode` remains the session approval ceiling; registry policy remains the granular layer below it.\n22\t\n23\t## Official Protocol Constraints\n24\t\n25\t### ACP\n26\t\n27\tOfficial ACP schema evidence:\n28\t\n29\t- ACP `session/new`, `session/load`, and `session/resume` include `mcpServers`; agents are expected to connect to those MCP servers for the session. Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n30\t- ACP `ToolCall` is event/reporting data with `toolCallId`, `title`, `kind`, `status`, `rawInput`, `rawOutput`, `locations`, and `content`. The schema describes `title` as human-readable and `toolCallId` as unique within a session. It does not expose a durable callable `name` field. 
Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n31\t- ACP `ToolKind` values are coarse categories such as `read`, `edit`, `delete`, `move`, `search`, `execute`, `think`, `fetch`, `switch_mode`, and `other`. The schema says these help clients pick icons and display progress, which is weaker than registry identity. Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n32\t- ACP `session/request_permission` carries a `toolCall` object plus permission options. It is a permission bridge for a concrete tool call, not a registry discovery API. Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n33\t- The official \"ACP Registry\" is an agent registry: a catalog of ACP-compatible agents and their install/run metadata, not a callable tool registry. Source: \u003chttps://agentclientprotocol.com/registry\u003e.\n34\t\n35\t### MCP\n36\t\n37\tOfficial MCP schema evidence:\n38\t\n39\t- MCP `tools/list` returns `Tool[]`.\n40\t- MCP `Tool` has `name`, optional `title`, optional `description`, `inputSchema`, optional `outputSchema`, annotations, execution metadata, and `_meta`.\n41\t- MCP describes `name` as intended for programmatic/logical use and `title` as intended for UI/end-user contexts. Source: \u003chttps://modelcontextprotocol.io/specification/draft/schema\u003e.\n42\t\n43\tImplication: AGH should treat MCP `Tool.name` as the session wire name when exposing AGH registry tools through hosted MCP. ACP does not replace that name.\n44\t\n45\t## ACP Usage Inventory Across `.resources/*`\n46\t\n47\t| Project | ACP usage | Tool registry relevance |\n48\t|---|---|---|\n49\t| `.resources/acpx` | ACP client/orchestrator and conformance tooling. | Has an agent/adapter registry, not a callable tool registry. Passes `mcpServers`; models tool calls by `toolCallId`, title, kind, status, raw input/output. |\n50\t| `.resources/collaborator-ai` | ACP client/orchestrator. | No registry found. Uses ACP tool update titles for display. 
|\n51\t| `.resources/goclaw` | ACP client/orchestrator. | No formal registry. Uses method switches and permission heuristics for ACP callbacks. |\n52\t| `.resources/harnss` | ACP client/orchestrator with Electron bridge. | Has ACP agent registry and UI rendering adapters, not a tool registry. Converts configured MCP servers to ACP `McpServer[]`. |\n53\t| `.resources/hermes` | ACP server/agent implementation plus ACP client shim. | Relevant: registers ACP-provided MCP servers into Hermes agent state and valid tool names; maps Hermes tools to ACP `ToolKind` and titles. |\n54\t| `.resources/multica` | ACP client/orchestrator. | No formal registry. Parses titles such as `terminal:` and `read:` for UI normalization. |\n55\t| `.resources/openclaw` | ACP server/client/runtime bridge. | Has internal tool catalog and plugin/MCP surfaces, but main ACP bridge does not expose an ACP tool registry and rejects per-session `mcpServers`. |\n56\t| `.resources/opencode` | Native ACP server. | Has a real internal `ToolRegistry`, but ACP does not expose it as a registry API. Accepts ACP `mcpServers` and converts them into internal MCP config. |\n57\t| `.resources/paperclip` | Docs/reference only. | Conceptual ACP references only. |\n58\t| `.resources/rayclaw` | ACP client/orchestrator. | Exposes ACP control as local `acp_*` tools; ACP-reported tool calls are telemetry, not registry entries. |\n59\t| `.resources/sandbox-agent` | ACP adapter/proxy/client package. | Agent launch registry only; no ACP tool registry found. |\n60\t| `.resources/t3code` | ACP schema/client/runtime package. | Schema and runtime tracking for ACP tool events; no broad tool registry. |\n61\t\n62\tNo meaningful ACP evidence was found in `.resources/cc-posts`, `.resources/chat`, `.resources/openfang`, `.resources/pi`, or `.resources/symphony`. 
`.resources/claude-code` had an `ACP` false positive inside an embedded/base64-like string, not implementation evidence.\n63\t\n64\t## Deep Dives\n65\t\n66\t### RayClaw\n67\t\n68\tRayClaw is an ACP client/orchestrator. It spawns configured ACP agents, runs JSON-RPC lifecycle calls (`initialize`, `session/new`, `session/prompt`, `session/end`), and exposes ACP orchestration to RayClaw's primary LLM through local wrapper tools named `acp_coding`, `acp_new_session`, `acp_prompt`, `acp_end_session`, `acp_list_sessions`, `acp_submit_job`, and `acp_job_status`.\n69\t\n70\tThose `acp_*` names are RayClaw's local tool registry convention, not ACP. RayClaw's ACP tool-call handling treats incoming `session/update` tool calls as observations and records them by title/raw input. It does not dispatch those reported ACP tool calls through RayClaw's local registry.\n71\t\n72\tImportant evidence:\n73\t\n74\t- `.resources/rayclaw/src/acp.rs:436-453` initializes ACP with client capabilities, not a host tool registry.\n75\t- `.resources/rayclaw/src/acp.rs:752-843` handles `session/request_permission`.\n76\t- `.resources/rayclaw/src/acp.rs:846-984` parses ACP tool-call progress.\n77\t- `.resources/rayclaw/src/acp.rs:1521-1531` creates sessions with `mcpServers: []`.\n78\t- `.resources/rayclaw/src/tools/acp.rs:16-40` registers the local `acp_*` wrapper tools.\n79\t- `.resources/rayclaw/tests/acp_integration.rs:128-240` enforces local tool-name uniqueness, allowed characters, length, and collision checks.\n80\t\n81\tTransferable points:\n82\t\n83\t- Separate ACP orchestration tools from normal runtime tools.\n84\t- Treat ACP tool calls as child-agent telemetry unless AGH deliberately bridges them.\n85\t- Do not prefer `allow_always` as an automatic approval default the way RayClaw does under `auto_approve`; AGH should keep durable grants explicit.\n86\t- Reject or disambiguate sanitized name collisions rather than truncating.\n87\t\n88\t### Harnss\n89\t\n90\tHarnss is an ACP 
client/orchestrator with an Electron bridge and React UI. It converts renderer MCP server configs into ACP SDK `McpServer[]`, including stdio and remote transports, then passes them into `newSession` and `loadSession`. It also supports live reload through ACP `loadSession` when available.\n91\t\n92\tHarnss does not consume or expose an ACP tool registry. Its \"registry\" evidence is an ACP agent registry and a UI-side static MCP renderer table. ACP tool calls are converted into UI messages keyed by `toolCallId`, using title/kind/raw input/output normalization.\n93\t\n94\tImportant evidence:\n95\t\n96\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:193-215` converts MCP configs to ACP `McpServer[]`.\n97\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:365-483` wires ACP connection callbacks, event forwarding, and permission bridge.\n98\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:521-546` starts ACP sessions with MCP servers.\n99\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:793-828` reloads sessions with MCP servers.\n100\t- `.resources/harnss/src/hooks/useACP.ts:194-337` converts ACP tool events into UI messages.\n101\t- `.resources/harnss/src/hooks/useACP.ts:413-473` handles ACP permission requests.\n102\t- `.resources/harnss/src/lib/engine/acp-adapter.ts:267-358` derives display/tool renderer names from ACP title/kind/raw input.\n103\t- `.resources/harnss/src/components/McpToolContent.tsx:83-138` supports SDK-style `mcp__Server__tool` names and ACP-style `Tool: Server/tool` titles in UI rendering.\n104\t\n105\tTransferable points:\n106\t\n107\t- Normalize ACP event data at the boundary into AGH's canonical tool-call observation model.\n108\t- Preserve raw ACP permission options; do not collapse manual allow/deny into \"first allow\" or \"first reject\" if the protocol provides multiple option IDs.\n109\t- Keep rendering names separate from policy names.\n110\t- Pass MCP servers as session bootstrap/load data when AGH chooses 
per-session MCP support.\n111\t\n112\t### ACPX\n113\t\n114\tACPX is a headless ACP client/orchestrator. Its registry is an agent/adapter registry mapping names like `codex`, `claude`, `gemini`, and others to launch commands. This is not a callable tool registry.\n115\t\n116\tACPX implements client authority callbacks such as filesystem read/write, terminal create/output/wait/kill/release, and `session/request_permission`. It parses `mcpServers` from config and passes them through to `session/new` and `session/load`. It does not discover or normalize MCP tools into a registry.\n117\t\n118\tImportant evidence:\n119\t\n120\t- `.resources/acpx/src/agent-registry.ts:38-107` maps adapter names to commands.\n121\t- `.resources/acpx/src/mcp-servers.ts:100-177` parses MCP server configs.\n122\t- `.resources/acpx/src/acp/client.ts:475-538` wires ACP client callbacks and initialize capabilities.\n123\t- `.resources/acpx/src/acp/client.ts:638-693` passes `mcpServers` to `session/new` and `session/load`.\n124\t- `.resources/acpx/src/permissions.ts:98-152` implements coarse permission decisions.\n125\t- `.resources/acpx/src/session/conversation-model.ts:310-353` persists tool events keyed by tool call ID.\n126\t- `.resources/acpx/conformance/cases/021-prompt-post-success-drain.json:1-50` shows late tool updates can arrive after prompt success.\n127\t\n128\tTransferable points:\n129\t\n130\t- Keep agent/provider registries separate from Tool Registry.\n131\t- ACP compatibility includes filesystem and terminal callbacks where advertised; those callbacks must share AGH's registry policy engine or be routed through equivalent approval gates.\n132\t- Preserve distinct identities: AGH record IDs, ACP session IDs, provider-native session IDs, tool call IDs, and registry tool IDs.\n133\t- Add a protocol-aware drain/settle window for late `tool_call_update` events.\n134\t\n135\t### OpenClaw\n136\t\n137\tOpenClaw's main `openclaw acp` bridge is a Gateway-backed ACP server. 
It forwards prompts to the Gateway and translates Gateway events into ACP session updates. The main bridge advertises MCP HTTP/SSE support as disabled and rejects non-empty per-session `mcpServers`; its docs say MCP should be configured at the Gateway/agent layer.\n138\t\n139\tOpenClaw has rich internal tool catalogs and plugin/MCP surfaces, but the main ACP bridge does not expose them as an ACP tool registry. Tool identity in ACP is display/event identity: title formatting plus inferred `ToolKind`.\n140\t\n141\tImportant evidence:\n142\t\n143\t- `.resources/openclaw/src/acp/server.ts:4-13` and `.resources/openclaw/src/acp/server.ts:104-122` bootstrap the ACP stdio server.\n144\t- `.resources/openclaw/src/acp/translator.ts:519-540` advertises ACP capabilities.\n145\t- `.resources/openclaw/src/acp/translator.ts:542-603` handles session creation/loading.\n146\t- `.resources/openclaw/src/acp/translator.ts:1417-1424` rejects non-empty `mcpServers`.\n147\t- `.resources/openclaw/src/acp/translator.ts:848-940` maps Gateway tool events to ACP tool updates.\n148\t- `.resources/openclaw/src/acp/event-mapper.ts:297-342` formats tool titles and infers tool kind.\n149\t- `.resources/openclaw/src/agents/tool-catalog.ts:20-37` and `.resources/openclaw/src/agents/tool-catalog.ts:306-393` define a separate internal tool catalog.\n150\t- `.resources/openclaw/extensions/acpx/src/runtime-internals/mcp-proxy.mjs:33-64` shows the ACPX extension can inject MCP servers into embedded ACP sessions, unlike the main gateway bridge.\n151\t\n152\tTransferable points:\n153\t\n154\t- AGH must explicitly choose whether its ACP bridge accepts per-session `mcpServers` like OpenCode/Harnss/ACPX or rejects them like OpenClaw's gateway bridge.\n155\t- If AGH supports both runtime-managed MCP and ACP-provided MCP servers, precedence and collision rules must be explicit.\n156\t- Do not use substring heuristics for registry policy where explicit tool metadata is available.\n157\t\n158\t### 
OpenCode\n159\t\n160\tOpenCode implements a native ACP server and has a real internal `ToolRegistry`. This is the strongest local reference for how an agent can maintain a rich internal registry while ACP still sees only session lifecycle, MCP bootstrap, tool-call updates, and permission requests.\n161\t\n162\tOpenCode accepts ACP per-session `mcpServers`, stores them in ACP session state, converts them into internal MCP config, and adds them through its SDK. Its internal MCP naming pattern exposes MCP tools as `sanitize(server) + \"_\" + sanitize(tool)` while preserving the original MCP tool name for the actual call.\n163\t\n164\tImportant evidence:\n165\t\n166\t- `.resources/opencode/packages/opencode/src/cli/cmd/acp.ts:23-60` bootstraps `opencode acp`.\n167\t- `.resources/opencode/packages/opencode/src/acp/types.ts:1-16` and `.resources/opencode/packages/opencode/src/acp/session.ts:8-75` store ACP session state, including `mcpServers`.\n168\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:534-578` advertises MCP support.\n169\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:584-687` accepts MCP servers on session creation/loading.\n170\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:1216-1254` converts ACP MCP servers into internal MCP config.\n171\t- `.resources/opencode/packages/opencode/src/mcp/index.ts:115-146` and `.resources/opencode/packages/opencode/src/mcp/index.ts:618-651` implement sanitized server/tool naming while preserving raw MCP names.\n172\t- `.resources/opencode/packages/opencode/src/tool/tool.ts:34-43` and `.resources/opencode/packages/opencode/src/tool/registry.ts:163-207` define internal tool definitions and registry behavior.\n173\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:273-455` emits ACP tool-call lifecycle updates.\n174\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:190-271` bridges internal permission events to ACP `session/request_permission`.\n175\t\n176\tTransferable 
points:\n177\t\n178\t- Keep AGH's internal registry richer than ACP.\n179\t- Store ACP-provided MCP servers in session state, not global daemon config.\n180\t- Preserve raw MCP server/tool names separately from the canonical AGH `ToolID`.\n181\t- Emit a stable ACP lifecycle, preferably `pending -\u003e in_progress -\u003e completed/failed`, even when the underlying runtime first reports a running event.\n182\t- Do not rely on a single-underscore sanitized naming scheme without collision diagnostics.\n183\t\n184\t### Hermes, Multica, GoClaw, Sandbox-Agent, T3Code\n185\t\n186\tThese projects reinforce the same split:\n187\t\n188\t- `.resources/hermes` is relevant because it registers ACP-provided MCP servers into agent state and valid tool names, then maps tool events into ACP kinds/titles. It has useful registry ideas, but ACP remains the session/event layer.\n189\t- `.resources/multica` and `.resources/goclaw` normalize ACP tool titles/kinds for display and permission heuristics; neither shows a protocol-level tool registry.\n190\t- `.resources/sandbox-agent` has ACP HTTP-to-stdio adapter and launch registry logic, but no callable ACP tool registry.\n191\t- `.resources/t3code` provides ACP schema/client/runtime tracking for tool-call events, not a broad registry.\n192\t\n193\t## Design Implications For AGH\n194\t\n195\t1. ACP compatibility is not a reason to avoid a daemon Tool Registry. ACP leaves tool discovery/execution models to the agent/runtime, or to MCP servers supplied to the session.\n196\t\n197\t2. AGH should expose daemon-owned session tools through an AGH-hosted MCP server in the MVP. This matches the accepted ADR-002 direction and aligns with ACP's `mcpServers` field.\n198\t\n199\t3. The registry's canonical ID must not be ACP `title`. Use one stable provider-safe `ToolID` across AGH and hosted MCP.\n200\t\n201\t4. 
The registry should store one canonical callable identity plus metadata:\n202\t - `ToolID`: provider-safe lower snake segments separated by reserved `__`, for example `agh__skill_view`.\n203\t - `DisplayTitle`: user-facing title only.\n204\t - `SourceRef`: structured provenance, for example built-in, MCP server, extension ID, bundle ID, provider ID.\n205\t\n206\t5. Collision handling must be fail-closed:\n207\t - Canonical `ToolID` collision: provider registration error or conflicted diagnostic.\n208\t - Sanitized external-name collision: tool is not exposed to the session until disambiguated.\n209\t - Display title collision: allowed, because titles are not policy identities.\n210\t\n211\t6. Operator and session projections should remain separate:\n212\t - Operator surfaces show unavailable, unauthorized, and conflicted tools with reason codes.\n213\t - Session/model surfaces expose only callable tools after availability, authorization, approval ceiling, and collision checks.\n214\t\n215\t7. ACP permission policy integration must remain ceiling-based:\n216\t - `deny-all` denies by default.\n217\t - `approve-reads` auto-approves only registry-classified read-only tools and ACP read/search callbacks AGH classifies as read-only.\n218\t - `approve-all` skips approval prompts for otherwise allowed tools, but does not bypass registry deny rules, extension grants, session lineage, source trust, availability, hooks, or conflict checks.\n219\t\n220\t8. ACP filesystem and terminal callbacks, if AGH advertises them, must not bypass Tool Registry policy. Either route them through the registry as first-class built-in tools or share the same policy/approval engine with equivalent telemetry and hooks.\n221\t\n222\t9. ACP `ToolKind` should be explicit metadata on AGH descriptors. Heuristics from title/kind are fallback-only for external ACP events that AGH observes but does not own.\n223\t\n224\t10. AGH should persist observed ACP tool calls separately from registry definitions. 
Observations are keyed by `toolCallId` within a session and carry title/kind/status/raw input/output. Registry entries are keyed by canonical `ToolID`.\n225\t\n226\t11. AGH should support late tool-call updates after prompt completion by draining the ACP event stream for a bounded window or until protocol-specific completion conditions are met.\n227\t\n228\t12. AGH should decide explicitly whether to accept third-party ACP `mcpServers` from clients:\n229\t - If accepted, store them as session-scoped tool sources with clear precedence and conflict policy.\n230\t - If rejected, document the OpenClaw-style stance and require MCP sources to be configured through AGH's registry/config lifecycle.\n231\t - For this TechSpec, the safer MVP path is AGH-managed hosted MCP first, with acceptance of client-supplied MCP servers as a compatibility extension only if collision and source-trust rules are implemented.\n232\t\n233\t## Accepted Naming And Collision Recommendation\n234\t\n235\tAdopt one canonical public `ToolID` format:\n236\t\n237\t- Canonical ID: provider-safe lower snake segments separated by reserved double underscore, for example `agh__skill_view`, `agh__tool_search`, `mcp__github__create_issue`, `ext__linear__search`.\n238\t- Hosted MCP `Tool.name`: same as the canonical `ToolID`.\n239\t- Display title: human-readable and non-unique, for example `View Skill`.\n240\t- Source/provenance: structured fields, not inferred solely from name prefixes.\n241\t- No shadowing: providers cannot replace an existing canonical ID unless they are the same source updating the same record.\n242\t- No silent truncation: if sanitization or length rules would collide, registration/session projection marks the tool conflicted and hides it from the session surface.\n243\t- Policy, dispatch, telemetry, hooks, CLI, HTTP, UDS, and hosted MCP all use the same `ToolID`.\n244\t\n245\tThis keeps AGH's internal registry expressive while respecting MCP wire compatibility and ACP's event-oriented 
model.\n246\t\n247\t## Evidence Summary\n248\t\n249\tOfficial protocol sources:\n250\t\n251\t- ACP schema: \u003chttps://agentclientprotocol.com/protocol/schema\u003e\n252\t- ACP agent registry: \u003chttps://agentclientprotocol.com/registry\u003e\n253\t- MCP draft schema: \u003chttps://modelcontextprotocol.io/specification/draft/schema\u003e\n254\t\n255\tPrimary local evidence:\n256\t\n257\t- `.resources/rayclaw/src/acp.rs`\n258\t- `.resources/rayclaw/src/tools/acp.rs`\n259\t- `.resources/rayclaw/tests/acp_integration.rs`\n260\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts`\n261\t- `.resources/harnss/src/hooks/useACP.ts`\n262\t- `.resources/harnss/src/lib/engine/acp-adapter.ts`\n263\t- `.resources/acpx/src/acp/client.ts`\n264\t- `.resources/acpx/src/mcp-servers.ts`\n265\t- `.resources/acpx/src/permissions.ts`\n266\t- `.resources/openclaw/src/acp/translator.ts`\n267\t- `.resources/openclaw/src/acp/event-mapper.ts`\n268\t- `.resources/opencode/packages/opencode/src/acp/agent.ts`\n269\t- `.resources/opencode/packages/opencode/src/tool/registry.ts`\n270\t- `.resources/opencode/packages/opencode/src/mcp/index.ts`\n271\t\n272\tSubagent inventory evidence:\n273\t\n274\t- ACP implementations/usages: `acpx`, `collaborator-ai`, `goclaw`, `harnss`, `hermes`, `multica`, `openclaw`, `opencode`, `paperclip`, `rayclaw`, `sandbox-agent`, `t3code`.\n275\t- No meaningful ACP evidence: `cc-posts`, `chat`, `openfang`, `pi`, `symphony`.\n276\t- Excluded false positive: `claude-code`.\n277\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:28.128502Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:28.498904Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:30.138174Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:30.138564Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","blocks":[{"type":"tool_use","id":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/ 2\u003e/dev/null \u0026\u0026 ls /Users/pedronauck/Dev/compozy/agh/internal/extension/ 2\u003e/dev/null | head -30","input":{"prompt":"List extension protocol files"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/ 2\u003e/dev/null \u0026\u0026 ls /Users/pedronauck/Dev/compozy/agh/internal/extension/ 2\u003e/dev/null | head -30","description":"List extension protocol files"}},{"type":"tool_result","toolUseId":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","content":"List extension protocol files"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:30.847073Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","blocks":[{"type":"tool_use","id":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","name":"Task","title":"ls 
/Users/pedronauck/Dev/compozy/agh/sdk/ 2\u003e/dev/null","input":{"prompt":"List SDK directory"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/sdk/ 2\u003e/dev/null","description":"List SDK directory"}},{"type":"tool_result","toolUseId":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","content":"List SDK directory"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:30.847384Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Gj7361kegqBQA6r4w3LGPo","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Gj7361kegqBQA6r4w3LGPo","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.420178Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Gj7361kegqBQA6r4w3LGPo","blocks":[{"type":"tool_use","id":"toolu_01Gj7361kegqBQA6r4w3LGPo","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/internal/mcp/ 2\u003e/dev/null","input":{"prompt":"List MCP package contents"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/internal/mcp/ 2\u003e/dev/null","description":"List MCP package contents"}},{"type":"tool_result","toolUseId":"toolu_01Gj7361kegqBQA6r4w3LGPo","content":"List MCP package contents"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.447949Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.46931Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Gj7361kegqBQA6r4w3LGPo","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.469437Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.470231Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Gj7361kegqBQA6r4w3LGPo","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Gj7361kegqBQA6r4w3LGPo","content":"```console\nauth\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.470956Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01WerEiCRGAnZL6fUr2Mwjbq","content":"```console\ncreate-extension\nexamples\ntypescript\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.47155Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:31.4724Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01DzGsrw6DnnaY6jLj1ZWQPg","content":"```console\nhost_api.go\nhost_api_test.go\nbridge_delivery_integration_test.go\nbridge_delivery_notifier.go\nbridge_delivery_notifier_test.go\nbundle.go\nbundle_additional_test.go\ncapability.go\ncapability_test.go\ncontract\ndescribe.go\ndescribe_test.go\ndiscord_provider_integration_test.go\ngchat_provider_integration_test.go\ngithub_provider_integration_test.go\nhost_api.go\nhost_api_bridges.go\nhost_api_bridges_render_test.go\nhost_api_integration_test.go\nhost_api_resources.go\nhost_api_tasks.go\nhost_api_test.go\ninstall_managed.go\ninstall_managed_test.go\nlinear_provider_integration_test.go\nmanager.go\nmanager_integration_test.go\nmanager_test.go\nmanifest.go\nmanifest_integration_test.go\nmanifest_test.go\nperf_bench_test.go\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:35.423429Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:35.423881Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","name":"Grep","title":"grep","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:36.923644Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","blocks":[{"type":"tool_use","id":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","name":"Grep","title":"grep -l 
\"CapabilityProvide|tool.provider|tools/call|provide_tools\" /Users/pedronauck/Dev/compozy/agh/internal/extension/protocol","input":{"path":"/Users/pedronauck/Dev/compozy/agh/internal/extension/protocol","pattern":"CapabilityProvide|tool.provider|tools/call|provide_tools"},"rawInput":{"output_mode":"files_with_matches","path":"/Users/pedronauck/Dev/compozy/agh/internal/extension/protocol","pattern":"CapabilityProvide|tool.provider|tools/call|provide_tools"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:36.933914Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Xroyujwc14LakVAbBgcFn1","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Xroyujwc14LakVAbBgcFn1","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:37.806516Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Xroyujwc14LakVAbBgcFn1","blocks":[{"type":"tool_use","id":"toolu_01Xroyujwc14LakVAbBgcFn1","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/ 2\u003e/dev/null","input":{"prompt":"List MCP auth directory"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/ 2\u003e/dev/null","description":"List MCP auth directory"}},{"type":"tool_result","toolUseId":"toolu_01Xroyujwc14LakVAbBgcFn1","content":"List MCP auth directory"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:37.807083Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","name":"Grep","title":"grep","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.56014Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","blocks":[{"type":"tool_use","id":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","name":"Grep","title":"grep -l \"^func \\w+Tool|extension\\.tool|Tool\\(\" /Users/pedronauck/Dev/compozy/agh/sdk/typescript","input":{"path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript","pattern":"^func \\w+Tool|extension\\.tool|Tool\\("},"rawInput":{"output_mode":"files_with_matches","path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript","pattern":"^func \\w+Tool|extension\\.tool|Tool\\("}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.633312Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.648128Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.648576Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Xroyujwc14LakVAbBgcFn1","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.649419Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01QsU5mh3AJxyFjGVAhVs7WW","content":"Found 1 file\ninternal/extension/protocol/host_api.go"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.649771Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Xroyujwc14LakVAbBgcFn1","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Xroyujwc14LakVAbBgcFn1","content":"```console\nmetadata.go\nmetadata_test.go\npkce.go\npkce_test.go\nservice.go\nservice_test.go\ntypes.go\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.651131Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:38.651671Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01AgVvKhmiMeRxqty3Ziu4PE","content":"No files found"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:41.195263Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:41.201212Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01WRWGgWjSfyDcteyGo5MvAP","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01WRWGgWjSfyDcteyGo5MvAP","name":"Grep","title":"grep","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:42.765782Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01KArvh3tyyRCfd2ydLGtUQY","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01KArvh3tyyRCfd2ydLGtUQY","name":"Read","title":"Read 
File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:42.76693Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WRWGgWjSfyDcteyGo5MvAP","blocks":[{"type":"tool_use","id":"toolu_01WRWGgWjSfyDcteyGo5MvAP","name":"Grep","title":"grep -n | head -50 \"Capability|tool\\.provider|ServiceMethod\" /Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/host_api.go","input":{"path":"/Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/host_api.go","pattern":"Capability|tool\\.provider|ServiceMethod"},"rawInput":{"-n":true,"head_limit":50,"output_mode":"content","path":"/Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/host_api.go","pattern":"Capability|tool\\.provider|ServiceMethod"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:42.916783Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KArvh3tyyRCfd2ydLGtUQY","blocks":[{"type":"tool_use","id":"toolu_01KArvh3tyyRCfd2ydLGtUQY","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/sdk/typescript","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:42.992358Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:42.997538Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KArvh3tyyRCfd2ydLGtUQY","toolCallState":"failed","blocks":[{"type":"tool_result","toolUseId":"toolu_01KArvh3tyyRCfd2ydLGtUQY","content":"```\nEISDIR: illegal operation on a directory, read 
'/Users/pedronauck/Dev/compozy/agh/sdk/typescript'\n```","isError":true}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:43.004285Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WRWGgWjSfyDcteyGo5MvAP","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:43.005409Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WRWGgWjSfyDcteyGo5MvAP","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01WRWGgWjSfyDcteyGo5MvAP","content":"12:\t// CapabilityProvideMemoryBackend is the provide surface for daemon-managed memory backends.\n13:\tCapabilityProvideMemoryBackend = \"memory.backend\"\n14:\t// CapabilityProvideBridgeAdapter is the provide surface for bridge-capable adapter extensions.\n15:\tCapabilityProvideBridgeAdapter = \"bridge.adapter\"\n18:// ExtensionServiceMethod identifies one AGH -\u003e extension capability service request.\n19:type ExtensionServiceMethod string\n22:\tExtensionServiceMethodMemoryStore ExtensionServiceMethod = \"memory/store\"\n23:\tExtensionServiceMethodMemoryRecall ExtensionServiceMethod = \"memory/recall\"\n24:\tExtensionServiceMethodMemoryForget ExtensionServiceMethod = \"memory/forget\"\n25:\tExtensionServiceMethodBridgesDeliver ExtensionServiceMethod = \"bridges/deliver\"\n147:var capabilityServiceMethods = map[string][]ExtensionServiceMethod{\n148:\tCapabilityProvideMemoryBackend: {\n149:\t\tExtensionServiceMethodMemoryStore,\n150:\t\tExtensionServiceMethodMemoryRecall,\n151:\t\tExtensionServiceMethodMemoryForget,\n153:\tCapabilityProvideBridgeAdapter: {\n154:\t\tExtensionServiceMethodBridgesDeliver,\n158:// CapabilityServiceMethods returns the negotiated AGH -\u003e extension service methods\n160:func CapabilityServiceMethods(provides []string) []string {\n168:\t\tfor _, method := range 
capabilityServiceMethods[provide] {"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:45.247074Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:45.247941Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:45.479116Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","blocks":[{"type":"tool_use","id":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/ 2\u003e/dev/null","input":{"prompt":"List TypeScript SDK src"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/ 2\u003e/dev/null","description":"List TypeScript SDK src"}},{"type":"tool_result","toolUseId":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","content":"List TypeScript SDK src"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:45.521851Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:45.535478Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:45.536448Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Xw9zMvoKNdZpUpFo3B8cdT","content":"```console\nbase-types.ts\nerrors.test.ts\nerrors.ts\nextension.test.ts\nextension.ts\ngenerated\nhost-api.test.ts\nhost-api.ts\nindex.ts\nintegration.test.ts\ntesting\ntransport.test.ts\ntransport.ts\ntypes.ts\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:47.682367Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:47.684603Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","name":"Grep","title":"grep","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:49.255848Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","blocks":[{"type":"tool_use","id":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","name":"Grep","title":"grep -n | head -40 \"handle\\(|Extension\\.handle|export.*function|class Extension\" /Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/extension.ts","input":{"path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/extension.ts","pattern":"handle\\(|Extension\\.handle|export.*function|class Extension"},"rawInput":{"-n":true,"head_limit":40,"output_mode":"content","path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/extension.ts","pattern":"handle\\(|Extension\\.handle|export.*function|class 
Extension"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:49.257519Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:50.16629Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","blocks":[{"type":"tool_use","id":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go 2\u003e/dev/null \u0026\u0026 wc -l /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go 2\u003e/dev/null","input":{"prompt":"Check MCP auth files"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go 2\u003e/dev/null \u0026\u0026 wc -l /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go 2\u003e/dev/null","description":"Check MCP auth files"}},{"type":"tool_result","toolUseId":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","content":"Check MCP auth files"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:50.181094Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:50.188614Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:50.189773Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01TNcjWM8ySXLMc9jZHXXaEa","content":"64:export class Extension {\n175: this.transport.handle(\n182: this.transport.handle("}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:50.192653Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:50.193447Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01CBwAeTpkPJJv4oKD55L4Z2","content":"```console\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go\n 546 /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:52.124164Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:52.12458Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01FiRktphcRu5u7og4sB3voo","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01FiRktphcRu5u7og4sB3voo","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:52.882913Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01FiRktphcRu5u7og4sB3voo","blocks":[{"type":"tool_use","id":"toolu_01FiRktphcRu5u7og4sB3voo","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/extension.ts (60 - 189)","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/extension.ts"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/sdk/typescript/src/extension.ts","limit":130,"offset":60}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:52.883279Z","update":{"kind":"tool_call_started","toolCallId":"toolu_013HPgbmmTvYELyddTsKYNFU","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_013HPgbmmTvYELyddTsKYNFU","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:53.198384Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_013HPgbmmTvYELyddTsKYNFU","blocks":[{"type":"tool_use","id":"toolu_013HPgbmmTvYELyddTsKYNFU","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:53.260567Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:53.264982Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01FiRktphcRu5u7og4sB3voo","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:53.265197Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_013HPgbmmTvYELyddTsKYNFU","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:53.266385Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01FiRktphcRu5u7og4sB3voo","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01FiRktphcRu5u7og4sB3voo","content":"```\n60\t) =\u003e Promise\u003cTResult\u003e | TResult;\n61\t\n62\ttype ReadyCallback = (host: HostAPI, session: ExtensionSession) =\u003e Promise\u003cvoid\u003e | void;\n63\t\n64\texport class Extension {\n65\t private transport: TransportLike;\n66\t private readonly stderr: NodeJS.WritableStream;\n67\t private readonly sdkVersion: string;\n68\t private readonly handlers = new Map\u003cstring, ExtensionHandler\u003e();\n69\t private readonly readyCallbacks = new Set\u003cReadyCallback\u003e();\n70\t private readonly transportBindings = new Set\u003cstring\u003e();\n71\t private readonly host: HostAPI;\n72\t private initialized = false;\n73\t private shutdownStarted = false;\n74\t private shutdownDeadlineMS: number | undefined;\n75\t private session: ExtensionSession | undefined;\n76\t private startPromise: Promise\u003cHostAPI\u003e | undefined;\n77\t private resolveStart: ((host: HostAPI) =\u003e void) | undefined;\n78\t private rejectStart: ((reason: unknown) =\u003e void) | undefined;\n79\t\n80\t public constructor(\n81\t public readonly definition: ExtensionDefinition,\n82\t options: ExtensionOptions = {}\n83\t ) {\n84\t this.transport = options.transport ?? 
new StdioTransport();\n85\t this.stderr = options.stderr ?? process.stderr;\n86\t this.sdkVersion = options.sdkVersion ?? SDK_VERSION;\n87\t this.host = new HostAPI(\n88\t {\n89\t call: async \u003cTResult\u003e(method: string, params?: unknown): Promise\u003cTResult\u003e =\u003e\n90\t await this.transport.call\u003cTResult\u003e(method, params),\n91\t },\n92\t { isReady: () =\u003e this.initialized \u0026\u0026 !this.shutdownStarted }\n93\t );\n94\t\n95\t this.bindTransportHandlers();\n96\t }\n97\t\n98\t public bindTransport(transport: TransportLike): this {\n99\t if (this.startPromise) {\n100\t throw new Error(\"transport may only be swapped before start()\");\n101\t }\n102\t this.transport = transport;\n103\t this.transportBindings.clear();\n104\t this.bindTransportHandlers();\n105\t return this;\n106\t }\n107\t\n108\t public handle\u003cTParams = unknown, TResult = unknown\u003e(\n109\t method: string,\n110\t handler: ExtensionHandler\u003cTParams, TResult\u003e\n111\t ): this {\n112\t const cleanMethod = method.trim();\n113\t if (cleanMethod === \"initialize\") {\n114\t throw new Error(\"initialize is reserved by the SDK\");\n115\t }\n116\t this.handlers.set(cleanMethod, handler as ExtensionHandler);\n117\t this.bindMethod(cleanMethod);\n118\t return this;\n119\t }\n120\t\n121\t public onReady(callback: ReadyCallback): this {\n122\t this.readyCallbacks.add(callback);\n123\t if (this.initialized \u0026\u0026 this.session) {\n124\t queueMicrotask(() =\u003e {\n125\t void this.runReadyCallback(callback, this.session!);\n126\t });\n127\t }\n128\t return this;\n129\t }\n130\t\n131\t public async start(): Promise\u003cHostAPI\u003e {\n132\t if (this.startPromise) {\n133\t return await this.startPromise;\n134\t }\n135\t\n136\t this.startPromise = new Promise\u003cHostAPI\u003e((resolve, reject) =\u003e {\n137\t this.resolveStart = resolve;\n138\t this.rejectStart = reject;\n139\t });\n140\t\n141\t this.transport.onTransportError(error =\u003e {\n142\t if 
(!this.initialized \u0026\u0026 this.rejectStart) {\n143\t this.rejectStart(error);\n144\t }\n145\t this.logError(\"transport error\", error);\n146\t });\n147\t this.transport.start();\n148\t\n149\t return await this.startPromise;\n150\t }\n151\t\n152\t public getImplementedMethods(): string[] {\n153\t const methods = new Set\u003cstring\u003e([\"health_check\", \"shutdown\"]);\n154\t for (const method of this.handlers.keys()) {\n155\t methods.add(method);\n156\t }\n157\t return Array.from(methods).sort();\n158\t }\n159\t\n160\t public getSupportedHookEvents(): HookEvent[] {\n161\t return [...(this.definition.supported_hook_events ?? [])];\n162\t }\n163\t\n164\t private bindTransportHandlers(): void {\n165\t this.bindMethod(\"initialize\");\n166\t this.bindMethod(\"health_check\");\n167\t this.bindMethod(\"shutdown\");\n168\t for (const method of this.handlers.keys()) {\n169\t this.bindMethod(method);\n170\t }\n171\t }\n172\t\n173\t private bindMethod(method: string): void {\n174\t if (this.transportBindings.has(method)) {\n175\t this.transport.handle(\n176\t method,\n177\t async (params, request) =\u003e await this.dispatch(method, params, request)\n178\t );\n179\t return;\n180\t }\n181\t this.transportBindings.add(method);\n182\t this.transport.handle(\n183\t method,\n184\t async (params, request) =\u003e await this.dispatch(method, params, request)\n185\t );\n186\t }\n187\t\n188\t private async dispatch(\n189\t method: string,\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:53.267369Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_013HPgbmmTvYELyddTsKYNFU","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_013HPgbmmTvYELyddTsKYNFU","content":"```\n1\tpackage auth\n2\t\n3\timport (\n4\t\t\"context\"\n5\t\t\"errors\"\n6\t\t\"strings\"\n7\t\t\"time\"\n8\t\n9\t\taghconfig 
\"github.com/pedronauck/agh/internal/config\"\n10\t)\n11\t\n12\t// ErrTokenNotFound reports missing persisted MCP auth state for one server.\n13\tvar ErrTokenNotFound = errors.New(\"mcp auth: token not found\")\n14\t\n15\t// StatusValue is the redacted operator-facing authentication state.\n16\ttype StatusValue string\n17\t\n18\tconst (\n19\t\tStatusUnconfigured StatusValue = \"unconfigured\"\n20\t\tStatusNeedsLogin StatusValue = \"needs_login\"\n21\t\tStatusAuthenticated StatusValue = \"authenticated\"\n22\t\tStatusExpired StatusValue = \"expired\"\n23\t\tStatusInvalid StatusValue = \"invalid\"\n24\t)\n25\t\n26\t// ServerConfig is the token-free auth configuration used by the OAuth service.\n27\ttype ServerConfig struct {\n28\t\tServerName string\n29\t\tTransport string\n30\t\tRemoteURL string\n31\t\tType string\n32\t\tIssuerURL string\n33\t\tMetadataURL string\n34\t\tAuthorizationURL string\n35\t\tTokenURL string\n36\t\tRevocationURL string\n37\t\tClientID string\n38\t\tClientSecret string\n39\t\tClientSecretEnv string\n40\t\tScopes []string\n41\t}\n42\t\n43\t// Metadata is the OAuth authorization server metadata needed for PKCE flows.\n44\ttype Metadata struct {\n45\t\tIssuer string `json:\"issuer,omitempty\"`\n46\t\tAuthorizationEndpoint string `json:\"authorization_endpoint\"`\n47\t\tTokenEndpoint string `json:\"token_endpoint\"`\n48\t\tRevocationEndpoint string `json:\"revocation_endpoint,omitempty\"`\n49\t\tCodeChallengeMethodsSupported []string `json:\"code_challenge_methods_supported,omitempty\"`\n50\t\tScopesSupported []string `json:\"scopes_supported,omitempty\"`\n51\t}\n52\t\n53\t// TokenRecord is the durable token-store row. 
It must never be rendered\n54\t// directly in public API or CLI output.\n55\ttype TokenRecord struct {\n56\t\tServerName string\n57\t\tIssuer string\n58\t\tClientID string\n59\t\tScopes []string\n60\t\tAccessToken string\n61\t\tRefreshToken string\n62\t\tTokenType string\n63\t\tExpiresAt time.Time\n64\t\tObtainedAt time.Time\n65\t\tUpdatedAt time.Time\n66\t}\n67\t\n68\t// Status is the token-redacted state used by CLI and settings APIs.\n69\ttype Status struct {\n70\t\tServerName string `json:\"server_name\"`\n71\t\tStatus StatusValue `json:\"status\"`\n72\t\tRemoteURL string `json:\"remote_url,omitempty\"`\n73\t\tAuthType string `json:\"auth_type,omitempty\"`\n74\t\tClientID string `json:\"client_id,omitempty\"`\n75\t\tIssuer string `json:\"issuer,omitempty\"`\n76\t\tScopes []string `json:\"scopes,omitempty\"`\n77\t\tExpiresAt *time.Time `json:\"expires_at,omitempty\"`\n78\t\tUpdatedAt *time.Time `json:\"updated_at,omitempty\"`\n79\t\tRefreshable bool `json:\"refreshable\"`\n80\t\tTokenPresent bool `json:\"token_present\"`\n81\t\tRevocationURL string `json:\"revocation_url,omitempty\"`\n82\t\tDiagnostic string `json:\"diagnostic,omitempty\"`\n83\t\tAuthorizationURL string `json:\"authorization_url,omitempty\"`\n84\t}\n85\t\n86\t// TokenStore persists OAuth token material behind a narrow boundary.\n87\ttype TokenStore interface {\n88\t\tSaveMCPAuthToken(ctx context.Context, token TokenRecord) error\n89\t\tGetMCPAuthToken(ctx context.Context, serverName string) (TokenRecord, error)\n90\t\tListMCPAuthTokens(ctx context.Context) ([]TokenRecord, error)\n91\t\tDeleteMCPAuthToken(ctx context.Context, serverName string) error\n92\t}\n93\t\n94\t// ServerConfigFromMCP converts a config MCP server into token-free auth\n95\t// service input. 
lookupSecret receives the configured client_secret_env and\n96\t// returns the actual secret value when present.\n97\tfunc ServerConfigFromMCP(\n98\t\tserver aghconfig.MCPServer,\n99\t\tlookupSecret func(string) string,\n100\t) (ServerConfig, error) {\n101\t\tif err := server.Validate(\"mcp_server\"); err != nil {\n102\t\t\treturn ServerConfig{}, err\n103\t\t}\n104\t\tif server.Auth.IsZero() {\n105\t\t\treturn ServerConfig{\n106\t\t\t\tServerName: strings.TrimSpace(server.Name),\n107\t\t\t\tTransport: string(server.EffectiveTransport()),\n108\t\t\t\tRemoteURL: strings.TrimSpace(server.URL),\n109\t\t\t}, nil\n110\t\t}\n111\t\n112\t\tsecretEnv := strings.TrimSpace(server.Auth.ClientSecretEnv)\n113\t\tsecret := \"\"\n114\t\tif secretEnv != \"\" \u0026\u0026 lookupSecret != nil {\n115\t\t\tsecret = lookupSecret(secretEnv)\n116\t\t}\n117\t\n118\t\treturn ServerConfig{\n119\t\t\tServerName: strings.TrimSpace(server.Name),\n120\t\t\tTransport: string(server.EffectiveTransport()),\n121\t\t\tRemoteURL: strings.TrimSpace(server.URL),\n122\t\t\tType: strings.TrimSpace(string(server.Auth.Type)),\n123\t\t\tIssuerURL: strings.TrimSpace(server.Auth.IssuerURL),\n124\t\t\tMetadataURL: strings.TrimSpace(server.Auth.MetadataURL),\n125\t\t\tAuthorizationURL: strings.TrimSpace(server.Auth.AuthorizationURL),\n126\t\t\tTokenURL: strings.TrimSpace(server.Auth.TokenURL),\n127\t\t\tRevocationURL: strings.TrimSpace(server.Auth.RevocationURL),\n128\t\t\tClientID: strings.TrimSpace(server.Auth.ClientID),\n129\t\t\tClientSecret: secret,\n130\t\t\tClientSecretEnv: secretEnv,\n131\t\t\tScopes: trimStrings(server.Auth.Scopes),\n132\t\t}, nil\n133\t}\n134\t\n135\t// ServerConfigsFromMCP returns auth service configs for every auth-enabled MCP\n136\t// server in the supplied list.\n137\tfunc ServerConfigsFromMCP(\n138\t\tservers []aghconfig.MCPServer,\n139\t\tlookupSecret func(string) string,\n140\t) ([]ServerConfig, error) {\n141\t\tconfigs := make([]ServerConfig, 0, len(servers))\n142\t\tfor _, 
server := range servers {\n143\t\t\tif server.Auth.IsZero() {\n144\t\t\t\tcontinue\n145\t\t\t}\n146\t\t\tcfg, err := ServerConfigFromMCP(server, lookupSecret)\n147\t\t\tif err != nil {\n148\t\t\t\treturn nil, err\n149\t\t\t}\n150\t\t\tconfigs = append(configs, cfg)\n151\t\t}\n152\t\treturn configs, nil\n153\t}\n154\t\n155\t// Validate checks whether a server config is sufficient for auth actions.\n156\tfunc (c ServerConfig) Validate() error {\n157\t\tswitch {\n158\t\tcase strings.TrimSpace(c.ServerName) == \"\":\n159\t\t\treturn errors.New(\"mcp auth: server name is required\")\n160\t\tcase strings.TrimSpace(c.Type) == \"\":\n161\t\t\treturn errors.New(\"mcp auth: auth type is required\")\n162\t\tcase strings.TrimSpace(c.Type) != string(aghconfig.MCPAuthTypeOAuth2PKCE):\n163\t\t\treturn errors.New(\"mcp auth: auth type must be oauth2_pkce\")\n164\t\tcase strings.TrimSpace(c.ClientID) == \"\":\n165\t\t\treturn errors.New(\"mcp auth: client id is required\")\n166\t\tcase strings.TrimSpace(c.MetadataURL) == \"\" \u0026\u0026\n167\t\t\tstrings.TrimSpace(c.IssuerURL) == \"\" \u0026\u0026\n168\t\t\t(strings.TrimSpace(c.AuthorizationURL) == \"\" || strings.TrimSpace(c.TokenURL) == \"\"):\n169\t\t\treturn errors.New(\"mcp auth: OAuth metadata or authorization/token endpoints are required\")\n170\t\tdefault:\n171\t\t\treturn nil\n172\t\t}\n173\t}\n174\t\n175\tfunc trimStrings(values []string) []string {\n176\t\tif len(values) == 0 {\n177\t\t\treturn nil\n178\t\t}\n179\t\tout := make([]string, 0, len(values))\n180\t\tfor _, value := range values {\n181\t\t\ttrimmed := strings.TrimSpace(value)\n182\t\t\tif trimmed != \"\" {\n183\t\t\t\tout = append(out, trimmed)\n184\t\t\t}\n185\t\t}\n186\t\treturn out\n187\t}\n188\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:55.229168Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:55.230419Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01ErFf23W7mc3RZNtiPgaLGF","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01ErFf23W7mc3RZNtiPgaLGF","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:56.747567Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01ErFf23W7mc3RZNtiPgaLGF","blocks":[{"type":"tool_use","id":"toolu_01ErFf23W7mc3RZNtiPgaLGF","name":"Task","title":"grep -rn \"TokenStore\\|GetMCPAuthToken\" /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/ | head -20","input":{"prompt":"Look for TokenStore consumers"},"rawInput":{"command":"grep -rn \"TokenStore\\|GetMCPAuthToken\" /Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/ | head -20","description":"Look for TokenStore consumers"}},{"type":"tool_result","toolUseId":"toolu_01ErFf23W7mc3RZNtiPgaLGF","content":"Look for TokenStore consumers"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:56.747928Z","update":{"kind":"tool_call_started","toolCallId":"toolu_015EVrtyskic28REriF6neAd","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_015EVrtyskic28REriF6neAd","name":"Grep","title":"grep","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:58.130871Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015EVrtyskic28REriF6neAd","blocks":[{"type":"tool_use","id":"toolu_015EVrtyskic28REriF6neAd","name":"Grep","title":"grep -l \"CapabilityProvide|provide_tools|tools/call|ExtensionToolInvoker|MCPToolInvoker\" 
/Users/pedronauck/Dev/compozy/agh","input":{"path":"/Users/pedronauck/Dev/compozy/agh","pattern":"CapabilityProvide|provide_tools|tools/call|ExtensionToolInvoker|MCPToolInvoker"},"rawInput":{"output_mode":"files_with_matches","path":"/Users/pedronauck/Dev/compozy/agh","pattern":"CapabilityProvide|provide_tools|tools/call|ExtensionToolInvoker|MCPToolInvoker"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:58.184541Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:58.208807Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01ErFf23W7mc3RZNtiPgaLGF","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:58.209718Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01ErFf23W7mc3RZNtiPgaLGF","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01ErFf23W7mc3RZNtiPgaLGF","content":"```console\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go:20:\tstore TokenStore\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go:27:func NewService(store TokenStore, opts ...ServiceOption) (*Service, error) {\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go:157:\tcurrent, err := s.store.GetMCPAuthToken(ctx, cfg.ServerName)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go:199:\ttoken, err := s.store.GetMCPAuthToken(ctx, cfg.ServerName)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service.go:215:\ttoken, err := s.store.GetMCPAuthToken(ctx, cfg.ServerName)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go:86:// TokenStore persists OAuth token material behind a narrow boundary.\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go:87:type TokenStore 
interface {\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/types.go:89:\tGetMCPAuthToken(ctx context.Context, serverName string) (TokenRecord, error)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:22:\tstore := newMemoryTokenStore()\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:169:\ttoken, err := store.GetMCPAuthToken(ctx, \"linear\")\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:171:\t\tt.Fatalf(\"GetMCPAuthToken() error = %v\", err)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:189:\tif _, err := store.GetMCPAuthToken(ctx, \"linear\"); !errors.Is(err, ErrTokenNotFound) {\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:190:\t\tt.Fatalf(\"GetMCPAuthToken(after logout) error = %v, want ErrTokenNotFound\", err)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:199:\tstore := newMemoryTokenStore()\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:244:\tstore := newMemoryTokenStore()\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:294:\tif _, err := store.GetMCPAuthToken(ctx, \"linear\"); !errors.Is(err, ErrTokenNotFound) {\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:295:\t\tt.Fatalf(\"GetMCPAuthToken(after failed revocation logout) error = %v, want ErrTokenNotFound\", err)\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:328:\t\tservice, err := NewService(newMemoryTokenStore())\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:338:type memoryTokenStore struct {\n/Users/pedronauck/Dev/compozy/agh/internal/mcp/auth/service_test.go:343:func newMemoryTokenStore() *memoryTokenStore {\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:58.480101Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015EVrtyskic28REriF6neAd","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:20:58.481242Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015EVrtyskic28REriF6neAd","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_015EVrtyskic28REriF6neAd","content":"Found 45 files\n.compozy/tasks/tools-registry/qa/peer-review-result-round2.json\n.compozy/tasks/tools-registry/_techspec.md\n.compozy/tasks/tools-registry/adrs/adr-009-public-go-extension-tool-sdk.md\n.compozy/tasks/tools-registry/adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md\n.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md\n.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md\n.compozy/tasks/tools-registry/qa/peer-review-verdict.json\n.compozy/tasks/tools-registry/qa/peer-review-result.json\ninternal/daemon/daemon_test.go\ninternal/extension/protocol/host_api.go\ninternal/daemon/daemon_integration_test.go\n.compozy/tasks/autonomous/_techspec.md\ninternal/extension/manifest_test.go\ninternal/extension/manager.go\ninternal/extension/manifest.go\ninternal/bridgesdk/test_helpers_test.go\ninternal/daemon/bridges.go\ninternal/subprocess/process_test.go\ninternal/extension/manifest_integration_test.go\ninternal/extension/manager_test.go\ninternal/extension/manager_integration_test.go\npackages/site/content/runtime/core/extensions/develop.mdx\n.compozy/tasks/autonomous/reviews/opus-techspec-review.md\n.compozy/tasks/autonomous/analysis/analysis.md\n.compozy/tasks/autonomous/analysis/analysis_orchestration_control_loop.md\n.compozy/tasks/autonomous/analysis/analysis_task_discovery_claim.md\n.agents/skills/next-best-practices/debug-tricks.md\n.compozy/tasks/_arc
hived/20260417-021722-ext-parity/task_08.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/task_05.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/qa/test-plans/extensibility-parity-test-plan.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/qa/test-cases/TC-INT-013.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/qa/test-cases/TC-FUNC-020.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/qa/test-cases/SMOKE-007.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/memory/task_08.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/memory/MEMORY.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/architecture.html\n.compozy/tasks/_archived/20260417-021722-ext-parity/analysis/analysis.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/adrs/adr-001.md\n.compozy/tasks/_archived/20260417-021722-ext-parity/_techspec.md\n.compozy/tasks/_archived/20260411-014454-ext-architecture/_protocol.md\n.compozy/tasks/_archived/20260411-014454-ext-architecture/_examples.md\n.compozy/tasks/_archived/20260411-014454-ext-architecture/analysis_hermes.md\n.compozy/tasks/_archived/20260411-014454-ext-architecture/_techspec.md\n.compozy/tasks/_archived/20260411-014454-ext-architecture/adrs/adr-004.md\ndocs/ideas/extensability/analysis/analysis_hermes.md"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:00.908908Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:00.909333Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01KoDHNWw63SDCjio2zGYSF7","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01KoDHNWw63SDCjio2zGYSF7","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:01.451957Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KoDHNWw63SDCjio2zGYSF7","blocks":[{"type":"tool_use","id":"toolu_01KoDHNWw63SDCjio2zGYSF7","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/host_api.go (1 - 170)","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/host_api.go"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/extension/protocol/host_api.go","limit":170,"offset":1}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:01.524325Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:01.534749Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KoDHNWw63SDCjio2zGYSF7","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:01.53778Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KoDHNWw63SDCjio2zGYSF7","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01KoDHNWw63SDCjio2zGYSF7","content":"```\n1\tpackage protocol\n2\t\n3\timport (\n4\t\t\"slices\"\n5\t\t\"strings\"\n6\t)\n7\t\n8\t// HostAPIMethod identifies one extension -\u003e AGH Host API request.\n9\ttype HostAPIMethod string\n10\t\n11\tconst (\n12\t\t// CapabilityProvideMemoryBackend is the provide surface for daemon-managed memory backends.\n13\t\tCapabilityProvideMemoryBackend = \"memory.backend\"\n14\t\t// CapabilityProvideBridgeAdapter is the provide surface for bridge-capable adapter extensions.\n15\t\tCapabilityProvideBridgeAdapter = \"bridge.adapter\"\n16\t)\n17\t\n18\t// ExtensionServiceMethod identifies one AGH -\u003e 
extension capability service request.\n19\ttype ExtensionServiceMethod string\n20\t\n21\tconst (\n22\t\tExtensionServiceMethodMemoryStore ExtensionServiceMethod = \"memory/store\"\n23\t\tExtensionServiceMethodMemoryRecall ExtensionServiceMethod = \"memory/recall\"\n24\t\tExtensionServiceMethodMemoryForget ExtensionServiceMethod = \"memory/forget\"\n25\t\tExtensionServiceMethodBridgesDeliver ExtensionServiceMethod = \"bridges/deliver\"\n26\t)\n27\t\n28\tconst (\n29\t\tHostAPIMethodSessionsList HostAPIMethod = \"sessions/list\"\n30\t\tHostAPIMethodSessionsCreate HostAPIMethod = \"sessions/create\"\n31\t\tHostAPIMethodSessionsPrompt HostAPIMethod = \"sessions/prompt\"\n32\t\tHostAPIMethodSessionsStop HostAPIMethod = \"sessions/stop\"\n33\t\tHostAPIMethodSessionsStatus HostAPIMethod = \"sessions/status\"\n34\t\tHostAPIMethodSessionsEvents HostAPIMethod = \"sessions/events\"\n35\t\tHostAPIMethodSandboxList HostAPIMethod = \"sandbox/list\"\n36\t\tHostAPIMethodSandboxInfo HostAPIMethod = \"sandbox/info\"\n37\t\tHostAPIMethodSandboxExec HostAPIMethod = \"sandbox/exec\"\n38\t\tHostAPIMethodMemoryRecall HostAPIMethod = \"memory/recall\"\n39\t\tHostAPIMethodMemoryStore HostAPIMethod = \"memory/store\"\n40\t\tHostAPIMethodMemoryForget HostAPIMethod = \"memory/forget\"\n41\t\tHostAPIMethodObserveHealth HostAPIMethod = \"observe/health\"\n42\t\tHostAPIMethodObserveEvents HostAPIMethod = \"observe/events\"\n43\t\tHostAPIMethodSkillsList HostAPIMethod = \"skills/list\"\n44\t\tHostAPIMethodAutomationJobs HostAPIMethod = \"automation/jobs\"\n45\t\tHostAPIMethodAutomationJobsGet HostAPIMethod = \"automation/jobs/get\"\n46\t\tHostAPIMethodAutomationJobsCreate HostAPIMethod = \"automation/jobs/create\"\n47\t\tHostAPIMethodAutomationJobsUpdate HostAPIMethod = \"automation/jobs/update\"\n48\t\tHostAPIMethodAutomationJobsDelete HostAPIMethod = \"automation/jobs/delete\"\n49\t\tHostAPIMethodAutomationJobsTrigger HostAPIMethod = 
\"automation/jobs/trigger\"\n50\t\tHostAPIMethodAutomationJobsRuns HostAPIMethod = \"automation/jobs/runs\"\n51\t\tHostAPIMethodAutomationTriggers HostAPIMethod = \"automation/triggers\"\n52\t\tHostAPIMethodAutomationTriggersGet HostAPIMethod = \"automation/triggers/get\"\n53\t\tHostAPIMethodAutomationTriggersCreate HostAPIMethod = \"automation/triggers/create\"\n54\t\tHostAPIMethodAutomationTriggersUpdate HostAPIMethod = \"automation/triggers/update\"\n55\t\tHostAPIMethodAutomationTriggersDelete HostAPIMethod = \"automation/triggers/delete\"\n56\t\tHostAPIMethodAutomationTriggersRuns HostAPIMethod = \"automation/triggers/runs\"\n57\t\tHostAPIMethodAutomationTriggersFire HostAPIMethod = \"automation/triggers/fire\"\n58\t\tHostAPIMethodAutomationRuns HostAPIMethod = \"automation/runs\"\n59\t\tHostAPIMethodTasks HostAPIMethod = \"tasks\"\n60\t\tHostAPIMethodTasksGet HostAPIMethod = \"tasks/get\"\n61\t\tHostAPIMethodTasksTimeline HostAPIMethod = \"tasks/timeline\"\n62\t\tHostAPIMethodTasksTree HostAPIMethod = \"tasks/tree\"\n63\t\tHostAPIMethodTasksDashboard HostAPIMethod = \"tasks/dashboard\"\n64\t\tHostAPIMethodTasksInbox HostAPIMethod = \"tasks/inbox\"\n65\t\tHostAPIMethodTasksCreate HostAPIMethod = \"tasks/create\"\n66\t\tHostAPIMethodTasksUpdate HostAPIMethod = \"tasks/update\"\n67\t\tHostAPIMethodTasksCancel HostAPIMethod = \"tasks/cancel\"\n68\t\tHostAPIMethodTasksRuns HostAPIMethod = \"tasks/runs\"\n69\t\tHostAPIMethodTasksRunsGet HostAPIMethod = \"tasks/runs/get\"\n70\t\tHostAPIMethodTasksRunsEnqueue HostAPIMethod = \"tasks/runs/enqueue\"\n71\t\tHostAPIMethodTasksRunsClaim HostAPIMethod = \"tasks/runs/claim\"\n72\t\tHostAPIMethodTasksRunsStart HostAPIMethod = \"tasks/runs/start\"\n73\t\tHostAPIMethodTasksRunsAttachSession HostAPIMethod = \"tasks/runs/attach_session\"\n74\t\tHostAPIMethodTasksRunsComplete HostAPIMethod = \"tasks/runs/complete\"\n75\t\tHostAPIMethodTasksRunsFail HostAPIMethod = \"tasks/runs/fail\"\n76\t\tHostAPIMethodTasksRunsCancel 
HostAPIMethod = \"tasks/runs/cancel\"\n77\t\tHostAPIMethodResourcesList HostAPIMethod = \"resources/list\"\n78\t\tHostAPIMethodResourcesGet HostAPIMethod = \"resources/get\"\n79\t\tHostAPIMethodResourcesSnapshot HostAPIMethod = \"resources/snapshot\"\n80\t\tHostAPIMethodBridgesInstancesList HostAPIMethod = \"bridges/instances/list\"\n81\t\tHostAPIMethodBridgesMessagesIngest HostAPIMethod = \"bridges/messages/ingest\"\n82\t\tHostAPIMethodBridgesInstancesGet HostAPIMethod = \"bridges/instances/get\"\n83\t\tHostAPIMethodBridgesInstancesReportState HostAPIMethod = \"bridges/instances/report_state\"\n84\t)\n85\t\n86\t// AllHostAPIMethods returns the canonical Host API method registry in wire order.\n87\tfunc AllHostAPIMethods() []HostAPIMethod {\n88\t\treturn []HostAPIMethod{\n89\t\t\tHostAPIMethodSessionsList,\n90\t\t\tHostAPIMethodSessionsCreate,\n91\t\t\tHostAPIMethodSessionsPrompt,\n92\t\t\tHostAPIMethodSessionsStop,\n93\t\t\tHostAPIMethodSessionsStatus,\n94\t\t\tHostAPIMethodSessionsEvents,\n95\t\t\tHostAPIMethodSandboxList,\n96\t\t\tHostAPIMethodSandboxInfo,\n97\t\t\tHostAPIMethodSandboxExec,\n98\t\t\tHostAPIMethodMemoryRecall,\n99\t\t\tHostAPIMethodMemoryStore,\n100\t\t\tHostAPIMethodMemoryForget,\n101\t\t\tHostAPIMethodObserveHealth,\n102\t\t\tHostAPIMethodObserveEvents,\n103\t\t\tHostAPIMethodSkillsList,\n104\t\t\tHostAPIMethodAutomationJobs,\n105\t\t\tHostAPIMethodAutomationJobsGet,\n106\t\t\tHostAPIMethodAutomationJobsCreate,\n107\t\t\tHostAPIMethodAutomationJobsUpdate,\n108\t\t\tHostAPIMethodAutomationJobsDelete,\n109\t\t\tHostAPIMethodAutomationJobsTrigger,\n110\t\t\tHostAPIMethodAutomationJobsRuns,\n111\t\t\tHostAPIMethodAutomationTriggers,\n112\t\t\tHostAPIMethodAutomationTriggersGet,\n113\t\t\tHostAPIMethodAutomationTriggersCreate,\n114\t\t\tHostAPIMethodAutomationTriggersUpdate,\n115\t\t\tHostAPIMethodAutomationTriggersDelete,\n116\t\t\tHostAPIMethodAutomationTriggersRuns,\n117\t\t\tHostAPIMethodAutomationTriggersFire,\n118\t\t\tHostAPIMethodAutomationRu
ns,\n119\t\t\tHostAPIMethodTasks,\n120\t\t\tHostAPIMethodTasksGet,\n121\t\t\tHostAPIMethodTasksTimeline,\n122\t\t\tHostAPIMethodTasksTree,\n123\t\t\tHostAPIMethodTasksDashboard,\n124\t\t\tHostAPIMethodTasksInbox,\n125\t\t\tHostAPIMethodTasksCreate,\n126\t\t\tHostAPIMethodTasksUpdate,\n127\t\t\tHostAPIMethodTasksCancel,\n128\t\t\tHostAPIMethodTasksRuns,\n129\t\t\tHostAPIMethodTasksRunsGet,\n130\t\t\tHostAPIMethodTasksRunsEnqueue,\n131\t\t\tHostAPIMethodTasksRunsClaim,\n132\t\t\tHostAPIMethodTasksRunsStart,\n133\t\t\tHostAPIMethodTasksRunsAttachSession,\n134\t\t\tHostAPIMethodTasksRunsComplete,\n135\t\t\tHostAPIMethodTasksRunsFail,\n136\t\t\tHostAPIMethodTasksRunsCancel,\n137\t\t\tHostAPIMethodResourcesList,\n138\t\t\tHostAPIMethodResourcesGet,\n139\t\t\tHostAPIMethodResourcesSnapshot,\n140\t\t\tHostAPIMethodBridgesInstancesList,\n141\t\t\tHostAPIMethodBridgesMessagesIngest,\n142\t\t\tHostAPIMethodBridgesInstancesGet,\n143\t\t\tHostAPIMethodBridgesInstancesReportState,\n144\t\t}\n145\t}\n146\t\n147\tvar capabilityServiceMethods = map[string][]ExtensionServiceMethod{\n148\t\tCapabilityProvideMemoryBackend: {\n149\t\t\tExtensionServiceMethodMemoryStore,\n150\t\t\tExtensionServiceMethodMemoryRecall,\n151\t\t\tExtensionServiceMethodMemoryForget,\n152\t\t},\n153\t\tCapabilityProvideBridgeAdapter: {\n154\t\t\tExtensionServiceMethodBridgesDeliver,\n155\t\t},\n156\t}\n157\t\n158\t// CapabilityServiceMethods returns the negotiated AGH -\u003e extension service methods\n159\t// enabled by the declared provide surfaces.\n160\tfunc CapabilityServiceMethods(provides []string) []string {\n161\t\tif len(provides) == 0 {\n162\t\t\treturn nil\n163\t\t}\n164\t\n165\t\tseen := make(map[string]struct{})\n166\t\tmethods := make([]string, 0)\n167\t\tfor _, provide := range normalizeUniqueStrings(provides) {\n168\t\t\tfor _, method := range capabilityServiceMethods[provide] {\n169\t\t\t\tname := strings.TrimSpace(string(method))\n170\t\t\t\tif name == \"\" 
{\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:21:03.757457Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:22.441562Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:22.443337Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"Based on my review of the TechSpec, ADRs, research analyses, less"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:22.882101Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ons learned, and the existing codebase (extension protocol, MCP auth, SDK struct"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:22.907395Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ure), here is my verdict.\n\n```json\n{\n \"blockers\": [\n {\n \"id\": \"B-001\","}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:23.222613Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n \"section\": \"Implementation Design / Core Interfaces + Integration"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:23.607196Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Points / Extensions\",\n \"issue\": \"The ExtensionToolInvoker interface is 
pasted,"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:23.984611Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" but the structs it depends on — ExtensionToolCallRequest and the mat"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:24.365863Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ching response/runtime payload that the TS SDK, Go SDK, and da"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:24.744768Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"emon must all serialize identically — are referenced as named types and never defined as"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:25.174254Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Go code blocks. 
The wire shape of the new extension service meth"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:25.555615Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ods 'tool.provider' / 'prov"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:25.941416Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ide_tools' / 'tools/call' is also not registered the way existing host_api.go "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:26.317703Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ExtensionServiceMethod constants are (memory/store, memory/recall, b"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:26.699466Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ridges/deliver). 
Three independent implementations (sdk/typescript, s"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:27.080984Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"dk/go, internal/extension/protocol + internal/extension/manager) pl"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:27.46016Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"us the registry adapter are required to converge on a wire that"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:27.840811Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the spec does not draw.\",\n \"rationale\": \"L-012 (`tech"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:28.235624Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"spec-prose-only-rework"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:28.616095Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"`) is explicit that contract-laden surfaces require conc"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:29.26041Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"rete Go interface signatures pasted as code blocks; otherwise reviewers converge N"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:29.509285Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" implementations across N rework rounds. ADR-001 and"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:29.77133Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" ADR-008 both depend on bit-identical wire descriptors for manifest/"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:30.146671Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"runtime reconciliation, and the existing protocol package tre"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:30.506656Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ats every AGH→extension method as a typed constant."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:30.894764Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Leaving the new methods undefined will repeat the aut"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:31.269359Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"onomy-vs-release-adjustments review"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:31.706642Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" trajectory described in L-012.\",\n \"suggested_fix\": \"Paste, in 
the"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:32.091962Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" TechSpec, full Go struct definitions for ExtensionToolCallRequest, ExtensionToolCallResponse, Ext"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:32.463494Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ensionProvideToolsResponse (or equivalent), MCPToolCallRequ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:32.841792Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"est, and MCPToolCallResponse, plus the `internal/extension/protocol` additions: `C"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:33.242833Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"apabilityProvideToolProvider = \\\"tool.provider\\\"`, `ExtensionServiceMethodProvideTools = \\\"provide_"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:33.60343Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"tools\\\"`, `ExtensionServiceMethodToolsCall = \\\"tools/call\\\"`, and their entry in `"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:33.988585Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"capabilityServiceMethods`. 
State explicitly which"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:34.381208Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" fields are wire-stable (digests, ToolID, handler)"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:34.853404Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" vs runtime-only (latency, content blocks).\"\n },\n {\n "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:35.128608Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\"id\": \"B-002\",\n \"section\": \"Data Models / ExtensionToolRuntimeDescriptor + AD"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:35.509129Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"R-008 Implementation Notes\",\n \"issue\": \"Manifest/runtime reconciliation requ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:35.893258Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ires `input_schema_digest` and `output_schema_digest` to match"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:36.28368Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" exactly between the manifest, the TS SDK helper output, the"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:37.008604Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Go SDK helper output, and the daemon's manifest validator. The TechSpec never specifies the digest algorithm,"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:37.045834Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" byte canonicalization, or normalization rules (key ordering, escape beh"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:37.428052Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"avior, integer/number representation, $ref hand"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:37.799197Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ling). 
Without this, identical schemas will hash differently across language toolch"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:38.177052Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ains and produce false `extension_runtime_mismatch` reason codes —"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:38.556666Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" exactly the runtime-vs-test-fixture drift class L"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:38.937215Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-007 calls out.\",\n \"rationale\": \"ADR-008 makes digests load"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:39.317599Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-bearing for the executable boundary; CLA"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:39.837625Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"UDE.md's hard-cut rule means the only allowed correction post-MVP is r"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:40.149438Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ip-and-replace, not adding a 'lenient' fallback. 
A"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:40.470008Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" digest spec that depends on whichever JSON serializer each"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:40.880322Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" SDK happens to use is not a contract. Greenfield posture forb"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:41.412975Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ids 'try strict, then loose' f"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:41.626661Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"allbacks.\",\n \"suggested_fix\": \"In the TechSpec, mandate digest = `sha256(JC"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:41.992726Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"S-canonicalized JSON Schema)` per RFC 8785 (or an"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:42.383487Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" equivalent fully-specified canonicalization), hex-encoded l"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:42.745562Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"owercase. 
Specify which subtree is hashed ("}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:43.15499Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"the schema only, not surrounding manifest metadata). Require a"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:43.528687Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" shared conformance fixture under `sdk/typescript/test"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:43.9111Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-fixtures/digest/` and `sdk/go/.../digest/` cons"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:44.298652Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"umed by both SDK tests and `internal/extension/manifest_"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:44.65208Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"test.go` so all three implementations regress against the same by"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:45.034816Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"te vectors.\"\n },\n {\n \"id\": \"B-003\",\n \"section\": \"Implementation Design / Core"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:45.417861Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Interfaces + ADR-010 Implementation Notes\",\n \"issue\": \"The spec correctly forbids `internal/tools` from hol"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:45.795365Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ding remote-MCP token material and exposes only the red"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:46.214406Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"acted `MCPAuthStatusProvider.Status`. But remote-MCP `tools/call` over"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:46.643186Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" HTTP/SSE requires injecting a live `Authorization: Bearer …"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:47.00073Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"` header per call. 
No interface signature is pasted that lets"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:47.378176Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the registry trigger a remote call without the registry "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:47.746785Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ever touching the token: ADR-010 only mentions `narrow intern"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:48.202773Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"al/mcp/auth execution interfaces for bearer material` in prose. MCPTo"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:48.513015Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"olInvoker.CallTool takes only `(ctx, source, req)` and returns "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:48.889136Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ToolResult; where the bearer is acquired and b"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:49.270423Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ound to the request is unspecified.\",\n \"rationale\": \"Security inv"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:49.651487Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ariant 20 makes `internal/mcp/auth` the sole cred"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:50.043209Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ential owner; invariant 12 forbids any raw token in t"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:50.413914Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ool inputs/outputs. Without a code-"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:50.793359Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"block contract for the call-through executor, implementers will end up either"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:51.175118Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" (a) plumbing a TokenStore reference into the regist"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:51.564086Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ry adapter (token ownership leak) or (b) creating a par"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:51.953475Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"allel auth helper that re-fetches tokens and drifts 
from"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:52.364545Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" `internal/mcp/auth.Service`. Both outcomes violate L"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:52.745654Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-005 (authoritative primitive exclusiv"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:53.139086Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ity) and the ADR-010 'do not duplicate MCP auth' rule.\",\n \"suggested_fix\": \"Paste an"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:53.511501Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" `MCPCallExecutor` interface owned by `internal/mcp` (consu"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:53.896195Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ming `internal/mcp/auth` directly), e.g. 
`type MCPCallExecutor interface { Call(ctx context"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:54.606274Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":".Context, source SourceRef, req MCPToolCallRequest) (ToolResult, error) }`, document that it intern"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:54.661672Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ally resolves token material from `mcpauth.TokenStore` without"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:55.03445Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" exposing TokenRecord, and prohibit `internal/tools` from importing `internal/mcp"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:55.420786Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"/auth` at all. 
Add a redaction test that mounts a fake rem"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:55.825322Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ote MCP and asserts that no `Authorization` header value cr"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:56.160674Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"osses any error/log/event boundary owned by `internal/tools`.\""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:56.556802Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n },\n {\n \"id\": \"B-004\",\n \"section\": \"Hosted MCP authentication\",\n \"issue\": \"The hosted_"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:56.939341Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"mcp_bind_token is required to be redacted 'like"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:57.318587Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" claim_token' but the spec passes the raw token through ACP `mcpServers[].env`, which is part"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:57.815205Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" of the `session/new` / `session/load` payload sent to an external agent runtime that"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:58.081162Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" AGH does not control. CLAUDE.md says 'claim_token (raw) never crosses transport, channel, log, or memory.'"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:58.462976Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Treating the bind token as claim_token-equivalent and then hand"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:58.842143Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ing it to a third-party process via the ACP wire is internally contradictory. Red"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:59.238498Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"action tests cover 'ACP payload diagnostics' as observed by AGH, but cannot cover the spaw"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:23:59.633267Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ned ACP runtime's own logging/env-dump behavior.\",\n \"rationale\": \"Stand"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:00.161486Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ing directive on truthful UI /"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:00.366195Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" claim_token redaction, plus L-005 (one authority"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:00.7473Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" per primitive), forbids design choices where a security"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:01.128874Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-equivalent secret silently leaves the daemon's redaction per"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:01.513669Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"imeter. 
Either the bind token is genuinely claim_token-equivalent (then it must not"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:01.889483Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" cross the ACP wire) or it is a different class of secret with its own threat model —"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:02.275645Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" but the spec must choose one.\",\n \"suggested_fix\": \"Either (a) replace env-inj"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:02.786802Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ection with a UDS challenge: spawn the proxy with only `--"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:03.035158Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"session \u003cid\u003e` and a short-lived `bind_nonce`, then complete a"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:03.415853Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" UDS handshake where the daemon mints the session-b"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:03.794315Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ound projection bound to the proxy's pid+uds-pe"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:04.174613Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"er-cred without ever shipping a reusable token through"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:04.566638Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" ACP env; or (b) document an explicit threat-model section"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:04.935866Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" stating 'bind_token traverses ACP env to external run"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:05.414548Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"times; mitigations are single-use, ≤Ns T"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:05.796782Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"TL, session-bound, hash-only persistence' and rename"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:06.17543Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the redaction equivalence: 'redacted in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:06.571118Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" AGH-owned surfaces; cannot be redacted in third-party ag"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:07.03193Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ent logs.' Pick one and update Security Inv"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:07.413426Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ariant 16 to match exactly.\"\n },\n {\n \"id\": \"B-005\",\n \"section\": \"H"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:07.857995Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"osted MCP approval bridge\",\n \"issue\": \"The bridge says `Registry.Call` "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:08.32494Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"'blocks the MCP `tools/call` response until approved, denied, or timed out' but no timeout,"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:08.603918Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" no config key, no default, and no cancellation semantics are defined. 
AC"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:08.985532Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"P `session/request_permission` is unbounded; an unresponsive operator"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:09.368548Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" or a half-closed ACP stream will hold a goroutine and"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:09.746915Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" an MCP RPC indefinitely. The spec also gives no beh"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:10.130619Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"avior when the hosted MCP stdio closes mid-appro"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:10.521556Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"val.\",\n \"rationale\": \"Detached lifetime + W"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:10.888609Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"aitGroup discipline (internal/CLAUDE.md Conc"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:11.271804Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"urrency) plus 'External-call timeouts' 
security"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:11.651675Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" invariant require every blocking external dependency to have an explicit de"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:12.03146Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"adline. An approval that blocks an in-flight tool call is a long"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:12.413581Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-running external dependency, and the spec leaves it unbounded.\",\n \"suggested_"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:12.874941Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"fix\": \"Add `[tools.policy].approval_timeout_seconds` (default e.g. 
120) to the Config L"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:13.221872Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ifecycle section; define reason codes `approval_timed_out` and `approval_canceled`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:13.605722Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"; specify that `Registry.Call` uses a derived context with the"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:13.988823Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" approval deadline, and that closing the hosted MCP stdio cancels the in-flight appro"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:14.415864Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"val and returns `ErrToolApprovalRequired` with `"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:14.799169Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"approval_canceled`. 
Add an integration test for both"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:15.182753Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" timeout and proxy-disconnect-mid-approval paths.\"\n }"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:15.594581Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n ],\n \"nits\": [\n {\n \"id\": \"N-001\",\n \"section\": \"Implementation Design / Data Models\",\n \"issue\": \"Appro"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:15.938863Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"val timeout and bind-token TTL are referenced but never given concrete defaults or config keys."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:16.321978Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\",\n \"suggested_fix\": \"Add `[tools.policy].approval_timeout_seconds` and `[tools.hosted_mcp].bind_token"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:16.789551Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"_ttl_seconds` with defaults; restate in Safety Invariants "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:17.19956Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"16 and 17.\"\n },\n {\n \"id\": \"N-002\",\n \"section\": \"ADR-009 /"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:17.517972Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Implementation Steps step 10\",\n \"issue\": \"The public Go SDK module path is h"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:17.945614Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"edged as `sdk/go` 'or an equivalent non-internal module path', leaving room for div"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:18.377012Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ergence between the techspec and the create-extension scaffolder.\",\n \"suggested_fix\": \"Comm"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:18.655212Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"it to `sdk/go` (mirroring `sdk/typescript`) and remove the"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:19.034364Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" hedge.\"\n },\n {\n \"id\": \"N-003\",\n \"section\": \"Test Strategy / Integration Tests\",\n \"issue\": \"Per"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:19.413836Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" L-007, runtime-contract changes must co-ship with E"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:19.795613Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"2E mock/fixture updates, but the test list"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:20.197461Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" does not call out acpmock fixture updates for new tool-call paths re"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:20.657198Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"aching ACP through hosted MCP.\",\n \"suggested_fix\": \"Add an explicit line:"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:20.985093Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" 'acpmock fixtures and Playwright selectors for hosted MCP tool-"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:21.369755Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"call lifecycle ship in the same PR; match"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:21.862558Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ers use structured tool_id met"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:22.167563Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"adata, never rendered prompt subst"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:22.535925Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"rings.'\"\n },\n {\n \"id\": \"N-004\",\n \"section\": \"Test Strategy\",\n \"issue\": \"The "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:22.920664Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"80% per-package coverage floor and `-race`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:23.295139Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"/`CGO_ENABLED=1` discipline (agh-test-conventions) are"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:23.655806Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" not restated; verifier agents will not en"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:24.03398Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"force them from prose.\",\n \"suggested_fix\": \"Add a single line under"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:24.464552Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Test Strategy: 'Per-package coverage ≥80%, race-s"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:24.871503Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ensitive packages run under `-race` in CI 
Linux-"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:25.445418Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"Race lane.'\"\n },\n {\n \"id\": \"N-005\",\n \"section\": \"ADR-007"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:25.821793Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" / Data Models / ToolID\",\n \"issue\": \"S"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:26.266632Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"anitized external MCP/extension names that exceed 64 chars after"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:26.580512Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" sanitization are not given a deterministic disposition (truncate"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:26.890753Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":", hash-suffix, mark conflicted"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:27.27138Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":").\",\n \"suggested_fix\": \"Specify: external sanitized names exceeding 64 chars are marked `"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:27.665104Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"conflicted` with reason `id_too_long`; no truncation, no hash-suffix sh"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:28.03524Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"adow id.\"\n },\n {\n \"id\": \"N-006\",\n \"section\": \"Integ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:28.427002Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ration Points / Hooks\",\n \"issue\": \"Step 15 says"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:28.797247Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" 'no dual identity mid-PR',"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:29.22447Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" but the Delete Targets section does not enumerate the existing hook"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:29.57429Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" payload fields `tool_name` and `tool_namespace`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:29.936995Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" as removed.\",\n \"suggested_fix\": \"Add `tool_name` and `tool_namespace` 
("}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:30.322124Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"in `internal/hooks/payloads.go` ToolP"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:30.761466Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"reCallPayload / ToolPostCallPayload / ToolPostErrorPayload) to"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:31.555883Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Delete Targets, with the same-PR rename rule made"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:31.574146Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" explicit.\"\n },\n {\n \"id\": \"N-007\",\n \"section\": \"Config Lifecycle / approve-reads\",\n \"issue\": \"`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:31.852973Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"approve-reads` trusts the descriptor's `read_only` flag, but extension/MCP desc"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:32.305306Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"riptors are externally authored. 
`external_default=disabled` m"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:32.614772Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"itigates this only until an operator flips a grant;"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:33.016278Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the trust delegation could be tighter.\",\n \"suggested_fix\": \"State"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:33.368501Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" that `approve-reads` does not auto-approve `extension`/`mcp`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:33.748045Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-source tools unless the source is in an"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:34.238278Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" explicit `trusted_sources` allowlist, regardless of declared `read_only`.\""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:34.601537Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n },\n {\n \"id\": \"N-008\",\n \"section\": \"Implementation Design / Data Models\",\n \"issue\": \"`agh__"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:34.941241Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"task_child_create` exists in MVP sc"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:35.406192Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ope, but the spec does not state where lineage subset"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:35.916204Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" checks happen (registry policy vs `task.Service"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:36.165818Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":".CreateChildTask`).\",\n \"suggested_fix\": \"Pin lineage subset en"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:36.462943Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"forcement to `task.Service.CreateChildTask` and add an integration test that proves `agh__task_child_create` c"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:36.844781Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"annot widen child permissions beyond parent.\"\n }\n ],\n \"readiness\":"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:37.223807Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" \"NEEDS_REWORK\",\n 
\"summary\": \"The architectural dire"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:37.606108Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ction is sound and all the round-1 nits were addressed, but five contract"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:38.008885Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-level gaps remain that will cause review rework across SDK, daemon, and adapter impl"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:38.373863Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ementations: undefined extension wire payload structs"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:38.887782Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" (`tools/call` / `provide_tools`), undefined schema-digest canonicalization,"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:39.148684Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" undefined remote-MCP bearer-injection interface, an internal cont"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:39.510758Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"radiction in how the hosted MCP bind token traverses the ACP wire while being"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:39.892431Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" declared claim_token-equivalent, and an unbounded approval-bridge w"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:40.402579Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ait. Resolve B-001..B-005 with code"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:40.65088Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-block-level edits and the TechSpec is ready for tasks;"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:41.066424Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" until then it would force exactly the multi-round"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:41.338376Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" trajectory L-012 warns against.\"\n}\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:41.448229Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:41.54005Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:41.555711Z","update":{"usage":{},"status":"completed"},"usage":{}} 
+{"type":"run.succeeded","run_id":"exec-20260428-191933-946763000","turn":1,"time":"2026-04-28T19:24:41.566741Z","status":"succeeded","usage":{},"output":"Based on my review of the TechSpec, ADRs, research analyses, lessons learned, and the existing codebase (extension protocol, MCP auth, SDK structure), here is my verdict.\n\n```json\n{\n \"blockers\": [\n {\n \"id\": \"B-001\",\n \"section\": \"Implementation Design / Core Interfaces + Integration Points / Extensions\",\n \"issue\": \"The ExtensionToolInvoker interface is pasted, but the structs it depends on — ExtensionToolCallRequest and the matching response/runtime payload that the TS SDK, Go SDK, and daemon must all serialize identically — are referenced as named types and never defined as Go code blocks. The wire shape of the new extension service methods 'tool.provider' / 'provide_tools' / 'tools/call' is also not registered the way existing host_api.go ExtensionServiceMethod constants are (memory/store, memory/recall, bridges/deliver). Three independent implementations (sdk/typescript, sdk/go, internal/extension/protocol + internal/extension/manager) plus the registry adapter are required to converge on a wire that the spec does not draw.\",\n \"rationale\": \"L-012 (`techspec-prose-only-rework`) is explicit that contract-laden surfaces require concrete Go interface signatures pasted as code blocks; otherwise reviewers converge N implementations across N rework rounds. ADR-001 and ADR-008 both depend on bit-identical wire descriptors for manifest/runtime reconciliation, and the existing protocol package treats every AGH→extension method as a typed constant. 
Leaving the new methods undefined will repeat the autonomy-vs-release-adjustments review trajectory described in L-012.\",\n \"suggested_fix\": \"Paste, in the TechSpec, full Go struct definitions for ExtensionToolCallRequest, ExtensionToolCallResponse, ExtensionProvideToolsResponse (or equivalent), MCPToolCallRequest, and MCPToolCallResponse, plus the `internal/extension/protocol` additions: `CapabilityProvideToolProvider = \\\"tool.provider\\\"`, `ExtensionServiceMethodProvideTools = \\\"provide_tools\\\"`, `ExtensionServiceMethodToolsCall = \\\"tools/call\\\"`, and their entry in `capabilityServiceMethods`. State explicitly which fields are wire-stable (digests, ToolID, handler) vs runtime-only (latency, content blocks).\"\n },\n {\n \"id\": \"B-002\",\n \"section\": \"Data Models / ExtensionToolRuntimeDescriptor + ADR-008 Implementation Notes\",\n \"issue\": \"Manifest/runtime reconciliation requires `input_schema_digest` and `output_schema_digest` to match exactly between the manifest, the TS SDK helper output, the Go SDK helper output, and the daemon's manifest validator. The TechSpec never specifies the digest algorithm, byte canonicalization, or normalization rules (key ordering, escape behavior, integer/number representation, $ref handling). Without this, identical schemas will hash differently across language toolchains and produce false `extension_runtime_mismatch` reason codes — exactly the runtime-vs-test-fixture drift class L-007 calls out.\",\n \"rationale\": \"ADR-008 makes digests load-bearing for the executable boundary; CLAUDE.md's hard-cut rule means the only allowed correction post-MVP is rip-and-replace, not adding a 'lenient' fallback. A digest spec that depends on whichever JSON serializer each SDK happens to use is not a contract. 
Greenfield posture forbids 'try strict, then loose' fallbacks.\",\n \"suggested_fix\": \"In the TechSpec, mandate digest = `sha256(JCS-canonicalized JSON Schema)` per RFC 8785 (or an equivalent fully-specified canonicalization), hex-encoded lowercase. Specify which subtree is hashed (the schema only, not surrounding manifest metadata). Require a shared conformance fixture under `sdk/typescript/test-fixtures/digest/` and `sdk/go/.../digest/` consumed by both SDK tests and `internal/extension/manifest_test.go` so all three implementations regress against the same byte vectors.\"\n },\n {\n \"id\": \"B-003\",\n \"section\": \"Implementation Design / Core Interfaces + ADR-010 Implementation Notes\",\n \"issue\": \"The spec correctly forbids `internal/tools` from holding remote-MCP token material and exposes only the redacted `MCPAuthStatusProvider.Status`. But remote-MCP `tools/call` over HTTP/SSE requires injecting a live `Authorization: Bearer …` header per call. No interface signature is pasted that lets the registry trigger a remote call without the registry ever touching the token: ADR-010 only mentions `narrow internal/mcp/auth execution interfaces for bearer material` in prose. MCPTolInvoker.CallTool takes only `(ctx, source, req)` and returns ToolResult; where the bearer is acquired and bound to the request is unspecified.\",\n \"rationale\": \"Security invariant 20 makes `internal/mcp/auth` the sole credential owner; invariant 12 forbids any raw token in tool inputs/outputs. Without a code-block contract for the call-through executor, implementers will end up either (a) plumbing a TokenStore reference into the registry adapter (token ownership leak) or (b) creating a parallel auth helper that re-fetches tokens and drifts from `internal/mcp/auth.Service`. 
Both outcomes violate L-005 (authoritative primitive exclusivity) and the ADR-010 'do not duplicate MCP auth' rule.\",\n \"suggested_fix\": \"Paste an `MCPCallExecutor` interface owned by `internal/mcp` (consuming `internal/mcp/auth` directly), e.g. `type MCPCallExecutor interface { Call(ctx context.Context, source SourceRef, req MCPToolCallRequest) (ToolResult, error) }`, document that it internally resolves token material from `mcpauth.TokenStore` without exposing TokenRecord, and prohibit `internal/tools` from importing `internal/mcp/auth` at all. Add a redaction test that mounts a fake remote MCP and asserts that no `Authorization` header value crosses any error/log/event boundary owned by `internal/tools`.\"\n },\n {\n \"id\": \"B-004\",\n \"section\": \"Hosted MCP authentication\",\n \"issue\": \"The hosted_mcp_bind_token is required to be redacted 'like claim_token' but the spec passes the raw token through ACP `mcpServers[].env`, which is part of the `session/new` / `session/load` payload sent to an external agent runtime that AGH does not control. CLAUDE.md says 'claim_token (raw) never crosses transport, channel, log, or memory.' Treating the bind token as claim_token-equivalent and then handing it to a third-party process via the ACP wire is internally contradictory. Redaction tests cover 'ACP payload diagnostics' as observed by AGH, but cannot cover the spawned ACP runtime's own logging/env-dump behavior.\",\n \"rationale\": \"Standing directive on truthful UI / claim_token redaction, plus L-005 (one authority per primitive), forbids design choices where a security-equivalent secret silently leaves the daemon's redaction perimeter. 
Either the bind token is genuinely claim_token-equivalent (then it must not cross the ACP wire) or it is a different class of secret with its own threat model — but the spec must choose one.\",\n \"suggested_fix\": \"Either (a) replace env-injection with a UDS challenge: spawn the proxy with only `--session \u003cid\u003e` and a short-lived `bind_nonce`, then complete a UDS handshake where the daemon mints the session-bound projection bound to the proxy's pid+uds-per-cred without ever shipping a reusable token through ACP env; or (b) document an explicit threat-model section stating 'bind_token traverses ACP env to external runtimes; mitigations are single-use, ≤Ns TL, session-bound, hash-only persistence' and rename the redaction equivalence: 'redacted in AGH-owned surfaces; cannot be redacted in third-party agent logs.' Pick one and update Security Invariant 16 to match exactly.\"\n },\n {\n \"id\": \"B-005\",\n \"section\": \"Hosted MCP approval bridge\",\n \"issue\": \"The bridge says `Registry.Call` 'blocks the MCP `tools/call` response until approved, denied, or timed out' but no timeout, no config key, no default, and no cancellation semantics are defined. ACP `session/request_permission` is unbounded; an unresponsive operator or a half-closed ACP stream will hold a goroutine and an MCP RPC indefinitely. The spec also gives no behavior when the hosted MCP stdio closes mid-approval.\",\n \"rationale\": \"Detached lifetime + WaitGroup discipline (internal/CLAUDE.md Concurrency) plus 'External-call timeouts' security invariant require every blocking external dependency to have an explicit deadline. An approval that blocks an in-flight tool call is a long-running external dependency, and the spec leaves it unbounded.\",\n \"suggested_fix\": \"Add `[tools.policy].approval_timeout_seconds` (default e.g. 
120) to the Config Lifecycle section; define reason codes `approval_timed_out` and `approval_canceled`; specify that `Registry.Call` uses a derived context with the approval deadline, and that closing the hosted MCP stdio cancels the in-flight approval and returns `ErrToolApprovalRequired` with `approval_canceled`. Add an integration test for both timeout and proxy-disconnect-mid-approval paths.\"\n }\n ],\n \"nits\": [\n {\n \"id\": \"N-001\",\n \"section\": \"Implementation Design / Data Models\",\n \"issue\": \"Approval timeout and bind-token TTL are referenced but never given concrete defaults or config keys.\",\n \"suggested_fix\": \"Add `[tools.policy].approval_timeout_seconds` and `[tools.hosted_mcp].bind_token_ttl_seconds` with defaults; restate in Safety Invariants 16 and 17.\"\n },\n {\n \"id\": \"N-002\",\n \"section\": \"ADR-009 / Implementation Steps step 10\",\n \"issue\": \"The public Go SDK module path is hedged as `sdk/go` 'or an equivalent non-internal module path', leaving room for divergence between the techspec and the create-extension scaffolder.\",\n \"suggested_fix\": \"Commit to `sdk/go` (mirroring `sdk/typescript`) and remove the hedge.\"\n },\n {\n \"id\": \"N-003\",\n \"section\": \"Test Strategy / Integration Tests\",\n \"issue\": \"Per L-007, runtime-contract changes must co-ship with E2E mock/fixture updates, but the test list does not call out acpmock fixture updates for new tool-call paths reaching ACP through hosted MCP.\",\n \"suggested_fix\": \"Add an explicit line: 'acpmock fixtures and Playwright selectors for hosted MCP tool-call lifecycle ship in the same PR; matchers use structured tool_id metadata, never rendered prompt substrings.'\"\n },\n {\n \"id\": \"N-004\",\n \"section\": \"Test Strategy\",\n \"issue\": \"The 80% per-package coverage floor and `-race`/`CGO_ENABLED=1` discipline (agh-test-conventions) are not restated; verifier agents will not enforce them from prose.\",\n \"suggested_fix\": \"Add a single line under 
Test Strategy: 'Per-package coverage ≥80%, race-sensitive packages run under `-race` in CI Linux-Race lane.'\"\n },\n {\n \"id\": \"N-005\",\n \"section\": \"ADR-007 / Data Models / ToolID\",\n \"issue\": \"Sanitized external MCP/extension names that exceed 64 chars after sanitization are not given a deterministic disposition (truncate, hash-suffix, mark conflicted).\",\n \"suggested_fix\": \"Specify: external sanitized names exceeding 64 chars are marked `conflicted` with reason `id_too_long`; no truncation, no hash-suffix shadow id.\"\n },\n {\n \"id\": \"N-006\",\n \"section\": \"Integration Points / Hooks\",\n \"issue\": \"Step 15 says 'no dual identity mid-PR', but the Delete Targets section does not enumerate the existing hook payload fields `tool_name` and `tool_namespace` as removed.\",\n \"suggested_fix\": \"Add `tool_name` and `tool_namespace` (in `internal/hooks/payloads.go` ToolPreCallPayload / ToolPostCallPayload / ToolPostErrorPayload) to Delete Targets, with the same-PR rename rule made explicit.\"\n },\n {\n \"id\": \"N-007\",\n \"section\": \"Config Lifecycle / approve-reads\",\n \"issue\": \"`approve-reads` trusts the descriptor's `read_only` flag, but extension/MCP descriptors are externally authored. 
`external_default=disabled` mitigates this only until an operator flips a grant; the trust delegation could be tighter.\",\n \"suggested_fix\": \"State that `approve-reads` does not auto-approve `extension`/`mcp`-source tools unless the source is in an explicit `trusted_sources` allowlist, regardless of declared `read_only`.\"\n },\n {\n \"id\": \"N-008\",\n \"section\": \"Implementation Design / Data Models\",\n \"issue\": \"`agh__task_child_create` exists in MVP scope, but the spec does not state where lineage subset checks happen (registry policy vs `task.Service.CreateChildTask`).\",\n \"suggested_fix\": \"Pin lineage subset enforcement to `task.Service.CreateChildTask` and add an integration test that proves `agh__task_child_create` cannot widen child permissions beyond parent.\"\n }\n ],\n \"readiness\": \"NEEDS_REWORK\",\n \"summary\": \"The architectural direction is sound and all the round-1 nits were addressed, but five contract-level gaps remain that will cause review rework across SDK, daemon, and adapter implementations: undefined extension wire payload structs (`tools/call` / `provide_tools`), undefined schema-digest canonicalization, undefined remote-MCP bearer-injection interface, an internal contradiction in how the hosted MCP bind token traverses the ACP wire while being declared claim_token-equivalent, and an unbounded approval-bridge wait. 
Resolve B-001..B-005 with code-block-level edits and the TechSpec is ready for tasks; until then it would force exactly the multi-round trajectory L-012 warns against.\"\n}\n```"} diff --git a/.compozy/tasks/tools-registry/qa/peer-review-result.err b/.compozy/tasks/tools-registry/qa/peer-review-result.err new file mode 100644 index 000000000..e69de29bb diff --git a/.compozy/tasks/tools-registry/qa/peer-review-result.json b/.compozy/tasks/tools-registry/qa/peer-review-result.json new file mode 100644 index 000000000..c860d48b4 --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-result.json @@ -0,0 +1,349 @@ +{"type":"run.started","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:03.339981Z","status":"running","usage":{}} +{"type":"session.attached","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:07.044398Z","session":{"acp_session_id":"2bd7cffc-ea35-455f-9ed3-56aa7175182c"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:07.044894Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:09.526129Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:09.906149Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01MYbx76htbiZLVwj82wSToM","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01MYbx76htbiZLVwj82wSToM","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:10.364382Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01MYbx76htbiZLVwj82wSToM","blocks":[{"type":"tool_use","id":"toolu_01MYbx76htbiZLVwj82wSToM","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/_techspec.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:10.364465Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:11.470373Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","blocks":[{"type":"tool_use","id":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-001-extension-tool-execution-boundary.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:11.470459Z","update":{"kind":"tool_call_started","toolCallId":"toolu_019TVYwCE55DwJXF6fvekw4V","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_019TVYwCE55DwJXF6fvekw4V","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:12.307578Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_019TVYwCE55DwJXF6fvekw4V","blocks":[{"type":"tool_use","id":"toolu_019TVYwCE55DwJXF6fvekw4V","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-002-session-tool-exposure-path.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:12.307722Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01GFv38C8Qzo8AYwWHENVNWT","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01GFv38C8Qzo8AYwWHENVNWT","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:13.020543Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01GFv38C8Qzo8AYwWHENVNWT","blocks":[{"type":"tool_use","id":"toolu_01GFv38C8Qzo8AYwWHENVNWT","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-003-runtime-registry-package-boundary.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:13.020638Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01Fiio4nNovFW89qQbmZLF33","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01Fiio4nNovFW89qQbmZLF33","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:13.836046Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Fiio4nNovFW89qQbmZLF33","blocks":[{"type":"tool_use","id":"toolu_01Fiio4nNovFW89qQbmZLF33","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-004-mvp-native-tool-scope.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:13.846201Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01VfNT1f6f5XexCRcqC6nVtW","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01VfNT1f6f5XexCRcqC6nVtW","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:14.479312Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VfNT1f6f5XexCRcqC6nVtW","blocks":[{"type":"tool_use","id":"toolu_01VfNT1f6f5XexCRcqC6nVtW","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-005-acp-approval-policy-integration.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:14.479504Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01LacT3QoMDarPjBNRRZsvbN","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01LacT3QoMDarPjBNRRZsvbN","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:14.856384Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01LacT3QoMDarPjBNRRZsvbN","blocks":[{"type":"tool_use","id":"toolu_01LacT3QoMDarPjBNRRZsvbN","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-006-tool-visibility-by-surface.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:14.856573Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.436322Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","blocks":[{"type":"tool_use","id":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/adrs/adr-007-canonical-tool-id-format.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.454805Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.557147Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Fiio4nNovFW89qQbmZLF33","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.55735Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.557459Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01GFv38C8Qzo8AYwWHENVNWT","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.557795Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.558071Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01LacT3QoMDarPjBNRRZsvbN","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.558285Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_019TVYwCE55DwJXF6fvekw4V","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.558502Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VfNT1f6f5XexCRcqC6nVtW","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.561751Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01Fiio4nNovFW89qQbmZLF33","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01Fiio4nNovFW89qQbmZLF33","content":"```\n1\t# ADR-004: MVP Native Tool Scope\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tThe Tool Registry MVP must prove more than metadata listing. 
It must demonstrate that AGH-owned tools can be discovered, authorized, invoked through the hosted MCP surface, called through CLI/HTTP/UDS contracts, routed through one dispatch pipeline, and observed.\n10\t\n11\tThe smallest read-only bootstrap toolset would prove catalog mechanics, but it would not prove meaningful autonomy. AGH's product direction requires agents to manage coordination and task flows, so network and task tools should be represented in the first implementation slice.\n12\t\n13\tAt the same time, skill installation is a separate supply-chain surface. It requires stricter install policy, trust tiers, approval flows, and scanner decisions. It should not be bundled into the first registry execution proof unless the supply-chain work is explicitly scoped.\n14\t\n15\t## Decision\n16\t\n17\tThe MVP native tool scope includes the bootstrap catalog/skill tools plus network and task tools:\n18\t\n19\t- `agh__tool_list`\n20\t- `agh__tool_search`\n21\t- `agh__tool_info`\n22\t- `agh__skill_list`\n23\t- `agh__skill_search`\n24\t- `agh__skill_view`\n25\t- `agh__network_peers`\n26\t- `agh__network_send`\n27\t- `agh__task_*`\n28\t\n29\t`agh__task_*` will be specified in the TechSpec as a bounded set of first-class task operations, not as an unreviewed wildcard implementation. Candidate operations include list/get/create/update/claim/release/complete where they map to existing or planned AGH task primitives.\n30\t\n31\tSkill install/remove/update tools are not included in the MVP native tool scope unless a later decision explicitly adds the required supply-chain and approval work.\n32\t\n33\t## Consequences\n34\t\n35\tThe MVP must include both read-only and mutating tools. The registry must model risk, read-only/destructive/open-world flags, permission checks, and policy gates from the first implementation.\n36\t\n37\tNetwork and task tools must use the same registry dispatch path as catalog tools. 
They must not call around policy, availability, hooks, result budgeting, or telemetry.\n38\t\n39\tThe hosted MCP server must expose only the tool subset authorized for the session. Agent-visible discovery must not advertise network/task tools to sessions that lack the required permission atoms.\n40\t\n41\tQA must include real scenario coverage for:\n42\t\n43\t- listing and searching tools,\n44\t- viewing a skill body through `agh__skill_view`,\n45\t- listing peers,\n46\t- sending a network message through `agh__network_send` with permission enforcement,\n47\t- creating/updating or otherwise exercising the bounded `agh__task_*` set,\n48\t- proving unauthorized sessions cannot see or call mutating tools.\n49\t\n50\t## Rejected Alternatives\n51\t\n52\t### Read-only bootstrap only\n53\t\n54\tThis would be safer and simpler, but it would leave the registry unproven for AGH's coordination and autonomy use cases.\n55\t\n56\t### Bootstrap plus skill install\n57\t\n58\tThis would improve agent self-service, but it introduces supply-chain risk that belongs in a dedicated policy/scanning/install decision.\n59\t\n60\t### Foundation only\n61\t\n62\tThis would create the architecture without proving the agent-first experience that motivated the Tool Registry work.\n63\t\n64\t## Evidence\n65\t\n66\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: recommends a small bootstrap set and identifies network/task tools as later groups.\n67\t- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`: prior gaps identify network and task tools as strategically important agent-callable surfaces.\n68\t- `.compozy/tasks/hermes/analysis/analysis_tools_security.md`: mutating and open-world tools require stronger permission and security gates before broad exposure.\n69\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.561984Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01DQiwbuHGGA6n1x7tNUyfcy","content":"```\n1\t# ADR-001: Extension Tool Execution Boundary\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tAGH's Tool Registry must support native/bundled tools and tools contributed by extensions. Competitor research shows three broad patterns:\n10\t\n11\t- Hermes allows in-process plugin/tool registration, which is flexible but broadens daemon compromise risk.\n12\t- Claude Code routes plugin-contributed tools primarily through MCP, keeping third-party tools behind a protocol boundary.\n13\t- OpenClaw uses manifest-first static discovery and runtime materialization through plugin/MCP adapters.\n14\t- AGH already lets extension manifests publish static `resources.tools`, but those records are currently metadata-only.\n15\t\n16\tThe registry foundation must preserve AGH's extensibility goal without letting arbitrary extension code run inside the daemon process as a first implementation step.\n17\t\n18\t## Decision\n19\t\n20\tExtension-contributed tools will use a manifest-first, out-of-process execution boundary for the MVP.\n21\t\n22\tExtensions may declare tool descriptors in their manifest so AGH can discover, index, authorize, and show them without executing extension code. 
A declared extension tool becomes executable only when it is backed by an approved out-of-process adapter:\n23\t\n24\t- MCP server adapter,\n25\t- extension sidecar / Host API adapter,\n26\t- subprocess adapter managed by AGH runtime supervision,\n27\t- future bridge SDK adapter with equivalent process/protocol isolation.\n28\t\n29\tAGH built-in tools may register in-process because they are compiled into the daemon binary and reviewed as daemon code.\n30\t\n31\tThird-party extension handlers will not run in-process in the daemon for the MVP.\n32\t\n33\t## Consequences\n34\t\n35\tThe registry must separate descriptor records from executable handles. A tool can be installed and discoverable while still being unavailable or non-executable until its backend is healthy and authorized.\n36\t\n37\tExtension manifests need backend metadata or a follow-up mechanism that binds static tool declarations to sidecar/MCP/subprocess handles.\n38\t\n39\tDispatch must recheck availability, authorization, and backend health at call time. Discovery filtering is not a security boundary.\n40\t\n41\tThe MVP can support extension tool discovery before full extension tool invocation. This keeps the foundation extensible while avoiding a daemon plugin ABI or in-process third-party execution model.\n42\t\n43\t## Rejected Alternatives\n44\t\n45\t### Metadata-only extension tools\n46\t\n47\tKeeping extension tools discoverable but never executable in the MVP would reduce scope, but it would not prove the registry's adapter model. It remains acceptable as a staged implementation path for individual extension backends, but not as the architectural boundary.\n48\t\n49\t### Trusted in-process extension handlers\n50\t\n51\tAllowing trusted or bundled extensions to register in-process handlers would be faster for first-party bundles, but it creates a second execution model and risks pressure to admit third-party handlers later. 
Built-ins should remain daemon code; extensions should cross a process/protocol boundary.\n52\t\n53\t## Evidence\n54\t\n55\t- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: manifest-first discovery and runtime materialization.\n56\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: plugin tools primarily flow through MCP adapters.\n57\t- `.compozy/tasks/tools-registry/analysis/analysis_hermes.md`: in-process plugin/tool registration is flexible but mismatched with AGH's desired safety model.\n58\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: AGH extension manifests already publish static tool metadata but have no executable registry handle.\n59\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.562136Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_019TVYwCE55DwJXF6fvekw4V","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_019TVYwCE55DwJXF6fvekw4V","content":"```\n1\t# ADR-002: Session Tool Exposure Path\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tThe Tool Registry must be agent-manageable, not only an internal daemon API. AGH manages external ACP-compatible runtimes, so it cannot assume direct control over every provider's internal model API tool loop.\n10\t\n11\tThe registry still needs a model-visible path for AGH-owned tools such as `agh__tool_search` and `agh__skill_view`. 
Competitor research suggests MCP is the most portable first boundary:\n12\t\n13\t- Claude Code and other runtimes already understand MCP tools.\n14\t- MCP keeps tool exposure protocol-based rather than driver-specific.\n15\t- MCP can be backed by the same registry dispatch path as CLI, HTTP, and UDS.\n16\t- Runtimes without MCP can still reach AGH through CLI/UDS fallback surfaces.\n17\t\n18\t## Decision\n19\t\n20\tThe MVP will expose AGH-native registry tools through an AGH-hosted local MCP server plus shared CLI, HTTP, and UDS contracts.\n21\t\n22\tThe daemon will own one registry contract and dispatch pipeline. Surfaces call into that same pipeline:\n23\t\n24\t- hosted MCP server for model-visible AGH tools in runtimes that support MCP,\n25\t- CLI commands for operator and agent fallback use,\n26\t- HTTP API for web/operator clients,\n27\t- UDS API for local trusted clients and internal AGH tools.\n28\t\n29\tDirect driver/ACP injection can be added later as an optimization for runtimes that support it, but it is not the MVP exposure path.\n30\t\n31\t## Consequences\n32\t\n33\tAGH-native tools must be representable as MCP tools without losing policy, availability, hook, telemetry, and result-budget behavior.\n34\t\n35\tThe registry contract types must be shared below all surfaces rather than copied into each transport.\n36\t\n37\tSession start should be able to attach the hosted AGH MCP server for agents whose runtime supports MCP. Agents/runtimes without MCP remain supported through CLI/UDS fallback.\n38\t\n39\tThe TechSpec must define how the hosted MCP server is authorized, scoped to the session/workspace, and prevented from bypassing registry dispatch.\n40\t\n41\tLive catalog deltas can be designed as a later driver capability. 
The MVP can refresh the hosted MCP server's tool list and expose search/list tools through the registry.\n42\t\n43\t## Rejected Alternatives\n44\t\n45\t### CLI/UDS only\n46\t\n47\tThis would be simpler and still agent-operable through terminal tools, but it would not provide native tool calls for runtimes with MCP support and would leave the main \"last mile\" gap partially open.\n48\t\n49\t### Direct ACP/driver injection first\n50\t\n51\tThis could be cleaner for a single provider, but it is less portable and would force the first implementation into provider-specific behavior.\n52\t\n53\t### HTTP/UDS only\n54\t\n55\tThis would build the management API but delay the session-visible tool surface, making the foundation less useful to autonomous agents.\n56\t\n57\t## Evidence\n58\t\n59\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: MCP tools are adapted into the same local tool contract and refreshed dynamically.\n60\t- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: MCP is a provider backend for plugin/bundle tools.\n61\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: AGH already resolves MCP sidecars and has CLI/HTTP/UDS-style management surfaces elsewhere.\n62\t- `.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md`: AGH should avoid assuming direct LLM API control while still exposing AGH-owned tools through provider-neutral surfaces.\n63\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.56228Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01GFv38C8Qzo8AYwWHENVNWT","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01GFv38C8Qzo8AYwWHENVNWT","content":"```\n1\t# ADR-003: Runtime Registry Package Boundary\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tAGH already has `internal/tools`, but it currently defines only metadata 
records and a list-only provider interface. The Tool Registry foundation needs runtime concerns: executable handles, provider adapters, availability, policy projection, dispatch, hooks, telemetry, result budgeting, and hosted MCP exposure.\n10\t\n11\tThe registry also needs to compose with skills for agent-facing discovery operations such as `agh__tool_search`, `agh__skill_list`, and `agh__skill_view`. That creates a package-boundary question: should the runtime registry live in `internal/tools`, in a new broad `internal/catalog`, or in daemon orchestration code?\n12\t\n13\t## Decision\n14\t\n15\t`internal/tools` will own the runtime Tool Registry contracts and execution path.\n16\t\n17\tThe package will evolve from metadata-only records into the home for:\n18\t\n19\t- tool descriptors and source/provenance types,\n20\t- executable handles and providers,\n21\t- availability and reason codes,\n22\t- policy projection for tool views,\n23\t- central `Registry.Call` dispatch,\n24\t- result normalization and result-budget metadata,\n25\t- provider adapters for built-ins, MCP, extension sidecars, subprocesses, and future bridges.\n26\t\n27\tA thin `internal/catalog` facade may compose `internal/tools` and `internal/skills` for cross-domain list/search/view surfaces and for AGH-native tools such as `agh__tool_search` and `agh__skill_view`.\n28\t\n29\tThe daemon composition root will wire providers and dependencies, but it will not own registry logic.\n30\t\n31\t## Consequences\n32\t\n33\tTool execution remains in the tool domain instead of a broad catalog domain. This keeps dispatch, policy, availability, and telemetry testable without booting the daemon.\n34\t\n35\t`internal/catalog` stays small and read-oriented. 
It coordinates cross-domain discovery and progressive disclosure, but does not become a second execution engine.\n36\t\n37\tExisting `internal/tools.Tool` resource compatibility must be handled through clear type splits rather than by adding runtime function fields to resource records. The TechSpec should distinguish cold resource specs from runtime descriptors and handles.\n38\t\n39\tDaemon boot must register providers explicitly, following AGH's composition-root discipline.\n40\t\n41\t## Rejected Alternatives\n42\t\n43\t### New `internal/catalog` owns everything\n44\t\n45\tThis would centralize skills and tools, but it would also mix skill content loading, tool dispatch, extension adapter logic, search, and policy into one large package too early.\n46\t\n47\t### Daemon-owned registry logic\n48\t\n49\tKeeping registry logic inside daemon orchestration would reduce package churn, but it would make dispatch harder to test and would couple tool semantics to boot wiring.\n50\t\n51\t## Evidence\n52\t\n53\t- `.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md`: `internal/tools` is currently metadata-only while daemon boot already owns resource projection.\n54\t- `.compozy/tasks/tools-registry/analysis/analysis_goclaw.md`: GoClaw keeps executable tool contracts and policy close to the tools package.\n55\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: recommends `internal/tools` for runtime contracts and a thin catalog facade for cross-skill/tool search.\n56\t- `internal/CLAUDE.md`: AGH favors interfaces where consumed and composition-root wiring over daemon package logic accumulation.\n57\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.562438Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VfNT1f6f5XexCRcqC6nVtW","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01VfNT1f6f5XexCRcqC6nVtW","content":"```\n1\t# ADR-005: ACP Approval Policy Integration\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tAGH already has a system-level ACP tool approval policy exposed in settings and enforced by the ACP tool host:\n10\t\n11\t- `deny-all`\n12\t- `approve-reads`\n13\t- `approve-all`\n14\t\n15\tThe Tool Registry will add per-tool metadata and policy, including read-only, destructive, open-world, source, toolset, session permission atoms, and extension/MCP grants. If this registry policy is designed as a parallel approval system, AGH can produce contradictory states such as:\n16\t\n17\t- system policy says `deny-all`, but a tool-level policy says allow;\n18\t- system policy says `approve-reads`, but a mutating tool claims read-only;\n19\t- system policy says `approve-all`, but a session/agent intentionally narrows permissions;\n20\t- hosted MCP exposes a tool the ACP host would later block.\n21\t\n22\tThe registry policy must integrate with the existing ACP policy rather than bypass it.\n23\t\n24\t## Decision\n25\t\n26\tThe ACP `permissions.mode` policy is the system approval ceiling for session-visible tool execution.\n27\t\n28\tTool Registry policy operates below that ceiling as a more granular filter. It can narrow, classify, require approval, or deny a tool, but it cannot silently grant more authority than the effective system/session ACP policy allows.\n29\t\n30\tThe effective decision order is:\n31\t\n32\t1. Resolve the system/session ACP approval mode.\n33\t2. Resolve agent/session lineage tool permission atoms.\n34\t3. Resolve registry visibility and allow/deny/toolset policy.\n35\t4. 
Resolve source/risk defaults for built-in, extension, MCP, and dynamic tools.\n36\t5. Resolve tool descriptor risk flags: read-only, destructive, open-world, requires interaction.\n37\t6. Run availability checks.\n38\t7. Run pre-call hooks.\n39\t8. Dispatch only if the combined decision is allowed or explicitly approved.\n40\t\n41\t`approve-all` is permissive but not a bypass of explicit denies. It removes automatic ACP prompting for allowed tools, but registry deny rules, unavailable state, session lineage restrictions, hooks, and source grants still apply.\n42\t\n43\t`approve-reads` auto-approves only tools classified as read-only by the registry and allowed by session policy. Mutating, destructive, open-world, network-send, and task-write tools require explicit approval or explicit session/tool policy that the approval layer recognizes.\n44\t\n45\t`deny-all` denies by default. The registry may still list operator-visible tools with reasons, but session-visible execution requires explicit approval/allowance through the existing ACP approval path or an equivalent local approval surface.\n46\t\n47\tAgent frontmatter and session lineage can lower permissions relative to system policy. They cannot raise permissions above the system approval ceiling.\n48\t\n49\t## Consequences\n50\t\n51\tThe registry must compute an `EffectiveToolDecision` instead of storing a single boolean. The decision should include:\n52\t\n53\t- system approval mode,\n54\t- session/agent policy result,\n55\t- registry policy result,\n56\t- source/risk default result,\n57\t- availability result,\n58\t- hook result,\n59\t- final visibility decision,\n60\t- final execution decision,\n61\t- user/operator-facing reason codes.\n62\t\n63\tThe hosted AGH MCP server must expose only tools allowed by the effective visibility decision for that session. Dispatch must still revalidate the effective execution decision.\n64\t\n65\tTool descriptors must classify read-only vs mutating accurately. 
A mutating tool mislabeled as read-only is a correctness and security bug.\n66\t\n67\tThe web settings UI text remains true: `approve-all` auto-approves tool calls, but agents and registry policy can lower permissions. The TechSpec should clarify that \"auto-approved\" does not mean \"all registered tools are visible and executable regardless of registry policy.\"\n68\t\n69\t## Rejected Alternatives\n70\t\n71\t### Registry policy bypasses ACP policy\n72\t\n73\tThis would create inconsistent behavior between ACP-native tools, AGH-hosted MCP tools, and CLI/UDS calls. It would also make the existing settings UI misleading.\n74\t\n75\t### Registry policy replaces ACP policy\n76\t\n77\tThis would require redesigning existing ACP permission handling and settings before the Tool Registry can ship. The MVP should integrate with the existing model and extend it.\n78\t\n79\t### Tool-level policy alone controls execution\n80\t\n81\tPer-tool policy is necessary but insufficient. Session lineage, system approval mode, hooks, availability, and source grants all affect whether a call is safe and authorized.\n82\t\n83\t## Evidence\n84\t\n85\t- `internal/acp/permission.go:75-132`: ACP policy defaults, path validation, and decisions for `approve-all`, `approve-reads`, and `deny-all`.\n86\t- `internal/acp/tool_host.go:64-83`: local tool host is constructed with an ACP permission mode.\n87\t- `web/src/routes/_app/settings/general.tsx:307-315`: settings UI describes the three tool approval policies.\n88\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: permission should be an ordered pipeline rather than a tool-local boolean.\n89\t- `.compozy/tasks/tools-registry/analysis/analysis_goclaw.md`: runtime grants should be rechecked at execution time.\n90\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: dispatch must recheck availability and authorization and use one central pipeline.\n91\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.56259Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01LacT3QoMDarPjBNRRZsvbN","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01LacT3QoMDarPjBNRRZsvbN","content":"```\n1\t# ADR-006: Tool Visibility by Surface\n2\t\n3\t## Status\n4\t\n5\tAccepted\n6\t\n7\t## Context\n8\t\n9\tThe Tool Registry will track more states than \"exists\" or \"does not exist.\" A tool may be registered but disabled, unauthorized, unavailable, unhealthy, missing configuration, missing an MCP backend, blocked by session policy, denied by ACP approval mode, or conflicted by name.\n10\t\n11\tDifferent consumers need different views:\n12\t\n13\t- operators need diagnostics and reason codes to fix configuration and extension problems;\n14\t- agents need a low-noise callable surface that does not invite impossible calls;\n15\t- dispatch still needs to revalidate because discovery visibility is not a security boundary.\n16\t\n17\t## Decision\n18\t\n19\tOperator surfaces show unavailable and unauthorized tools with reason codes. 
Session-visible and model-visible surfaces expose only tools that are visible and callable for the effective session context.\n20\t\n21\tOperator surfaces include:\n22\t\n23\t- CLI,\n24\t- HTTP API,\n25\t- Web UI,\n26\t- privileged UDS/operator views.\n27\t\n28\tSession-visible/model-visible surfaces include:\n29\t\n30\t- the AGH-hosted MCP tool list exposed to an agent session,\n31\t- any future direct ACP/driver tool injection,\n32\t- non-privileged session-scoped UDS catalog views.\n33\t\n34\tThe registry must compute both:\n35\t\n36\t- `OperatorToolView`: includes all registered tools plus state, reason codes, source/provenance, policy diagnostics, conflict diagnostics, and availability details.\n37\t- `SessionToolView`: includes only tools that pass effective visibility and execution preconditions for that session.\n38\t\n39\tDispatch must revalidate the full effective execution decision even when a tool was present in `SessionToolView`.\n40\t\n41\t## Consequences\n42\t\n43\tAgents are not shown tools that they cannot call in the current session. This avoids prompt/tool-call noise and reduces attempts to invoke unavailable tools.\n44\t\n45\tOperators can still debug why a tool is not appearing to an agent, including whether the cause is ACP approval mode, session lineage, agent policy, source grants, extension health, MCP health, missing config, or a conflict.\n46\t\n47\tCLI/HTTP endpoints need an explicit scope or view mode. 
For example:\n48\t\n49\t- operator default: include unavailable tools and reasons;\n50\t- session-scoped query: return the same filtered view that the hosted MCP server would expose.\n51\t\n52\tThe hosted MCP server must use `SessionToolView`, not raw registry contents.\n53\t\n54\t## Rejected Alternatives\n55\t\n56\t### Everyone sees unavailable tools\n57\t\n58\tThis improves agent planning transparency but increases noise and risks inducing models to call tools that the daemon will reject.\n59\t\n60\t### Hide unavailable tools everywhere\n61\t\n62\tThis is clean for fail-closed execution, but it makes operator troubleshooting poor and hides extension/MCP/config problems.\n63\t\n64\t### Configurable per surface in MVP\n65\t\n66\tThis offers maximum flexibility but creates a larger behavior matrix before the registry foundation is stable.\n67\t\n68\t## Evidence\n69\t\n70\t- `.compozy/tasks/tools-registry/analysis/analysis_hermes.md`: availability filtering is useful for model-visible definitions.\n71\t- `.compozy/tasks/tools-registry/analysis/analysis_claude-code.md`: request-time tool pools are context-specific and filtered before model exposure.\n72\t- `.compozy/tasks/tools-registry/analysis/analysis_openclaw.md`: lifecycle and policy states need diagnostics while agent projections should be policy-filtered.\n73\t- `.compozy/tasks/tools-registry/analysis/synthesis.md`: discovery can hide unavailable/unauthorized tools from agents while operator surfaces show reasons, but dispatch still rechecks.\n74\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.563035Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01HDS7AAyjB2Ep1c6oFaffEj","content":"```\n1\t# ADR-007: Canonical Tool ID Format\n2\t\n3\t## Status\n4\t\n5\tAccepted.\n6\t\n7\t## 
Context\n8\t\n9\tAGH needs one stable tool identifier that can be used across the runtime registry, policy rules, CLI, HTTP, UDS, telemetry, hooks, audit logs, and the AGH-hosted MCP surface.\n10\t\n11\tEarlier options considered dotted internal IDs such as `agh.skill.view` plus a separate MCP-safe wire alias such as `agh_skill_view`. ACP/MCP compatibility research showed this would work technically, but it creates two strong names for the same tool and increases the chance of policy, audit, telemetry, or dispatch code using the wrong identity.\n12\t\n13\tThe identifier also needs to remain compatible with provider and host constraints. MCP allows dots in tool names, but common model tool/function APIs are stricter and accept letters, digits, underscores, and hyphens with a 64-character limit. AGH should choose a lowest-common-denominator format for callable tool IDs rather than rely on a more permissive protocol layer.\n14\t\n15\tClaude Code uses the `mcp__server__tool` convention for MCP tools. 
This gives an explicit namespace boundary while staying inside provider-safe characters.\n16\t\n17\t## Decision\n18\t\n19\tAGH will use one canonical public `ToolID` format across every surface:\n20\t\n21\t```text\n22\t\u003csegment\u003e( \"__\" \u003csegment\u003e )*\n23\t```\n24\t\n25\tEach segment must match:\n26\t\n27\t```text\n28\t[a-z][a-z0-9_]*\n29\t```\n30\t\n31\tGlobal constraints:\n32\t\n33\t- maximum length: 64 characters;\n34\t- lowercase ASCII only;\n35\t- digits allowed after the first character of each segment;\n36\t- `_` allowed inside a segment;\n37\t- `__` is reserved exclusively as a segment separator;\n38\t- no dot;\n39\t- no hyphen;\n40\t- no uppercase;\n41\t- no empty segment;\n42\t- no leading or trailing `_` inside a segment if it would create an empty separator ambiguity.\n43\t\n44\tExamples:\n45\t\n46\t```text\n47\tagh__tool_list\n48\tagh__tool_search\n49\tagh__tool_info\n50\tagh__skill_list\n51\tagh__skill_view\n52\tagh__network_peers\n53\tagh__network_send\n54\tagh__task_list\n55\tagh__task_read\n56\text__linear__search\n57\text__linear__create_issue\n58\tmcp__github__create_issue\n59\tmcp__context7__query_docs\n60\t```\n61\t\n62\t`ToolID` is the identity used by:\n63\t\n64\t- registry descriptors;\n65\t- provider registration;\n66\t- policy allow/deny rules;\n67\t- toolsets;\n68\t- CLI commands;\n69\t- HTTP and UDS APIs;\n70\t- hooks;\n71\t- telemetry and audit logs;\n72\t- hosted MCP `Tool.name`;\n73\t- dispatch requests.\n74\t\n75\tAGH will not use a second wire alias for the same tool in the MVP. 
Display titles are UI-only and do not participate in policy, authorization, conflict resolution, or dispatch.\n76\t\n77\tSource/provenance remains structured metadata, not an alternate identity:\n78\t\n79\t```json\n80\t{\n81\t \"id\": \"mcp__github__create_issue\",\n82\t \"source\": {\n83\t \"kind\": \"mcp\",\n84\t \"serverName\": \"github\",\n85\t \"rawToolName\": \"create_issue\"\n86\t }\n87\t}\n88\t```\n89\t\n90\tAGH may show a shorter display title such as `Create Issue`, but the canonical ID remains `mcp__github__create_issue`.\n91\t\n92\t## Collision Rules\n93\t\n94\tRegistration and session projection must fail closed:\n95\t\n96\t- If two providers produce the same `ToolID`, the later registration is rejected or marked `conflicted`.\n97\t- If sanitizing an external MCP/server/extension tool name would collide with an existing `ToolID`, the candidate tool is marked `conflicted` and is not exposed to model-visible surfaces.\n98\t- AGH must not silently truncate, overwrite, or choose \"last writer wins\".\n99\t- Operator surfaces may show conflicted tools with reason codes and provenance.\n100\t- Session/model-visible surfaces expose only non-conflicted callable tools.\n101\t\n102\t## Consequences\n103\t\n104\tPositive:\n105\t\n106\t- One identifier works across registry, wire, policy, telemetry, and dispatch.\n107\t- No dotted-to-wire alias mapping is needed in the MVP.\n108\t- Namespace boundaries remain visible through reserved `__`.\n109\t- The format is compatible with stricter provider tool-name constraints.\n110\t- Policy patterns stay simple, for example `agh__skill_*` and `mcp__github__*`.\n111\t\n112\tTradeoffs:\n113\t\n114\t- Dotted names such as `agh.skill.view` are more visually familiar for namespace trees, but they are not provider-safe enough to use as callable IDs.\n115\t- `__` is less aesthetically clean than dots, but it avoids dual identity.\n116\t- Raw external names must be preserved in `SourceRef` for exact provenance and 
debugging.\n117\t\n118\t## Follow-Ups\n119\t\n120\t- The TechSpec must update all tool examples to this format.\n121\t- The registry validator must enforce the grammar.\n122\t- Extension and MCP adapters must sanitize external names deterministically and report conflicts.\n123\t- Policy matching must treat `__` as an identity segment separator and `_` as a normal segment character.\n124\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.906604Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01MYbx76htbiZLVwj82wSToM","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:15.916025Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01MYbx76htbiZLVwj82wSToM","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01MYbx76htbiZLVwj82wSToM","content":"```\n1\t# Tool Registry Foundation TechSpec\n2\t\n3\t## Executive Summary\n4\t\n5\tThis TechSpec designs AGH's Tool Registry as a daemon-owned runtime service, not as a static list of built-in commands. The registry will unify tool identity, discovery, availability, policy, execution, hooks, telemetry, extension descriptors, MCP adapters, and session-visible exposure through one central dispatch pipeline.\n6\t\n7\tThere is no existing `_prd.md` for this task. The scope is based on the user request, competitor research under `.compozy/tasks/tools-registry/analysis/`, accepted ADRs under `.compozy/tasks/tools-registry/adrs/`, current AGH code exploration, and the prior autonomous skills/tools registry gap analysis.\n8\t\n9\tThe primary architectural trade-off is to make AGH-native tools visible to sessions through an AGH-hosted local MCP server in the MVP instead of trying to invent an ACP-specific registry. 
ACP does not define a callable tool registry; it defines session lifecycle, MCP bootstrap, permission callbacks, and tool-call observations. MCP provides the programmatic `Tool.name`, so AGH will expose its canonical `ToolID` directly as the hosted MCP tool name.\n10\t\n11\tThe foundation will support native/bundled tools and extension-installed tools. Built-in AGH tools may execute in-process. Third-party extension tools must stay out-of-process through MCP, extension sidecar/Host API, subprocess adapter, or a future bridge adapter. Extension manifests can install tool descriptors immediately; execution becomes available only when a healthy authorized backend handle exists.\n12\t\n13\t## MVP Boundary Statement\n14\t\n15\tMVP boundary: implementation steps 1-13 build the Tool Registry foundation, AGH-hosted MCP session exposure, native bootstrap tools, extension descriptor projection, shared CLI/HTTP/UDS surfaces, policy/availability enforcement, hooks, observability, docs, and verification. 
This MVP proves the registry as an executable daemon primitive without replacing every ACP provider-native tool.\n16\t\n17\tPost-MVP work deferred to later TechSpecs:\n18\t\n19\t- direct driver-specific tool injection outside hosted MCP;\n20\t- full shell/browser/file tool replacement for ACP runtimes;\n21\t- remote peer tool execution over AGH Network;\n22\t- provider-specific deferred schema loading such as Anthropic `tool_reference`;\n23\t- broad marketplace signing/trust overhaul;\n24\t- skill install/remove/update tools;\n25\t- bridge SDK executable tool adapters;\n26\t- client-supplied ACP `mcpServers` as session-scoped registry sources.\n27\t\n28\tExplicitly out of scope for this TechSpec:\n29\t\n30\t- in-process third-party extension handlers;\n31\t- silent compatibility aliases for dotted tool IDs;\n32\t- policy bypasses for `approve-all`;\n33\t- partial surface delivery where CLI/HTTP ships without UDS, docs, codegen, and hosted MCP parity;\n34\t- storing matchable ownership or authorization state inside opaque JSON metadata blobs.\n35\t\n36\t## Architectural Boundaries\n37\t\n38\t`internal/daemon` remains the only composition root. It may import and compose `internal/tools`, `internal/skills`, `internal/mcp`, `internal/extension`, `internal/hooks`, `internal/session`, `internal/network`, `internal/task`, `internal/api/*`, and config/resource stores. No package may import `internal/daemon`.\n39\t\n40\tPackage import boundaries:\n41\t\n42\t- `internal/tools` owns `ToolID`, descriptors, providers, registry, policy interfaces, availability, dispatch contracts, and result normalization. It must not import `internal/daemon`, `internal/api/*`, `internal/cli`, `internal/extension`, `internal/session`, `internal/network`, or `internal/task`.\n43\t- `internal/catalog`, if added, is a thin composition-facing facade over `internal/tools` and `internal/skills`. 
It must not own tool dispatch or policy.\n44\t- `internal/extension` may publish cold tool descriptors and backend metadata through resources. It must not execute third-party tool handlers in-process and must not import registry internals beyond public `internal/tools` descriptor/resource contracts.\n45\t- `internal/mcp` may adapt MCP tools and host the AGH MCP stdio proxy. All AGH-owned calls must enter `internal/tools.Registry.Call` through UDS or an injected interface; MCP code must not duplicate dispatch policy.\n46\t- `internal/api/core` owns transport-independent handlers. `internal/api/httpapi` and `internal/api/udsapi` only register routes and transport concerns.\n47\t- `internal/cli` calls UDS/HTTP client methods and does not import runtime registry implementations.\n48\t- `internal/hooks` owns typed hook payloads and execution. Hooks dispatch at the registry call site; no code may tail event tables to trigger tool hooks.\n49\t- `internal/store` may persist session lineage permission atoms and events. It must not decide tool policy; it validates and stores normalized atoms.\n50\t- `internal/session` can receive hosted MCP server config and session projections through interfaces. It must not implement a parallel tool registry.\n51\t\n52\tBoundaries to update in implementation:\n53\t\n54\t- If `internal/catalog` or a new `internal/mcp` subpackage is added, update `magefile.go` package boundary checks in the same change.\n55\t- Any OpenAPI/contract change must co-ship generated `openapi/agh.json` and `web/src/generated/agh-openapi.d.ts`.\n56\t- Any CLI surface must co-ship HTTP/UDS parity unless the spec explicitly marks the surface CLI-only. 
This TechSpec has no CLI-only tool surfaces.\n57\t\n58\t## System Architecture\n59\t\n60\t### Component Overview\n61\t\n62\t| Component | Responsibility | Boundary |\n63\t|---|---|---|\n64\t| `internal/tools` runtime registry | Owns canonical `ToolID`, descriptors, providers, availability projection, policy evaluation, dispatch, result normalization, and telemetry hooks | Does not import `daemon`, `api`, or `cli`; composed by `internal/daemon` |\n65\t| Cold `tool` resources | Persist desired-state tool metadata from extensions, bundles, and future dynamic producers | Metadata only; no function pointers or execution handles |\n66\t| Built-in tool provider | Registers AGH-native tools such as tool search, skill view, network peers/send, and bounded task tools | In-process only because it ships inside the daemon binary |\n67\t| Extension descriptor provider | Converts extension-published tool resources into runtime descriptors | Does not execute extension code during discovery |\n68\t| MCP adapter provider | Discovers/calls MCP-backed tools and preserves raw server/tool names in `SourceRef` | Fails closed on health, auth, or name collision problems |\n69\t| `internal/catalog` facade | Thin cross-domain list/search/view facade over tools and skills | Optional coordination layer; runtime tool dispatch remains in `internal/tools` |\n70\t| Policy engine | Combines ACP approval mode, session lineage, agent policy, source/risk defaults, registry allow/deny, toolsets, availability, and hooks | Produces structured effective decisions, never a single ambiguous boolean |\n71\t| AGH-hosted MCP proxy | Exposes session-callable AGH tools as MCP tools using canonical `ToolID` names | Runs through `agh tool mcp --session \u003cid\u003e` and proxies to daemon over UDS |\n72\t| API/CLI surfaces | Expose machine-readable list/search/info/invoke/status behavior | Shared contracts in `internal/api/contract`, handlers in `internal/api/core`, HTTP/UDS parity |\n73\t| Hook integration | 
Runs `tool.pre_call`, `tool.post_call`, and `tool.post_error` around registry dispatch | Hooks can deny, narrow, patch, redact, or annotate, but cannot bypass policy |\n74\t| Observability | Emits durable events and metrics for registration, projection, decisions, calls, failures, conflicts, truncation, and policy denials | Redacts secrets and raw tokens |\n75\t\n76\tData flow:\n77\t\n78\t1. Extensions, built-ins, MCP servers, and future dynamic providers register cold descriptors or runtime handles.\n79\t2. `internal/daemon` composes providers into `internal/tools.Registry`.\n80\t3. Registry indexes descriptors by canonical `ToolID`.\n81\t4. Operator surfaces can list all tools with status and reason codes.\n82\t5. Session/model-visible surfaces request a session projection and receive only callable tools for that effective context.\n83\t6. Every invocation enters `Registry.Call`, which validates schema, recomputes availability/policy, runs hooks, calls the adapter, normalizes output, persists/observes, and returns a bounded result.\n84\t\n85\t## Implementation Design\n86\t\n87\t### Core Interfaces\n88\t\n89\tThese are final-shape interface contracts for implementation planning. 
Implementers may add helper types, but registry dispatch must preserve these method responsibilities and must not reintroduce metadata-only runtime providers.\n90\t\n91\t```go\n92\ttype Registry interface {\n93\t\tList(ctx context.Context, scope Scope) ([]ToolView, error)\n94\t\tSearch(ctx context.Context, scope Scope, q SearchQuery) ([]ToolView, error)\n95\t\tGet(ctx context.Context, scope Scope, id ToolID) (ToolView, error)\n96\t\tCall(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error)\n97\t}\n98\t\n99\ttype Handle interface {\n100\t\tDescriptor() Descriptor\n101\t\tAvailability(ctx context.Context, scope Scope) Availability\n102\t\tCall(ctx context.Context, req CallRequest) (ToolResult, error)\n103\t}\n104\t```\n105\t\n106\tSupporting contracts:\n107\t\n108\t```go\n109\ttype Provider interface {\n110\t\tID() SourceRef\n111\t\tList(ctx context.Context, scope Scope) ([]Descriptor, error)\n112\t\tResolve(ctx context.Context, scope Scope, id ToolID) (Handle, bool, error)\n113\t}\n114\t\n115\ttype PolicyEvaluator interface {\n116\t\tEvaluate(ctx context.Context, scope Scope, d Descriptor) (EffectiveToolDecision, error)\n117\t}\n118\t```\n119\t\n120\t```go\n121\ttype ResultLimiter interface {\n122\t\tApply(ctx context.Context, d Descriptor, result ToolResult) (ToolResult, error)\n123\t}\n124\t\n125\ttype HookRunner interface {\n126\t\tPreCall(ctx context.Context, call CallRequest) (CallRequest, EffectiveToolDecision, error)\n127\t\tPostCall(ctx context.Context, call CallRequest, result ToolResult) (ToolResult, error)\n128\t\tPostError(ctx context.Context, call CallRequest, err error) error\n129\t}\n130\t```\n131\t\n132\tError conventions:\n133\t\n134\t- `ErrToolNotFound`\n135\t- `ErrToolConflict`\n136\t- `ErrToolUnavailable`\n137\t- `ErrToolDenied`\n138\t- `ErrToolApprovalRequired`\n139\t- `ErrToolInvalidInput`\n140\t- `ErrToolResultTooLarge`\n141\t- `ErrToolBackendFailed`\n142\t\n143\tAll production errors must wrap with `%w` where applicable and 
must map to deterministic API/CLI error codes.\n144\t\n145\t### Data Models\n146\t\n147\t`ToolID`\n148\t\n149\t- Canonical public ID used by registry, policy, CLI, HTTP, UDS, hooks, telemetry, and hosted MCP.\n150\t- Format: `\u003csegment\u003e( \"__\" \u003csegment\u003e )*`\n151\t- Segment regex: `[a-z][a-z0-9_]*`\n152\t- Max length: 64.\n153\t- Lowercase ASCII only.\n154\t- `__` is reserved as namespace separator.\n155\t- No dots, hyphens, uppercase, empty segments, or dual wire aliases.\n156\t\n157\tExamples:\n158\t\n159\t- `agh__tool_list`\n160\t- `agh__tool_search`\n161\t- `agh__tool_info`\n162\t- `agh__skill_list`\n163\t- `agh__skill_search`\n164\t- `agh__skill_view`\n165\t- `agh__network_peers`\n166\t- `agh__network_send`\n167\t- `agh__task_list`\n168\t- `agh__task_read`\n169\t- `mcp__github__create_issue`\n170\t- `ext__linear__search`\n171\t\n172\t`Descriptor`\n173\t\n174\t- `id ToolID`\n175\t- `display_title string`\n176\t- `description string`\n177\t- `input_schema json.RawMessage`\n178\t- `output_schema json.RawMessage`\n179\t- `source SourceRef`\n180\t- `visibility Visibility`\n181\t- `risk RiskClass`\n182\t- `read_only bool`\n183\t- `destructive bool`\n184\t- `open_world bool`\n185\t- `requires_interaction bool`\n186\t- `concurrency_safe bool`\n187\t- `max_result_bytes int64`\n188\t- `toolsets []ToolsetID`\n189\t- `tags []string`\n190\t- `search_hints []string`\n191\t\n192\t`SourceRef`\n193\t\n194\t- `kind`: `builtin`, `mcp`, `extension`, `dynamic`\n195\t- `owner`: daemon, extension id, MCP server id, bundle id, skill id, or provider id\n196\t- `raw_server_name`\n197\t- `raw_tool_name`\n198\t- `resource_id`\n199\t- `resource_version`\n200\t- `workspace_id`\n201\t- `scope`\n202\t\n203\t`Availability`\n204\t\n205\tStates are composable, not a single boolean:\n206\t\n207\t- `registered`\n208\t- `enabled`\n209\t- `available`\n210\t- `authorized`\n211\t- `executable`\n212\t- `conflicted`\n213\t\n214\tReason codes include:\n215\t\n216\t- 
`dependency_missing`\n217\t- `backend_unhealthy`\n218\t- `extension_inactive`\n219\t- `mcp_unreachable`\n220\t- `mcp_auth_required`\n221\t- `source_disabled`\n222\t- `policy_denied`\n223\t- `approval_required`\n224\t- `session_denied`\n225\t- `hook_denied`\n226\t- `schema_invalid`\n227\t- `conflicted_id`\n228\t- `conflicted_sanitized_name`\n229\t- `result_budget_exceeded`\n230\t\n231\t`EffectiveToolDecision`\n232\t\n233\t- `visible_to_operator bool`\n234\t- `visible_to_session bool`\n235\t- `callable bool`\n236\t- `approval_required bool`\n237\t- `system_permission_mode`\n238\t- `session_policy_result`\n239\t- `agent_policy_result`\n240\t- `registry_policy_result`\n241\t- `source_policy_result`\n242\t- `availability_result`\n243\t- `hook_result`\n244\t- `reason_codes []string`\n245\t\n246\t`ToolResult`\n247\t\n248\t- `content []ToolContent`\n249\t- `structured any`\n250\t- `preview string`\n251\t- `artifacts []ArtifactRef`\n252\t- `metadata map[string]any`\n253\t- `redactions []Redaction`\n254\t- `truncated bool`\n255\t- `bytes int64`\n256\t- `duration_ms int64`\n257\t\n258\t`Toolset`\n259\t\n260\tToolsets are separate named resources/config entries, not overloaded as tools. They use a typed `ToolsetID`, include exact `ToolID`s and patterns, and may include other toolsets recursively. 
Policy fields must distinguish `tools` and `toolsets` to avoid ambiguity.\n261\t\n262\t### Data-Model Field Rationale\n263\t\n264\t| Field or key | Shape | Purpose | Storage decision |\n265\t|---|---|---|---|\n266\t| `ToolID` | string, provider-safe `__`-segmented id | Single policy/dispatch/audit identity across registry, CLI, HTTP, UDS, hooks, telemetry, and hosted MCP | Typed field, never inferred from display title |\n267\t| `Descriptor.source` | structured `SourceRef` | Preserve raw external provenance without making prefixes the only source of truth | Typed struct; raw names are subfields, not alternate IDs |\n268\t| `Descriptor.input_schema` | JSON Schema object | Validate call input before dispatch | JSON schema payload is appropriate because schema content is externally structured and opaque to AGH queries |\n269\t| `Descriptor.output_schema` | JSON Schema object | Optional structured output contract | JSON schema payload is appropriate for the same reason as input schema |\n270\t| `Descriptor.visibility` | enum | Separate internal/operator/session/model surfaces | Typed enum, queryable and policy-relevant |\n271\t| `Descriptor.risk` | enum | Classify read/mutate/network/open-world behavior | Typed enum, never free-text metadata |\n272\t| `Descriptor.read_only` | bool | Drives `approve-reads` and safety checks | Typed bool; misclassification is a security bug |\n273\t| `Descriptor.destructive` | bool | Forces stricter policy and approval behavior | Typed bool, not metadata |\n274\t| `Descriptor.open_world` | bool | Marks tools that can reach arbitrary external state | Typed bool, not metadata |\n275\t| `Descriptor.requires_interaction` | bool | Distinguishes autonomous-safe calls from interactive calls | Typed bool, not metadata |\n276\t| `Descriptor.max_result_bytes` | int64 | Enforces result budget consistently across surfaces | Typed numeric field with config default |\n277\t| `Descriptor.toolsets` | `[]ToolsetID` | Supports recursive named bundles 
without overloading individual tools | Typed list; expanded to concrete `ToolID`s for lineage |\n278\t| `EffectiveToolDecision.reason_codes` | `[]string` enum values | Operator diagnostics and deterministic error contracts | Typed enum strings; no prose-only decisions |\n279\t| `[tools].enabled` | bool | Global registry execution switch | Config key with validation/defaults |\n280\t| `[tools].hosted_mcp_enabled` | bool | Allows session exposure through AGH-hosted MCP | Config key with validation/defaults |\n281\t| `[tools].default_max_result_bytes` | int64 | Default output cap when descriptor is silent | Config key with validation/defaults |\n282\t| `[tools.policy].external_default` | enum | Default executable policy for extension/MCP/dynamic tools | Config key, not hidden in metadata |\n283\t| `agent.tools` | `[]ToolID/pattern` | Allow concrete tools/patterns for an agent | Frontmatter/config field resolved to concrete lineage atoms |\n284\t| `agent.toolsets` | `[]ToolsetID` | Allow named bundles for an agent | Frontmatter/config field expanded before session lineage |\n285\t| `agent.deny_tools` | `[]ToolID/pattern` | Explicit narrowing layer for an agent | Frontmatter/config field, not runtime-only state |\n286\t| `extension.resources.tools.*.backend` | structured backend metadata | Connect static descriptor to MCP/sidecar/subprocess backend | Extension manifest fields, not in-process function pointers |\n287\t\n288\tNo new SQLite columns are required for the MVP registry foundation. Existing session lineage stores concrete tool permission atoms; implementation should validate those atoms as canonical `ToolID`s. 
If a later task needs durable queryable tool-call history beyond append-only events, it must add a typed side table such as `tool_calls` rather than placing queryable call state in a session metadata JSON blob.\n289\t\n290\t### Side-Table vs JSON Decisions\n291\t\n292\t| Domain state | Decision | Rationale |\n293\t|---|---|---|\n294\t| Tool descriptors | Typed resource specs plus runtime descriptors | Descriptors are matchable by id/source/risk/visibility and must not live as opaque metadata blobs |\n295\t| Toolsets | Typed config/resource records | Toolset membership affects policy and lineage; it must be queryable and expandable deterministically |\n296\t| Tool call events | Append-only event payloads for MVP; typed side table only if queryable history is required | Events are the operational ledger; indexed call history would be matchable state and must not be hidden in JSON metadata |\n297\t| Source provenance | Structured `SourceRef` fields | Raw MCP/extension names are needed for debugging and collision handling |\n298\t| Input/output schemas | JSON Schema blobs | Schema contents are inherently opaque external contracts and are not AGH ownership state |\n299\t| Tool result structured payload | JSON payload plus typed envelope fields | Result body can be arbitrary, but status, bytes, truncation, redaction, and tool id are typed envelope fields |\n300\t| Policy decisions | Typed `EffectiveToolDecision` | Authorization is matchable and auditable; it cannot be a JSON bag |\n301\t| Availability reasons | Typed reason-code list | Operator diagnostics and tests need deterministic matching |\n302\t\n303\t### API Endpoints\n304\t\n305\tAll endpoints are implemented once in `internal/api/core` and registered by HTTP and UDS transports.\n306\t\n307\t| Method | Path | Description |\n308\t|---|---|---|\n309\t| `GET` | `/api/tools` | List operator-visible tools with availability/policy reason codes |\n310\t| `POST` | `/api/tools/search` | Search tools by id, title, 
description, source, tags, and toolsets |\n311\t| `GET` | `/api/tools/{id}` | Return descriptor, availability, policy view, schema, and source provenance |\n312\t| `POST` | `/api/tools/{id}/invoke` | Invoke a tool through registry dispatch |\n313\t| `GET` | `/api/sessions/{id}/tools` | Return session/model-visible callable projection |\n314\t| `POST` | `/api/sessions/{id}/tools/search` | Search only within effective session-callable projection |\n315\t| `GET` | `/api/toolsets` | List named toolsets and expansion status |\n316\t| `GET` | `/api/toolsets/{id}` | Inspect one toolset expansion and conflicts |\n317\t\n318\tInvoke request:\n319\t\n320\t```json\n321\t{\n322\t \"session_id\": \"sess_...\",\n323\t \"workspace_id\": \"ws_...\",\n324\t \"input\": {},\n325\t \"approval_token\": \"optional-local-approval-reference\"\n326\t}\n327\t```\n328\t\n329\tInvoke response:\n330\t\n331\t```json\n332\t{\n333\t \"tool_id\": \"agh__skill_view\",\n334\t \"status\": \"completed\",\n335\t \"result\": {},\n336\t \"truncated\": false,\n337\t \"duration_ms\": 23,\n338\t \"events\": []\n339\t}\n340\t```\n341\t\n342\tStatus codes:\n343\t\n344\t- `200` completed/listed.\n345\t- `202` approval required or async dispatch accepted, only if the tool is explicitly async.\n346\t- `400` invalid `ToolID`, invalid schema input, or malformed request.\n347\t- `403` denied by ACP ceiling, session lineage, registry policy, source policy, or hook.\n348\t- `404` not found or hidden from caller context.\n349\t- `409` conflicted canonical ID or sanitized external name.\n350\t- `422` registered but unavailable or not executable.\n351\t- `500` internal daemon error.\n352\t- `502` backend adapter failure.\n353\t\n354\tCLI parity:\n355\t\n356\t- `agh tool list -o json`\n357\t- `agh tool search \u003cquery\u003e -o json`\n358\t- `agh tool info \u003ctool-id\u003e -o json`\n359\t- `agh tool invoke \u003ctool-id\u003e --input \u003cjson\u003e -o json`\n360\t- `agh toolsets list -o json`\n361\t- `agh toolsets 
info \u003ctoolset-id\u003e -o json`\n362\t- `agh tool mcp --session \u003csession-id\u003e` for the hosted MCP stdio proxy\n363\t\n364\t## Integration Points\n365\t\n366\t### ACP\n367\t\n368\tACP does not impose a callable tool registry pattern. AGH must not use ACP `ToolCall.title` as a policy identity or dispatch key.\n369\t\n370\tAGH will integrate with ACP by:\n371\t\n372\t- passing the hosted AGH MCP server in ACP `mcpServers` during session creation/load when the selected agent supports MCP;\n373\t- preserving ACP tool-call observations as session events keyed by `toolCallId`;\n374\t- mapping AGH-owned tool calls back into ACP lifecycle updates where relevant;\n375\t- treating ACP `ToolKind` as risk/display metadata, not identity;\n376\t- keeping `permissions.mode` as the system/session approval ceiling.\n377\t\n378\t### Hosted MCP\n379\t\n380\tMVP exposure path:\n381\t\n382\t```text\n383\tagent session -\u003e ACP mcpServers -\u003e agh tool mcp --session \u003cid\u003e -\u003e UDS -\u003e daemon Tool Registry\n384\t```\n385\t\n386\tThe hosted MCP server lists only session-callable tools. It exposes MCP `Tool.name` equal to AGH canonical `ToolID`. 
It does not expose unavailable, unauthorized, or conflicted tools to the model-visible surface.\n387\t\n388\t### Extensions\n389\t\n390\tExtension-installed tools are possible and should be first-class.\n391\t\n392\tCurrent foundation already has:\n393\t\n394\t- `extension.toml` `resources.tools`\n395\t- `resources.publish.families = [\"tools\"]`\n396\t- resource projection into daemon tool records\n397\t- extension health/status infrastructure\n398\t- Host API capability checks\n399\t\n400\tThis TechSpec extends that by adding backend metadata and runtime projection:\n401\t\n402\t```toml\n403\t[resources.tools.search]\n404\tdescription = \"Search Linear issues\"\n405\tread_only = true\n406\tbackend.kind = \"mcp\"\n407\tbackend.server = \"linear\"\n408\tbackend.tool = \"search\"\n409\ttoolsets = [\"linear__read\"]\n410\t```\n411\t\n412\tCanonical ID is derived by the registry from the trusted source context, for example:\n413\t\n414\t```text\n415\text__linear__search\n416\t```\n417\t\n418\tExtensions must not freely impersonate `agh__*` or another extension namespace. Raw manifest names remain in `SourceRef`.\n419\t\n420\tExecution boundaries:\n421\t\n422\t- `mcp`: call through MCP client/adapter.\n423\t- `extension_host`: call the running extension sidecar through a typed Host API/protocol method.\n424\t- `subprocess`: launch a bounded subprocess through AGH-owned process supervision.\n425\t- `bridge`: future bridge adapter.\n426\t- `builtin`: reserved for AGH-owned daemon code only.\n427\t\n428\tNo in-process third-party extension handlers in MVP.\n429\t\n430\t### MCP Sources\n431\t\n432\tMCP-backed tools use:\n433\t\n434\t```text\n435\tmcp__\u003cserver\u003e__\u003ctool\u003e\n436\t```\n437\t\n438\tThe registry must preserve raw server/tool names in `SourceRef`. Sanitization collisions fail closed and mark the candidate tool `conflicted`.\n439\t\n440\tAGH-managed MCP sources come first in MVP. 
Client-supplied ACP `mcpServers` can be a later compatibility extension only if they become session-scoped tool sources with explicit source trust and collision handling.\n441\t\n442\t### Hooks\n443\t\n444\tExisting tool hook concepts become part of central registry dispatch:\n445\t\n446\t- `tool.pre_call`\n447\t- `tool.post_call`\n448\t- `tool.post_error`\n449\t\n450\tPayloads should use canonical `tool_id`. Existing `tool_name` / `tool_namespace` usage should be replaced in the registry path to avoid dual identity.\n451\t\n452\t### Skills\n453\t\n454\tBuilt-in skill tools prove progressive disclosure:\n455\t\n456\t- `agh__skill_list`\n457\t- `agh__skill_search`\n458\t- `agh__skill_view`\n459\t\n460\tThese call into `internal/skills.Registry`, respect workspace overlays, reuse content verification, and enforce result budgets. Install/remove/update tools are intentionally out of MVP unless supply-chain policy/scanning is expanded.\n461\t\n462\t### Network And Tasks\n463\t\n464\tMVP includes:\n465\t\n466\t- `agh__network_peers`\n467\t- `agh__network_send`\n468\t- bounded `agh__task_*` tools\n469\t\n470\tThese must route through existing network/task services and existing authorization rules. 
Mutating tools must not be classified as read-only.\n471\t\n472\t## Extensibility Integration Plan\n473\t\n474\t### Extension Manifests\n475\t\n476\tUpdate extension manifest tool declarations with optional runtime metadata:\n477\t\n478\t- `backend.kind`\n479\t- `backend.server`\n480\t- `backend.tool`\n481\t- `backend.command`\n482\t- `backend.args`\n483\t- `requires_env`\n484\t- `required_capabilities`\n485\t- `risk`\n486\t- `destructive`\n487\t- `open_world`\n488\t- `requires_interaction`\n489\t- `max_result_bytes`\n490\t- `toolsets`\n491\t- `tags`\n492\t- `visibility`\n493\t\n494\tExtension-published descriptors are installed through existing resource publication, then normalized by the registry provider.\n495\t\n496\t### Hooks\n497\t\n498\tAdd or update hook payload schemas to include:\n499\t\n500\t- `tool_id`\n501\t- `display_title`\n502\t- `source`\n503\t- `risk`\n504\t- `read_only`\n505\t- `destructive`\n506\t- `open_world`\n507\t- `session_id`\n508\t- `workspace_id`\n509\t- `decision`\n510\t- `reason_codes`\n511\t- `input_digest`\n512\t- `result_digest`\n513\t\n514\tHooks can deny or patch only through typed return contracts. They cannot raise permissions above ACP/session policy.\n515\t\n516\t### Skills, Tools, Resources, Bundles\n517\t\n518\tAdd toolsets as named resources/config entries:\n519\t\n520\t- built-in `agh__bootstrap`\n521\t- built-in `agh__catalog`\n522\t- built-in `agh__coordination`\n523\t- built-in `agh__tasks`\n524\t- extension-provided toolsets such as `linear__read`\n525\t\n526\tBundles may include toolsets in the future, but must expand to concrete `ToolID`s during session projection to preserve lineage narrowing.\n527\t\n528\t### Bridge SDKs\n529\t\n530\tNo direct bridge SDK execution in MVP. The registry design reserves a `bridge` backend kind, but the adapter is not required until bridge-managed tools need execution.\n531\t\n532\t### AGH Network\n533\t\n534\tNo remote peer tool execution in MVP. 
Peer discovery may later advertise loaded toolsets or tool summaries, but remote dispatch requires a separate trust and authorization design.\n535\t\n536\t### Docs For Extension Authors\n537\t\n538\tAdd docs covering:\n539\t\n540\t- canonical `ToolID` rules;\n541\t- extension tool manifest shape;\n542\t- backend kinds;\n543\t- why third-party handlers are out-of-process;\n544\t- result budgets and redaction;\n545\t- availability reason codes;\n546\t- how to debug conflicted/unavailable tools;\n547\t- CLI/HTTP/UDS management paths.\n548\t\n549\t## Agent Manageability Plan\n550\t\n551\tAgents must be able to inspect and operate the registry without the web UI.\n552\t\n553\tCLI:\n554\t\n555\t- `agh tool list -o json`\n556\t- `agh tool search \u003cquery\u003e -o json`\n557\t- `agh tool info \u003ctool-id\u003e -o json`\n558\t- `agh tool invoke \u003ctool-id\u003e --input \u003cjson\u003e -o json`\n559\t- `agh toolsets list -o json`\n560\t- `agh toolsets info \u003ctoolset-id\u003e -o json`\n561\t- `agh tool mcp --session \u003cid\u003e`\n562\t\n563\tHTTP and UDS parity:\n564\t\n565\t- Same contract types.\n566\t- Same reason codes.\n567\t- Same redaction.\n568\t- Same policy decisions.\n569\t- UDS is the preferred local machine path for CLI and hosted MCP proxy.\n570\t\n571\tDiscovery behavior:\n572\t\n573\t- Operator surfaces show all registered tools, including unavailable/unauthorized/conflicted entries.\n574\t- Session/model-visible surfaces show only callable tools for that effective session.\n575\t- Dispatch always recomputes policy and availability even if discovery already hid unsafe tools.\n576\t\n577\tDeterministic errors:\n578\t\n579\t- Errors include `code`, `message`, `tool_id`, `reason_codes`, and redacted structured details.\n580\t- Policy errors must identify the denying layer: `system_permission_mode`, `session_lineage`, `agent_policy`, `registry_policy`, `source_policy`, `availability`, or `hook`.\n581\t\n582\tE2E manageability checks:\n583\t\n584\t- 
CLI list/search/info matches HTTP and UDS for the same workspace/session.\n585\t- Hosted MCP `tools/list` equals `GET /api/sessions/{id}/tools`.\n586\t- Denied tools are visible in operator list but absent from session projection.\n587\t- Extension-installed tool descriptor appears after install and disappears after disable/remove.\n588\t\n589\t## Config Lifecycle\n590\t\n591\t### Global `config.toml`\n592\t\n593\tAdd:\n594\t\n595\t```toml\n596\t[tools]\n597\tenabled = true\n598\thosted_mcp_enabled = true\n599\tdefault_max_result_bytes = 262144\n600\t\n601\t[tools.policy]\n602\texternal_default = \"disabled\"\n603\t```\n604\t\n605\tSemantics:\n606\t\n607\t- `tools.enabled=false` disables AGH-owned registry dispatch and hosted MCP exposure, but operator diagnostics can still show static resources where safe.\n608\t- `hosted_mcp_enabled=true` allows AGH to inject/offer the local hosted MCP proxy for sessions.\n609\t- `default_max_result_bytes` applies when a descriptor does not specify a smaller limit.\n610\t- `external_default=\"disabled\"` means extension/MCP/dynamic executable tools are registered and operator-visible, but not session-callable until enabled by explicit policy/toolset grants. Built-in AGH bootstrap tools remain enabled by default subject to ACP/session policy.\n611\t\n612\tAllowed `external_default` values:\n613\t\n614\t- `disabled`\n615\t- `ask`\n616\t- `enabled`\n617\t\n618\tMVP default is `disabled`.\n619\t\n620\t### Agent Definitions\n621\t\n622\tKeep the existing `tools` field but harden its meaning:\n623\t\n624\t- `tools`: exact canonical `ToolID`s or approved wildcard patterns.\n625\t- `toolsets`: named toolset IDs.\n626\t- `deny_tools`: exact IDs or patterns that always narrow permissions.\n627\t\n628\tSession lineage should persist concrete resolved `ToolID` atoms, not broad unresolved wildcard patterns. 
Child session permissions must remain subsets of parent session permissions.\n629\t\n630\t### Validation\n631\t\n632\tConfig validation must reject:\n633\t\n634\t- invalid `ToolID` patterns;\n635\t- unknown toolset IDs when a config is resolved in a concrete workspace;\n636\t- `__` misuse;\n637\t- extension attempts to publish under reserved `agh__*`;\n638\t- global defaults that would expose external tools without source policy support;\n639\t- result byte limits below zero or above a daemon maximum.\n640\t\n641\t### Docs And Generated Surfaces\n642\t\n643\tUpdate:\n644\t\n645\t- CLI docs for `agh tool` and `agh toolsets`;\n646\t- settings docs for `permissions.mode` to clarify ceiling behavior;\n647\t- extension author docs;\n648\t- site docs for Tool Registry architecture;\n649\t- OpenAPI contract and generated web types.\n650\t\n651\t## Impact Analysis\n652\t\n653\t| Component | Impact Type | Description and Risk | Required Action |\n654\t|---|---|---|---|\n655\t| `internal/tools` | Modified/new | Becomes runtime registry owner, not just metadata definitions | Add `ToolID`, descriptors, providers, registry, policy, dispatch |\n656\t| `internal/resources` | Modified | Cold tool resource remains desired state but must carry canonical ID/source metadata | Update codecs, validators, tests |\n657\t| `internal/extension` | Modified | Extension tools gain backend metadata and canonical runtime projection | Extend manifest types, validation, publication tests |\n658\t| `internal/mcp` | Modified/new | Hosted MCP proxy exposes registry tools; MCP adapter normalizes external tools | Add MCP list/call bridge through UDS/registry |\n659\t| `internal/acp` | Modified | Session creation/load must include hosted AGH MCP where applicable; permission mode becomes registry ceiling | Wire session projection and maintain current permission semantics |\n660\t| `internal/store` | Modified | Session lineage `Tools` atoms become canonical resolved `ToolID`s | Validate IDs and preserve 
subset checks |\n661\t| `internal/hooks` | Modified | Tool hook payloads should use canonical `tool_id` | Update payloads, matchers, docs, tests |\n662\t| `internal/api/contract` | New/modified | Shared DTOs for tools/toolsets/calls/errors | Add contract types and codegen |\n663\t| `internal/api/core` | New/modified | Transport-independent tool handlers | Implement list/search/info/invoke/session projection |\n664\t| `internal/api/httpapi` | Modified | Register HTTP routes | Thin transport registration only |\n665\t| `internal/api/udsapi` | Modified | Register UDS routes | Thin transport registration only |\n666\t| `internal/cli` | Modified | Agent-manageable `agh tool` and `agh toolsets` commands | Add structured output and UDS client methods |\n667\t| `internal/skills` | Modified | Skill list/search/view tools call into skills registry | Expose safe service methods if needed |\n668\t| `internal/network` | Modified | Network tools call peers/send through existing service | Ensure mutating calls enforce policy |\n669\t| `internal/task` | Modified | Bounded task tools call task service | Keep TaskManager authority model intact |\n670\t| `web/` | Modified | Settings/help surfaces may display registry policy state and tool diagnostics | Only render truthful daemon-backed status |\n671\t| `packages/site` | Modified | Public docs for registry, extension tools, approval ceiling | Add docs and CLI reference updates |\n672\t| `.compozy/tasks/tools-registry/*` | New | Research, ADRs, final TechSpec | Keep analysis paths referenced in later tasks |\n673\t\n674\t## Test Strategy\n675\t\n676\t### Unit Tests\n677\t\n678\tTest:\n679\t\n680\t- `ToolID` validation, parsing, wildcard matching, and collision rejection.\n681\t- External name sanitization and fail-closed collision behavior.\n682\t- `Descriptor` validation and schema size limits.\n683\t- Availability state transitions and reason-code composition.\n684\t- Policy matrix across `deny-all`, `approve-reads`, and 
`approve-all`.\n685\t- Source defaults for built-in, extension, MCP, and dynamic tools.\n686\t- Agent allow/deny/toolset expansion.\n687\t- Session lineage concrete `ToolID` subset validation.\n688\t- Dispatch pipeline ordering.\n689\t- JSON schema input validation.\n690\t- Result truncation and redaction.\n691\t- Hook deny/patch/result behavior.\n692\t- Extension manifest backend validation.\n693\t\n694\tMocks are acceptable for provider I/O boundaries, but policy/dispatch correctness must be tested with real registry instances.\n695\t\n696\t### Integration Tests\n697\t\n698\tTest:\n699\t\n700\t- Extension manifest declares a tool and it appears in operator registry projection.\n701\t- Disabling/removing an extension removes or marks the tool unavailable.\n702\t- MCP-backed extension tool remains unavailable until backend health/auth passes.\n703\t- Built-in `agh__skill_view` calls real skills registry content.\n704\t- CLI/HTTP/UDS list/search/info parity.\n705\t- `agh tool mcp --session \u003cid\u003e` `tools/list` matches session projection.\n706\t- `approve-reads` exposes read-only tools but blocks mutating tools without approval.\n707\t- `approve-all` does not bypass explicit registry denies or session lineage narrowing.\n708\t- `deny-all` blocks execution while operator surfaces still show diagnostics.\n709\t- Hooks can deny pre-call and redact post-call output.\n710\t- Conflicted tool IDs are operator-visible and session-hidden.\n711\t- Result budget truncation is identical across CLI, HTTP, UDS, and MCP.\n712\t\n713\t### E2E Tests\n714\t\n715\tUse the AGH runtime E2E harness:\n716\t\n717\t- Start daemon with isolated `AGH_HOME`.\n718\t- Create a session with hosted AGH MCP enabled.\n719\t- Verify the agent session receives registry-backed MCP tools.\n720\t- Invoke a safe built-in tool through the hosted MCP path.\n721\t- Verify events, telemetry, CLI status, HTTP status, and UDS status agree.\n722\t- Install a test extension publishing a tool descriptor and 
MCP backend metadata.\n723\t- Verify operator diagnostics, session visibility, disable/remove lifecycle, and conflict handling.\n724\t\n725\tRun full `make verify` before completing implementation tasks.\n726\t\n727\t## Implementation Steps\n728\t\n729\t### Build Order\n730\t\n731\tOrdered implementation sequence respecting dependencies:\n732\t\n733\t1. Add canonical `ToolID`, descriptor, source, availability, result, and error contracts in `internal/tools` - no dependencies.\n734\t2. Replace metadata-only provider semantics with runtime provider/handle interfaces - depends on step 1.\n735\t3. Implement registry indexing, collision detection, and operator/session projections - depends on steps 1-2.\n736\t4. Implement policy evaluator with ACP ceiling, agent policy, session lineage, source defaults, allow/deny, and toolsets - depends on step 3.\n737\t5. Implement dispatch pipeline with schema validation, availability recheck, hooks, budgets, adapter call, normalization, and telemetry seams - depends on steps 3-4.\n738\t6. Add built-in provider for bootstrap AGH tools: `agh__tool_*`, `agh__skill_*`, `agh__network_*`, bounded `agh__task_*` - depends on step 5.\n739\t7. Add extension manifest backend metadata and descriptor projection into registry - depends on steps 1-3 and existing extension resources.\n740\t8. Add MCP adapter contracts and hosted MCP stdio proxy command `agh tool mcp --session` - depends on steps 3-6.\n741\t9. Add API contract DTOs and `internal/api/core` handlers - depends on steps 3-6.\n742\t10. Wire HTTP, UDS, CLI commands, and UDS client methods - depends on step 9.\n743\t11. Wire hooks and canonical `tool_id` payload updates end-to-end - depends on step 5.\n744\t12. Add config lifecycle, validation, generated docs, CLI docs, and site docs - depends on steps 4, 8, and 10.\n745\t13. 
Add integration/E2E coverage and run `make verify` - depends on all prior steps.\n746\t\n747\t### Technical Dependencies\n748\t\n749\tBlocking dependencies that must be resolved before implementation:\n750\t\n751\t- Existing ACP `permissions.mode` behavior in `internal/acp/permission.go`.\n752\t- Existing session lineage permission atoms in `internal/store/session_lineage.go`.\n753\t- Existing extension resource publication and `resources.publish.families = [\"tools\"]`.\n754\t- Existing hooks payload system.\n755\t- Existing MCP server configuration/resource lifecycle.\n756\t- Existing skills registry APIs.\n757\t- Existing task/network services.\n758\t- OpenAPI/codegen path for web contract updates.\n759\t\n760\t### Safety Invariants\n761\t\n762\t1. Every AGH-owned tool call enters `internal/tools.Registry.Call`; CLI, HTTP, UDS, hosted MCP, extension, and session paths cannot bypass the registry dispatch pipeline.\n763\t2. Dispatch recomputes availability and `EffectiveToolDecision` at call time, even when discovery already filtered the tool.\n764\t3. ACP `permissions.mode` is always a ceiling. Registry, source, agent, session, and hook policy can narrow authority but cannot raise it above the effective ACP/session mode.\n765\t4. `approve-all` skips approval prompts only for otherwise allowed tools; it does not bypass explicit denies, source grants, session lineage, conflicts, unavailable backends, or hooks.\n766\t5. `approve-reads` applies only to registry-classified read-only tools. Mutating, destructive, open-world, network-send, and task-write tools cannot inherit read approval by display title or ACP kind.\n767\t6. Session lineage stores concrete canonical `ToolID` atoms after toolset expansion; child sessions can only receive a subset of parent concrete atoms.\n768\t7. Tool ID collisions fail closed. A conflicted tool is operator-visible with reason codes and absent from session/model-visible projections.\n769\t8. 
Extension-installed tools are descriptor-only until an out-of-process backend handle is healthy, available, authorized, and policy-allowed.\n770\t9. Third-party extension tool handlers never run in-process in the daemon during MVP.\n771\t10. Hooks dispatch at the call site and cannot tail event tables, spawn parallel queues, or mutate durable ownership state outside typed hook contracts.\n772\t11. Tool result limiting and redaction run before results cross CLI, HTTP, UDS, MCP, SSE, logs, memory, or event payloads.\n773\t12. Raw `claim_token`, MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings never appear in tool inputs/outputs persisted or emitted by AGH-owned surfaces.\n774\t13. Hosted MCP `tools/list` is a projection of `GET /api/sessions/{id}/tools`; divergence is a test failure.\n775\t14. Operator-visible diagnostics are not security boundaries. Hidden session projection plus dispatch-time revalidation is the security boundary.\n776\t15. Toolsets are expanded deterministically and cycle-checked before session projection; recursive expansion cannot happen lazily during dispatch.\n777\t\n778\t## Monitoring and Observability\n779\t\n780\tEvents:\n781\t\n782\t- `tool.registry.refresh_started`\n783\t- `tool.registry.refresh_completed`\n784\t- `tool.registered`\n785\t- `tool.updated`\n786\t- `tool.removed`\n787\t- `tool.conflicted`\n788\t- `tool.availability_changed`\n789\t- `tool.policy_evaluated`\n790\t- `tool.call_started`\n791\t- `tool.call_completed`\n792\t- `tool.call_failed`\n793\t- `tool.call_denied`\n794\t- `tool.result_truncated`\n795\t\n796\tRequired fields:\n797\t\n798\t- `tool_id`\n799\t- `display_title`\n800\t- `source_kind`\n801\t- `source_owner`\n802\t- `workspace_id`\n803\t- `session_id`\n804\t- `parent_session_id`\n805\t- `root_session_id`\n806\t- `agent_name`\n807\t- `risk`\n808\t- `read_only`\n809\t- `destructive`\n810\t- `open_world`\n811\t- `approval_mode`\n812\t- `decision`\n813\t- `reason_codes`\n814\t- `duration_ms`\n815\t- 
`result_bytes`\n816\t- `truncated`\n817\t- `correlation_id`\n818\t\n819\tMetrics:\n820\t\n821\t- registered tools by source kind\n822\t- conflicted tools by source kind\n823\t- available/callable tools by session\n824\t- calls started/completed/failed/denied\n825\t- approval-required counts\n826\t- result truncation counts\n827\t- backend latency by source kind\n828\t- hook-denied counts\n829\t- policy-denied counts\n830\t\n831\tRedaction:\n832\t\n833\t- Never log raw claim tokens, MCP auth tokens, OAuth codes, PKCE verifiers, secret bindings, or full tool payloads when marked sensitive.\n834\t- Use digests for large inputs/results.\n835\t- Preserve enough metadata to debug policy and availability without exposing secrets.\n836\t\n837\t## Technical Considerations\n838\t\n839\t### Key Decisions\n840\t\n841\tDecision: Tool Registry is an AGH daemon runtime service, not an ACP registry.\n842\t\n843\tRationale: ACP has no callable tool registry and relies on MCP for tool discovery.\n844\t\n845\tTrade-off: AGH must maintain its own registry semantics and expose them through MCP/session surfaces.\n846\t\n847\tDecision: Use one canonical `ToolID` with `__` namespace separators.\n848\t\n849\tRationale: Avoid dotted/internal plus wire alias ambiguity and stay compatible with stricter provider naming limits.\n850\t\n851\tTrade-off: `agh__skill_view` is less visually elegant than `agh.skill.view`, but it prevents dual identity bugs.\n852\t\n853\tDecision: Built-ins can execute in-process; extension tools execute out-of-process.\n854\t\n855\tRationale: Daemon safety and extension isolation matter more than plugin convenience.\n856\t\n857\tTrade-off: Extension tool latency and setup are higher, but failure containment is better.\n858\t\n859\tDecision: ACP `permissions.mode` is the approval ceiling.\n860\t\n861\tRationale: Avoid contradictory policy layers and keep existing settings truthful.\n862\t\n863\tTrade-off: Registry policy must compute effective decisions rather than 
simple allow/deny flags.\n864\t\n865\tDecision: Operator and session projections differ.\n866\t\n867\tRationale: Operators need diagnostics; models should see only callable tools.\n868\t\n869\tTrade-off: More projection logic, but less model confusion and fewer unsafe calls.\n870\t\n871\tDecision: Keep cold resource records separate from runtime handles.\n872\t\n873\tRationale: Resource/projector system is good for installed metadata; executable dispatch needs function/protocol handles and live health.\n874\t\n875\tTrade-off: More types, but cleaner lifecycle and safer extension support.\n876\t\n877\t### Known Risks\n878\t\n879\tRisk: Mutating tools mislabeled as read-only.\n880\t\n881\tMitigation: Descriptor validation, review tests, policy matrix tests, and hook/audit visibility.\n882\t\n883\tRisk: External tool name collisions force late breaking changes.\n884\t\n885\tMitigation: Enforce canonical `ToolID` grammar and fail-closed collision handling in MVP.\n886\t\n887\tRisk: Hosted MCP path diverges from CLI/HTTP/UDS behavior.\n888\t\n889\tMitigation: Hosted MCP proxies through UDS into the same registry dispatch path.\n890\t\n891\tRisk: Extension tools become visible before safe backend execution exists.\n892\t\n893\tMitigation: Operator-visible only until availability, source policy, and backend handle all pass.\n894\t\n895\tRisk: `approve-all` is misunderstood as \"execute everything.\"\n896\t\n897\tMitigation: Docs and UI copy must clarify it auto-approves otherwise allowed calls; it does not bypass explicit denies, source grants, availability, lineage, or hooks.\n898\t\n899\tRisk: Toolsets create ambiguous policy language.\n900\t\n901\tMitigation: Keep `tools` and `toolsets` as separate typed fields; expand toolsets to concrete `ToolID`s for session lineage.\n902\t\n903\tRisk: Result payloads leak secrets or overwhelm context.\n904\t\n905\tMitigation: Central result limiter, redaction metadata, digesting, and output budgets.\n906\t\n907\t### Delete 
Targets\n908\t\n909\tBecause AGH is greenfield alpha, the implementation should hard-cut ambiguous old tool concepts instead of adding compatibility bridges:\n910\t\n911\t- Replace metadata-only `ToolProvider.Tools(ctx)` as the runtime extension point with provider/descriptor/handle contracts.\n912\t- Replace `internal/tools.Tool.Name` as a registry identity with canonical `ToolID` in new public contracts.\n913\t- Remove any new public use of dotted tool IDs or separate MCP wire aliases.\n914\t- Replace hook policy identity based on `tool_name` + `tool_namespace` with canonical `tool_id` for registry-owned tool calls.\n915\t- Update docs, tests, CLI/API examples, and task artifacts that refer to dotted IDs or dual aliasing.\n916\t\n917\t## Architecture Decision Records\n918\t\n919\t- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - extension tools are manifest-first and out-of-process in MVP.\n920\t- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - expose AGH registry tools through hosted local MCP plus shared CLI/HTTP/UDS.\n921\t- [ADR-003: Runtime Registry Package Boundary](adrs/adr-003-runtime-registry-package-boundary.md) - `internal/tools` owns runtime registry and dispatch; `internal/catalog` remains thin.\n922\t- [ADR-004: MVP Native Tool Scope](adrs/adr-004-mvp-native-tool-scope.md) - bootstrap catalog/skill tools plus selected network/task tools.\n923\t- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - ACP approval mode is the system/session ceiling.\n924\t- [ADR-006: Tool Visibility By Surface](adrs/adr-006-tool-visibility-by-surface.md) - operator projections show diagnostics; model projections show callable tools only.\n925\t- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - one provider-safe `ToolID` using reserved `__` namespace 
separators.\n926\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:19.804772Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:19.80496Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01XmFCbuKDbHVs6mejqedYhX","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01XmFCbuKDbHVs6mejqedYhX","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:19.821672Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01XmFCbuKDbHVs6mejqedYhX","blocks":[{"type":"tool_use","id":"toolu_01XmFCbuKDbHVs6mejqedYhX","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/synthesis.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:19.826013Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01VQuL5dzVH1hGakMuVDWQPe","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01VQuL5dzVH1hGakMuVDWQPe","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:20.134442Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VQuL5dzVH1hGakMuVDWQPe","blocks":[{"type":"tool_use","id":"toolu_01VQuL5dzVH1hGakMuVDWQPe","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_agh_current_state.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:20.155428Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:20.158963Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VQuL5dzVH1hGakMuVDWQPe","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:20.159985Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01XmFCbuKDbHVs6mejqedYhX","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:20.162219Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01VQuL5dzVH1hGakMuVDWQPe","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01VQuL5dzVH1hGakMuVDWQPe","content":"```\n1\t# AGH Current State: Tool Registry Foundation\n2\t\n3\t## Overview\n4\t\n5\tAGH already has the cold side of a tool catalog, but not the runtime side.\n6\t`internal/tools` defines a canonical `tool` resource shape and the daemon projects tool records through the generic resources system. Extensions can publish static tool metadata from `extension.toml`. 
Sessions can also carry concrete permission atoms for tools in lineage metadata.\n7\t\n8\tWhat is missing is the executable registry: a central service that can answer which tools exist for a specific agent/session, whether each tool is available now, whether the caller is allowed to use it, and how to dispatch the call through one uniform pipeline.\n9\t\n10\tThis matters because AGH's product premise is agent-first manageability. A tool registry is incomplete if it only helps internal Go code list metadata or if each ACP runtime owns a private tool universe that AGH cannot inspect, govern, or extend.\n11\t\n12\t## Existing Mechanisms\n13\t\n14\t### Tool resource metadata\n15\t\n16\t`internal/tools/tool.go` defines `ToolSource` values for `builtin`, `mcp`, `extension`, and `dynamic`, plus a small `Tool` record:\n17\t\n18\t- `Name`\n19\t- `Description`\n20\t- `InputSchema`\n21\t- `ReadOnly`\n22\t- `Source`\n23\t\n24\tThe only provider interface is:\n25\t\n26\t```go\n27\ttype ToolProvider interface {\n28\t Tools(ctx context.Context) ([]Tool, error)\n29\t}\n30\t```\n31\t\n32\tThere is no `Call`, `Availability`, `CheckPermission`, `Aliases`, `Namespace`, `IsConcurrencySafe`, `IsDestructive`, `MaxResultBytes`, `Owner`, `Visibility`, or provenance-rich source metadata.\n33\t\n34\t### Desired-state resources\n35\t\n36\t`internal/tools/resource.go` defines `ToolResourceKind = \"tool\"` and validates tool records as JSON-object specs with a maximum size of 256 KiB. This is a good base for persisted inventory, desired-state reconciliation, and extension-published tool metadata.\n37\t\n38\tThe daemon already has a generic `resourceCatalog[T]` in `internal/daemon/tool_mcp_resources.go` and a `newToolProjector` that projects reconciled `tool` records into a daemon-local snapshot. 
This catalog is descriptive and revisioned, but it is not executable.\n39\t\n40\t### Extension-published tools\n41\t\n42\t`internal/extension/manifest.go` already lets extensions declare:\n43\t\n44\t```go\n45\ttype ResourcesConfig struct {\n46\t Tools map[string]ToolConfig `toml:\"tools,omitempty\" json:\"tools,omitempty\"`\n47\t MCPServers map[string]MCPServerConfig `toml:\"mcp_servers,omitempty\" json:\"mcp_servers,omitempty\"`\n48\t}\n49\t```\n50\t\n51\t`ToolConfig` carries description, input schema, and read-only status. `ResolveManifestToolResources` converts these manifest entries into `toolspkg.Tool` records with `Source = ToolSourceExtension`. The daemon syncer publishes them into the resource graph with source keys like `extension/\u003cname\u003e/tool/\u003ctool\u003e`.\n52\t\n53\tThis is close to OpenClaw's manifest-first model, but AGH currently stops at metadata. A manifest-declared extension tool is not callable unless some separate ACP/MCP/runtime surface happens to expose it.\n54\t\n55\t### MCP resources\n56\t\n57\tThe same extension manifest can declare MCP servers, and the daemon sync path resolves them into desired-state MCP server resources. Skills can also declare MCP sidecars. This gives AGH a strong candidate adapter for extension tools: manifest tools may be backed by an MCP server, an extension sidecar Host API endpoint, or native AGH built-ins, but all should normalize into one registry contract.\n58\t\n59\t### Session permission atoms\n60\t\n61\t`internal/store/session_lineage.go` defines `SessionPermissionPolicy` with concrete atoms:\n62\t\n63\t- `Tools`\n64\t- `Skills`\n65\t- `MCPServers`\n66\t- `WorkspacePaths`\n67\t- `NetworkChannels`\n68\t- `SandboxProfiles`\n69\t\n70\t`internal/session/spawn.go` validates child permissions as a subset of parent permissions. 
This is an important base for runtime tool policy because it is already persisted with session lineage and already participates in spawn delegation.\n71\t\n72\tThe current agent definition has a flat `Tools []string` field in `internal/config/agent.go`. It lacks allow/deny overlays, named toolsets, namespace patterns, visibility tiers, and risk classes.\n73\t\n74\t### Hooks around tool calls\n75\t\n76\t`internal/hooks/payloads.go` already defines `ToolPreCallPayload`, `ToolPostCallPayload`, `ToolPostErrorPayload`, `ToolCallPatch`, and `ToolResultPatch`. This is the right policy extension point for a centralized dispatch pipeline:\n77\t\n78\t- pre-call hooks can deny or mutate input\n79\t- post-call hooks can redact or mutate output\n80\t- post-error hooks can classify or recover failures\n81\t\n82\tThe current gap is that AGH does not have a single dispatch pipeline that all AGH-owned tools must pass through.\n83\t\n84\t### Skills registry contrast\n85\t\n86\t`internal/skills.Registry` is much more mature than tools. It has global snapshots, workspace overlays, content loading, verification, install provenance, and `GlobalVersion()` for invalidation. Skills are injected as a static prompt catalog at session start, while tool resources are not exposed as a session-callable registry.\n87\t\n88\tThe Tool Registry should copy the skills registry's useful properties where they fit: global/workspace overlays, versioned snapshots, resource projection, progressive disclosure, and explicit content/schema loading.\n89\t\n90\t## Gaps\n91\t\n92\t1. No central runtime registry that owns executable tool handles.\n93\t2. No agent-facing discovery API for tools.\n94\t3. No native AGH tools such as `agh__tool_search`, `agh__skill_view`, or `agh__network_send`.\n95\t4. No availability model for env vars, binaries, MCP health, extension health, workspace scope, or policy state.\n96\t5. No central permission pipeline for AGH-owned tools.\n97\t6. 
No namespace or structured provenance model, so duplicate names would be ambiguous.\n98\t7. No toolsets or bundles comparable to skills/capabilities.\n99\t8. No direct extension execution boundary for manifest-declared tools.\n100\t9. No consistent way to expose tools over CLI, HTTP, UDS, and session-visible agent surfaces.\n101\t10. No usage telemetry by tool or skill.\n102\t11. No result-size budget, persistence policy, redaction, or output mapping at registry level.\n103\t12. The `dynamic` source enum exists but has no producer.\n104\t\n105\t## Relevant Code Paths\n106\t\n107\t- `internal/tools/tool.go:14-136`: tool source enum, metadata-only `Tool`, and list-only `ToolProvider`.\n108\t- `internal/tools/resource.go:13-61`: `ToolResourceKind` codec and JSON schema validation.\n109\t- `internal/daemon/tool_mcp_resources.go:20-122`: generic daemon `resourceCatalog` and `newToolProjector`.\n110\t- `internal/daemon/tool_mcp_resources.go:620-640`: extension manifest tools are projected into desired-state resources.\n111\t- `internal/extension/manifest.go:55-62`: extension resources can include tools and MCP servers.\n112\t- `internal/extension/manifest.go:154-160`: extension `ToolConfig` is static metadata only.\n113\t- `internal/extension/resource_publication.go:13-31`: manifest tool declarations become `toolspkg.Tool` records.\n114\t- `internal/config/agent.go:14-23`: `AgentDef.Tools []string` is flat.\n115\t- `internal/store/session_lineage.go:31-39`: session lineage has concrete `Tools` permission atoms.\n116\t- `internal/session/interfaces.go:244-251`: `AgentDriver` has no catalog-delta or AGH tool injection extension.\n117\t- `internal/hooks/payloads.go:520-568`: tool pre/post/error payloads already exist.\n118\t- `internal/skills/registry.go:100-103`: skills expose a global version suitable for catalog delta detection.\n119\t- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`: prior autonomy gap analysis with G1-G12 and P1-P9 
proposals.\n120\t- `.compozy/tasks/hermes/analysis/analysis_tools_security.md`: security gaps relevant once AGH exposes agent-callable tools.\n121\t\n122\t## Design Constraints for the TechSpec\n123\t\n124\tThe Tool Registry should be a foundation, not a pile of built-in commands. It should define the contracts, policy path, extension boundary, and surfaces first, then add a small bootstrap set of native AGH tools to prove the system.\n125\t\n126\tAGH should avoid copying in-process plugin patterns from Python/TypeScript systems. Third-party executable tools should cross a process/protocol boundary: MCP, extension sidecar Host API, subprocess adapter, or future bridge SDK. Built-in Go tools can register in-process because they are part of the daemon binary.\n127\t\n128\tThe cold `tool` resource should remain valuable as catalog metadata and desired state, but executable dispatch must be modeled separately. A manifest-declared tool can be installed and discoverable while still being unavailable until its backend is healthy and authorized.\n129\t\n130\tAvailability and authorization must both be rechecked at dispatch time. Hiding unavailable tools from discovery is useful, but it is not a security boundary.\n131\t\n132\t## Open Questions\n133\t\n134\t1. Should extension tools be executable in the MVP, or should MVP only make them discoverable with an explicit unavailable state?\n135\t2. If executable, should extension tools be allowed only through MCP/sidecar boundaries, or should trusted bundled extensions get in-process handlers?\n136\t3. Should the first AGH-native tool surface be injected into ACP sessions, exposed as an MCP server hosted by AGH, or exposed only through CLI/UDS/HTTP for drivers to call indirectly?\n137\t4. Should `internal/catalog` coordinate tools and skills, or should `internal/tools` own runtime tools while a thinner catalog/search service composes skills and tools?\n138\t5. 
Which visibility tiers are needed for MVP: internal, CLI/HTTP, agent-visible, model-visible, deferred-discoverable, extension-private?\n139\t\n140\t## Evidence\n141\t\n142\t- `internal/tools/tool.go:91-136`: current `Tool` and `ToolProvider` are descriptive and list-only.\n143\t- `internal/tools/resource.go:13-61`: tool resource codec validates metadata records.\n144\t- `internal/extension/manifest.go:55-62`: extension manifests can publish `resources.tools`.\n145\t- `internal/extension/resource_publication.go:13-31`: extension manifest tools become static tool resources.\n146\t- `internal/daemon/tool_mcp_resources.go:620-640`: daemon sync publishes extension tools and MCP servers into resource desired state.\n147\t- `internal/store/session_lineage.go:31-39`: session permission policy already includes `Tools`.\n148\t- `internal/hooks/payloads.go:520-568`: tool lifecycle hooks are already typed.\n149\t- `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md:1-220`: prior gap analysis identifies no runtime tool registry, no discovery API, no availability, and no agent-facing skill/tool call surface.\n150\t- `.compozy/tasks/hermes/analysis/analysis_tools_security.md:1-140`: security analysis warns that URL-capable, command-capable, MCP, and skill-install surfaces require stronger guardrails before broad exposure.\n151\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:20.164253Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01XmFCbuKDbHVs6mejqedYhX","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01XmFCbuKDbHVs6mejqedYhX","content":"```\n1\t# Tool Registry Synthesis and Proposed Direction\n2\t\n3\t## Executive Summary\n4\t\n5\tAGH should build a Tool Registry as a daemon-owned runtime service that composes tool metadata, availability, policy, execution, hooks, telemetry, and extension adapters.\n6\t\n7\tThe 
foundation should not be \"add many built-in tools.\" The foundation should be:\n8\t\n9\t1. a first-class runtime tool contract,\n10\t2. a registry that aggregates built-ins, MCP, extensions, and dynamic providers,\n11\t3. a policy/availability projection for each agent/session,\n12\t4. a single dispatch pipeline,\n13\t5. an extension-safe execution boundary,\n14\t6. agent-manageable CLI/HTTP/UDS/session surfaces,\n15\t7. toolsets/bundles comparable to skills.\n16\t\n17\tThe ACP inventory found `.resources/openfang` present, but with no meaningful ACP evidence.\n18\t\n19\t## Recommended Architecture\n20\t\n21\t### Accepted decisions so far\n22\t\n23\t- Extension tool execution boundary: manifest-first descriptors with out-of-process execution only for extension tools in the MVP.\n24\t- Session exposure path: AGH-hosted local MCP server plus shared CLI/HTTP/UDS contracts.\n25\t- Package boundary: `internal/tools` owns runtime registry contracts and dispatch; a thin `internal/catalog` facade composes tools and skills for cross-domain discovery.\n26\t- MVP native tool scope: bootstrap catalog/skill tools plus network and bounded task tools (`agh__tool_*`, `agh__skill_*`, `agh__network_peers`, `agh__network_send`, `agh__task_*`).\n27\t- Policy integration: existing ACP `permissions.mode` is the system/session approval ceiling; registry policy is a granular layer below it and cannot silently grant more authority than ACP policy allows.\n28\t- Visibility by surface: operator surfaces show unavailable/unauthorized/conflicted tools with reason codes; session/model-visible surfaces expose only tools callable in that effective session context.\n29\t- Naming/collision policy: one canonical public `ToolID` uses provider-safe lower snake segments separated by reserved `__`, for example `agh__skill_view` and `mcp__github__create_issue`; this is captured in ADR-007.\n30\t\n31\t### ACP compatibility finding\n32\t\n33\tACP does not define a durable callable-tool registry. 
It defines session lifecycle, `mcpServers` bootstrap fields, client authority callbacks, permission requests, and observable tool-call events. ACP `ToolCall` records have `toolCallId`, human-readable `title`, coarse `kind`, status, locations, raw input/output, and content, but no programmatic tool `name` equivalent to MCP `Tool.name`.\n34\t\n35\tThis means the Tool Registry should remain an AGH daemon/runtime service. Session exposure should use the accepted AGH-hosted MCP path, where AGH exposes the canonical `ToolID` directly as the hosted MCP `Tool.name`.\n36\t\n37\tAccepted identity format:\n38\t\n39\t- `ToolID`: stable provider-safe id with reserved `__` namespace separators, such as `agh__skill_view`.\n40\t- `DisplayTitle`: human-readable and non-unique.\n41\t- `SourceRef`: structured provenance, not inferred only from prefixes.\n42\t\n43\tCollision handling must be fail-closed. Canonical ID collisions are provider registration errors or operator diagnostics. Sanitized external-name collisions make the affected tools unavailable to that session until resolved. Display title collisions are allowed because titles are not policy identities.\n44\t\n45\t### 1. 
Split descriptor, runtime handle, and resource record\n46\t\n47\tKeep the existing `internal/tools.Tool` resource shape as the cold catalog/desired-state record, but introduce a runtime contract with separate types:\n48\t\n49\t- `ToolID`: stable provider-safe id such as `agh__skill_view`, `mcp__github__create_issue`, `ext__linear__search`.\n50\t- `Descriptor`: identity, description, input schema, optional output schema, read-only/destructive/open-world/concurrency metadata, source/provenance, visibility, tags, owner, result budget.\n51\t- `Handle`: descriptor plus `Availability(ctx, ToolContext)` and `Call(ctx, ToolCall)` for executable tools.\n52\t- `Provider`: contributes descriptors/handles and can refresh.\n53\t- `Registry`: owns provider registration, indexing, listing, search, policy projection, and dispatch.\n54\t- `ToolResult`: structured output, preview, artifacts, redactions, bytes, display title, metadata.\n55\t\n56\tThis avoids overloading the desired-state resource with function pointers while still allowing resource records to feed the runtime registry.\n57\t\n58\t### 2. Use manifest-first extension tools\n59\t\n60\tExtension manifests should continue to declare tool metadata statically. Add enough metadata to connect the declaration to a backend:\n61\t\n62\t- backend kind: `mcp`, `extension_host`, `subprocess`, or `builtin` where appropriate;\n63\t- namespace/owner;\n64\t- visibility;\n65\t- risk class;\n66\t- required config/env/capabilities;\n67\t- optional toolset memberships.\n68\t\n69\tThe registry can list these tools without executing extension code. A tool becomes executable only when its backend adapter is healthy, authorized, and has a callable handle.\n70\t\n71\tRecommendation for MVP: no in-process third-party handlers. Built-in Go tools can register in-process. Extension tools should execute through MCP or an extension sidecar/Host API adapter.\n72\t\n73\t### 3. 
Make availability a state machine, not a boolean\n74\t\n75\tUse explicit status:\n76\t\n77\t- `registered`: descriptor exists.\n78\t- `enabled`: operator/session policy has not disabled it.\n79\t- `available`: dependencies are present and backend is healthy.\n80\t- `authorized`: caller policy permits visibility/use.\n81\t- `executable`: there is a live handle for dispatch.\n82\t- `conflicted`: id/name collision requires resolution.\n83\t\n84\tDiscovery can hide unavailable/unauthorized tools from agents while operator surfaces show reasons. Dispatch must recheck availability and authorization.\n85\t\n86\tThe registry should expose separate operator and session projections. The operator projection includes diagnostics, source/provenance, policy reasons, availability reasons, and conflicts. The session projection powers hosted MCP and future driver injection and includes only tools that pass effective visibility/execution gates for that session.\n87\t\n88\t### 4. Centralize dispatch\n89\t\n90\tEvery AGH-owned tool call should pass through:\n91\t\n92\t1. resolve tool id/alias in context,\n93\t2. validate input against schema,\n94\t3. compute availability,\n95\t4. evaluate policy and session permission atoms,\n96\t5. run `tool.pre_call` hooks,\n97\t6. enforce concurrency/rate/result budgets,\n98\t7. call provider adapter,\n99\t8. normalize result,\n100\t9. redact/truncate/persist output,\n101\t10. run `tool.post_call` or `tool.post_error` hooks,\n102\t11. emit telemetry.\n103\t\n104\tNo CLI, HTTP, UDS, MCP, extension, or session path should bypass this pipeline.\n105\t\n106\t### 5. 
Model policy as overlays\n107\t\n108\tUse one policy engine that combines:\n109\t\n110\t- system/session ACP `permissions.mode`,\n111\t- daemon defaults,\n112\t- workspace config,\n113\t- extension grants,\n114\t- agent definition,\n115\t- session lineage `SessionPermissionPolicy.Tools`,\n116\t- skill/command scoped grants where relevant,\n117\t- explicit allow/deny patterns,\n118\t- named toolsets,\n119\t- risk defaults.\n120\t\n121\tToolsets should be recursive resources/config entries. This copies Hermes' strongest idea while fitting AGH's resource model.\n122\t\n123\tThe registry must not create a second approval system that contradicts ACP. `approve-all` removes automatic approval prompts for otherwise allowed tools, but explicit registry denies, source grants, session lineage restrictions, availability failures, and hooks still apply. `approve-reads` auto-approves only registry-classified read-only tools. `deny-all` denies execution by default and requires an explicit approval path.\n124\t\n125\t### 6. Provide a small bootstrap native toolset\n126\t\n127\tThe TechSpec should not enumerate every future AGH tool. It should require a small proving set:\n128\t\n129\t- `agh__tool_list`\n130\t- `agh__tool_search`\n131\t- `agh__tool_info`\n132\t- `agh__skill_list`\n133\t- `agh__skill_view`\n134\t\n135\tOptional later groups:\n136\t\n137\t- `agh__skill_install`\n138\t- `agh__network_peers`\n139\t- `agh__network_send`\n140\t- `agh__task_*`\n141\t- `agh__extension_*`\n142\t\n143\tThe bootstrap set proves discovery, schema loading, skill body loading, policy, result budget, and telemetry without overcommitting the whole daemon.\n144\t\n145\t### 7. 
Expose agent-manageable surfaces\n146\t\n147\tThe registry should have shared contract types used by:\n148\t\n149\t- CLI: `agh tool list/search/info/invoke`.\n150\t- HTTP: `/api/tools`, `/api/tools/{id}`, `/api/tools/{id}/invoke`.\n151\t- UDS: same operations for local agents and internal tools.\n152\t- Session-visible tool surface: either an AGH-hosted MCP server, driver-specific ACP tool injection where possible, or a fallback where agents can use `agh` CLI/UDS through their runtime.\n153\t\n154\tThe TechSpec should pick one MVP path and keep the others as contract-compatible surfaces.\n155\t\n156\t### 8. Treat Tool Search as provider-neutral\n157\t\n158\tClaude Code's `tool_reference` mechanism is useful but not portable. AGH should implement registry search as a normal catalog operation first:\n159\t\n160\t- search over name, namespace, description, tags, source, toolset, and search hints;\n161\t- return metadata first;\n162\t- load schema/details on demand;\n163\t- optionally persist discovered state per session later.\n164\t\n165\tDriver-specific schema-on-demand integration can be a future enhancement.\n166\t\n167\t### 9. Reuse existing AGH infrastructure\n168\t\n169\tBuild on:\n170\t\n171\t- `internal/tools` for contracts and registry,\n172\t- `internal/resources` for desired-state records,\n173\t- `internal/extension` manifest publication,\n174\t- `internal/hooks` for pre/post/error dispatch gates,\n175\t- `internal/store.SessionPermissionPolicy` for lineage constraints,\n176\t- `internal/skills.Registry` for skill listing/content,\n177\t- `internal/toolruntime` for subprocess ownership if extension tools need process handles,\n178\t- `internal/api/contract` for shared HTTP/UDS payloads.\n179\t\n180\tAvoid a large generic `internal/catalog` at first unless it only coordinates cross-domain search. 
The runtime tool registry belongs in or near `internal/tools`; a catalog facade can compose tools and skills for `agh__tool_*` / `agh__skill_*`.\n181\t\n182\t## Proposed MVP Scope\n183\t\n184\t### In scope\n185\t\n186\t- Runtime tool registry contract and central dispatch pipeline.\n187\t- Built-in provider for `agh__tool_list`, `agh__tool_search`, `agh__tool_info`, `agh__skill_list`, `agh__skill_search`, `agh__skill_view`, `agh__network_peers`, `agh__network_send`, and a bounded `agh__task_*` set.\n188\t- Resource-backed descriptors from existing `tool` records.\n189\t- Extension manifest backend metadata for future executable extension tools.\n190\t- MCP adapter design, even if full MCP call-through is deferred.\n191\t- Context-specific list/search/info APIs.\n192\t- Tool policy with allow/deny patterns and named toolsets.\n193\t- Availability model and reason codes.\n194\t- Hook integration for pre/post/error.\n195\t- Telemetry events for list/search/info/call and failures.\n196\t- CLI/HTTP/UDS contract surfaces.\n197\t\n198\t### Out of scope for MVP\n199\t\n200\t- Full shell/browser/file tool replacement for ACP runtimes.\n201\t- Provider-specific Anthropic `tool_reference` integration.\n202\t- In-process third-party extension handlers.\n203\t- Large catalog of AGH-native tools beyond the selected catalog/skill/network/task set.\n204\t- Skill install/remove/update tools unless explicitly paired with supply-chain policy/scanning work.\n205\t- Network peer remote tool execution.\n206\t- Marketplace signing/trust overhaul, except for explicit risk hooks needed by extension tools.\n207\t\n208\t## Critical Decisions Before TechSpec\n209\t\n210\t1. Extension execution boundary: out-of-process only, metadata-only first, or trusted in-process handlers.\n211\t2. Session exposure path: hosted MCP, direct ACP extension, CLI/UDS fallback, or all in phases.\n212\t3. 
Package boundary: runtime registry in `internal/tools` with a catalog facade, or a new `internal/catalog` owning tools and skills together.\n213\t4. MVP tool set: only list/search/info/view, or include mutating install/network/task tools.\n214\t5. Policy defaults: external tools disabled, ask, or visible-but-not-callable until granted.\n215\t6. Availability visibility: hide unavailable tools from agents, show unavailable tools with reasons, or configurable by surface.\n216\t7. Naming/collision policy: accepted in ADR-007. Use one canonical provider-safe `ToolID` with reserved `__` namespace separators, display-only title, structured provenance, and no shadowing or silent sanitized-name collisions.\n217\t\n218\t## Competitor Pattern Matrix\n219\t\n220\t| Pattern | Hermes | Claude Code | GoClaw | OpenClaw | AGH Recommendation |\n221\t|---|---:|---:|---:|---:|---|\n222\t| Single normalized tool contract | Yes | Yes | Yes | Yes | Required |\n223\t| Runtime executable registry | Yes | Distributed | Yes | Yes | Required |\n224\t| Manifest-first extension discovery | Partial | Plugin metadata | Partial | Strong | Required |\n225\t| MCP as adapter | Yes | Strong | Yes | Strong | Required |\n226\t| Availability gating | Strong discovery | `isEnabled` + MCP state | Policy/lazy checks | Lifecycle state | Required at discovery and dispatch |\n227\t| Central dispatch | Mostly | Strong | Mostly | Gateway + adapters | Required with no bypass |\n228\t| Toolsets/bundles | Strong | Policy lists | Groups | Policy groups | Required |\n229\t| Deferred search | Partial | Strong | Search helpers | Partial | Provider-neutral MVP |\n230\t| Concurrency metadata | Partial | Strong | Partial | Partial | Required metadata, scheduling can evolve |\n231\t| Extension in-process handlers | Yes | No native direct tools | Some | Plugin API | Avoid for MVP |\n232\t\n233\t## Implementation Shape to Explore in TechSpec\n234\t\n235\t```go\n236\ttype Descriptor struct {\n237\t ID ToolID\n238\t 
DisplayName string\n239\t Description string\n240\t InputSchema json.RawMessage\n241\t OutputSchema json.RawMessage\n242\t Source SourceRef\n243\t Visibility Visibility\n244\t Risk RiskClass\n245\t ReadOnly bool\n246\t Destructive bool\n247\t OpenWorld bool\n248\t ConcurrencySafe bool\n249\t MaxResultBytes int64\n250\t Toolsets []string\n251\t Tags []string\n252\t}\n253\t\n254\ttype Handle interface {\n255\t Descriptor() Descriptor\n256\t Availability(ctx context.Context, call ToolContext) Availability\n257\t Call(ctx context.Context, call ToolCall) (ToolResult, error)\n258\t}\n259\t\n260\ttype Provider interface {\n261\t ID() string\n262\t ListTools(ctx context.Context) ([]Descriptor, error)\n263\t Resolve(ctx context.Context, id ToolID) (Handle, bool, error)\n264\t}\n265\t\n266\ttype Registry interface {\n267\t List(ctx context.Context, scope Scope) ([]ToolView, error)\n268\t Search(ctx context.Context, scope Scope, query SearchQuery) ([]ToolView, error)\n269\t Get(ctx context.Context, scope Scope, id ToolID) (ToolView, error)\n270\t Call(ctx context.Context, scope Scope, req CallRequest) (ToolResult, error)\n271\t}\n272\t```\n273\t\n274\tThe exact Go shape can change, but the separation should hold.\n275\t\n276\t## Risks\n277\t\n278\tIf AGH exposes tool invocation before policy and availability are in place, it will create a broader attack surface than today's ACP-delegated tools.\n279\t\n280\tIf extension tools can run in-process, one bad extension can compromise the daemon.\n281\t\n282\tIf the registry only lists resources but does not dispatch, AGH will still lack the \"last mile\" that motivated the feature.\n283\t\n284\tIf the registry only works through one surface, agents will not be able to manage it consistently.\n285\t\n286\tIf name collisions are postponed, extension/MCP adoption will force a breaking change later.\n287\t\n288\t## Evidence Index\n289\t\n290\t- AGH current state: `analysis_agh_current_state.md`.\n291\t- Hermes reference: 
`analysis_hermes.md`.\n292\t- Claude Code reference: `analysis_claude-code.md`.\n293\t- GoClaw reference: `analysis_goclaw.md`.\n294\t- OpenClaw reference: `analysis_openclaw.md`.\n295\t- Local Claude Code ideas: `analysis_claude_code_ideas.md`.\n296\t- Prior autonomy gaps: `.compozy/tasks/autonomous/analysis/analysis_skills_tools_registry.md`.\n297\t- Security constraints: `.compozy/tasks/hermes/analysis/analysis_tools_security.md`.\n298\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:22.572365Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:22.572687Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01PrfdEugUSbLea5QtKRa8Kf","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01PrfdEugUSbLea5QtKRa8Kf","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.768153Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PrfdEugUSbLea5QtKRa8Kf","blocks":[{"type":"tool_use","id":"toolu_01PrfdEugUSbLea5QtKRa8Kf","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_acp_tool_registry_compatibility.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.768413Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01AaNV1bhYHtM9YF7s6A5abu","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01AaNV1bhYHtM9YF7s6A5abu","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.836426Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01AaNV1bhYHtM9YF7s6A5abu","blocks":[{"type":"tool_use","id":"toolu_01AaNV1bhYHtM9YF7s6A5abu","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/internal/CLAUDE.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.890682Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.892615Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PrfdEugUSbLea5QtKRa8Kf","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.892808Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01AaNV1bhYHtM9YF7s6A5abu","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.894867Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PrfdEugUSbLea5QtKRa8Kf","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01PrfdEugUSbLea5QtKRa8Kf","content":"```\n1\t# Analysis: ACP Tool Registry Compatibility\n2\t\n3\t## 
Scope\n4\t\n5\tThis analysis answers whether ACP imposes a tool registry pattern that AGH must follow, and which `.resources/*` projects materially use ACP in ways that affect the Tool Registry TechSpec. The research combines official ACP/MCP documentation with read-only subagent passes over `.resources/rayclaw`, `.resources/harnss`, `.resources/acpx`, `.resources/openclaw`, `.resources/opencode`, and an inventory pass across every top-level `.resources/*` project.\n6\t\n7\t## Executive Conclusion\n8\t\n9\tACP does not define a durable, programmatic tool registry for callable tools. ACP defines session lifecycle, prompt streaming, client authority callbacks, permission requests, MCP server bootstrap fields, and observable tool-call events. Those tool-call events carry `toolCallId`, `title`, `kind`, `status`, locations, raw input, raw output, and content, but they do not carry a stable `name` field equivalent to MCP `Tool.name`.\n10\t\n11\tTherefore, AGH should not model its Tool Registry as an ACP registry, and should not use ACP `title` as a durable policy or collision key.\n12\t\n13\tFor session-visible AGH tools, the strongest compatibility path remains the accepted MVP path: an AGH-hosted MCP server backed by the daemon Tool Registry. 
MCP supplies the externally callable `Tool.name`; ACP supplies the way ACP-compatible agents receive `mcpServers`, report tool execution, and request permission.\n14\t\n15\tThe practical design correction is:\n16\t\n17\t- AGH should use one canonical provider-safe `ToolID` everywhere, using reserved double-underscore namespace separators, for example `agh__skill_view`, `mcp__github__create_issue`, or `ext__linear__search`.\n18\t- The same `ToolID` should be the hosted MCP `Tool.name`; AGH should not introduce a second wire alias in the MVP.\n19\t- ACP `title` is display-only and event-only.\n20\t- ACP `ToolKind` is a risk/display hint, not registry identity.\n21\t- `permissions.mode` remains the session approval ceiling; registry policy remains the granular layer below it.\n22\t\n23\t## Official Protocol Constraints\n24\t\n25\t### ACP\n26\t\n27\tOfficial ACP schema evidence:\n28\t\n29\t- ACP `session/new`, `session/load`, and `session/resume` include `mcpServers`; agents are expected to connect to those MCP servers for the session. Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n30\t- ACP `ToolCall` is event/reporting data with `toolCallId`, `title`, `kind`, `status`, `rawInput`, `rawOutput`, `locations`, and `content`. The schema describes `title` as human-readable and `toolCallId` as unique within a session. It does not expose a durable callable `name` field. Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n31\t- ACP `ToolKind` values are coarse categories such as `read`, `edit`, `delete`, `move`, `search`, `execute`, `think`, `fetch`, `switch_mode`, and `other`. The schema says these help clients pick icons and display progress, which is weaker than registry identity. Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n32\t- ACP `session/request_permission` carries a `toolCall` object plus permission options. It is a permission bridge for a concrete tool call, not a registry discovery API. 
Source: \u003chttps://agentclientprotocol.com/protocol/schema\u003e.\n33\t- The official \"ACP Registry\" is an agent registry: a catalog of ACP-compatible agents and their install/run metadata, not a callable tool registry. Source: \u003chttps://agentclientprotocol.com/registry\u003e.\n34\t\n35\t### MCP\n36\t\n37\tOfficial MCP schema evidence:\n38\t\n39\t- MCP `tools/list` returns `Tool[]`.\n40\t- MCP `Tool` has `name`, optional `title`, optional `description`, `inputSchema`, optional `outputSchema`, annotations, execution metadata, and `_meta`.\n41\t- MCP describes `name` as intended for programmatic/logical use and `title` as intended for UI/end-user contexts. Source: \u003chttps://modelcontextprotocol.io/specification/draft/schema\u003e.\n42\t\n43\tImplication: AGH should treat MCP `Tool.name` as the session wire name when exposing AGH registry tools through hosted MCP. ACP does not replace that name.\n44\t\n45\t## ACP Usage Inventory Across `.resources/*`\n46\t\n47\t| Project | ACP usage | Tool registry relevance |\n48\t|---|---|---|\n49\t| `.resources/acpx` | ACP client/orchestrator and conformance tooling. | Has an agent/adapter registry, not a callable tool registry. Passes `mcpServers`; models tool calls by `toolCallId`, title, kind, status, raw input/output. |\n50\t| `.resources/collaborator-ai` | ACP client/orchestrator. | No registry found. Uses ACP tool update titles for display. |\n51\t| `.resources/goclaw` | ACP client/orchestrator. | No formal registry. Uses method switches and permission heuristics for ACP callbacks. |\n52\t| `.resources/harnss` | ACP client/orchestrator with Electron bridge. | Has ACP agent registry and UI rendering adapters, not a tool registry. Converts configured MCP servers to ACP `McpServer[]`. |\n53\t| `.resources/hermes` | ACP server/agent implementation plus ACP client shim. | Relevant: registers ACP-provided MCP servers into Hermes agent state and valid tool names; maps Hermes tools to ACP `ToolKind` and titles. 
|\n54\t| `.resources/multica` | ACP client/orchestrator. | No formal registry. Parses titles such as `terminal:` and `read:` for UI normalization. |\n55\t| `.resources/openclaw` | ACP server/client/runtime bridge. | Has internal tool catalog and plugin/MCP surfaces, but main ACP bridge does not expose an ACP tool registry and rejects per-session `mcpServers`. |\n56\t| `.resources/opencode` | Native ACP server. | Has a real internal `ToolRegistry`, but ACP does not expose it as a registry API. Accepts ACP `mcpServers` and converts them into internal MCP config. |\n57\t| `.resources/paperclip` | Docs/reference only. | Conceptual ACP references only. |\n58\t| `.resources/rayclaw` | ACP client/orchestrator. | Exposes ACP control as local `acp_*` tools; ACP-reported tool calls are telemetry, not registry entries. |\n59\t| `.resources/sandbox-agent` | ACP adapter/proxy/client package. | Agent launch registry only; no ACP tool registry found. |\n60\t| `.resources/t3code` | ACP schema/client/runtime package. | Schema and runtime tracking for ACP tool events; no broad tool registry. |\n61\t\n62\tNo meaningful ACP evidence was found in `.resources/cc-posts`, `.resources/chat`, `.resources/openfang`, `.resources/pi`, or `.resources/symphony`. `.resources/claude-code` had an `ACP` false positive inside an embedded/base64-like string, not implementation evidence.\n63\t\n64\t## Deep Dives\n65\t\n66\t### RayClaw\n67\t\n68\tRayClaw is an ACP client/orchestrator. It spawns configured ACP agents, runs JSON-RPC lifecycle calls (`initialize`, `session/new`, `session/prompt`, `session/end`), and exposes ACP orchestration to RayClaw's primary LLM through local wrapper tools named `acp_coding`, `acp_new_session`, `acp_prompt`, `acp_end_session`, `acp_list_sessions`, `acp_submit_job`, and `acp_job_status`.\n69\t\n70\tThose `acp_*` names are RayClaw's local tool registry convention, not ACP. 
RayClaw's ACP tool-call handling treats incoming `session/update` tool calls as observations and records them by title/raw input. It does not dispatch those reported ACP tool calls through RayClaw's local registry.\n71\t\n72\tImportant evidence:\n73\t\n74\t- `.resources/rayclaw/src/acp.rs:436-453` initializes ACP with client capabilities, not a host tool registry.\n75\t- `.resources/rayclaw/src/acp.rs:752-843` handles `session/request_permission`.\n76\t- `.resources/rayclaw/src/acp.rs:846-984` parses ACP tool-call progress.\n77\t- `.resources/rayclaw/src/acp.rs:1521-1531` creates sessions with `mcpServers: []`.\n78\t- `.resources/rayclaw/src/tools/acp.rs:16-40` registers the local `acp_*` wrapper tools.\n79\t- `.resources/rayclaw/tests/acp_integration.rs:128-240` enforces local tool-name uniqueness, allowed characters, length, and collision checks.\n80\t\n81\tTransferable points:\n82\t\n83\t- Separate ACP orchestration tools from normal runtime tools.\n84\t- Treat ACP tool calls as child-agent telemetry unless AGH deliberately bridges them.\n85\t- Do not prefer `allow_always` as an automatic approval default the way RayClaw does under `auto_approve`; AGH should keep durable grants explicit.\n86\t- Reject or disambiguate sanitized name collisions rather than truncating.\n87\t\n88\t### Harnss\n89\t\n90\tHarnss is an ACP client/orchestrator with an Electron bridge and React UI. It converts renderer MCP server configs into ACP SDK `McpServer[]`, including stdio and remote transports, then passes them into `newSession` and `loadSession`. It also supports live reload through ACP `loadSession` when available.\n91\t\n92\tHarnss does not consume or expose an ACP tool registry. Its \"registry\" evidence is an ACP agent registry and a UI-side static MCP renderer table. 
ACP tool calls are converted into UI messages keyed by `toolCallId`, using title/kind/raw input/output normalization.\n93\t\n94\tImportant evidence:\n95\t\n96\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:193-215` converts MCP configs to ACP `McpServer[]`.\n97\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:365-483` wires ACP connection callbacks, event forwarding, and permission bridge.\n98\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:521-546` starts ACP sessions with MCP servers.\n99\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts:793-828` reloads sessions with MCP servers.\n100\t- `.resources/harnss/src/hooks/useACP.ts:194-337` converts ACP tool events into UI messages.\n101\t- `.resources/harnss/src/hooks/useACP.ts:413-473` handles ACP permission requests.\n102\t- `.resources/harnss/src/lib/engine/acp-adapter.ts:267-358` derives display/tool renderer names from ACP title/kind/raw input.\n103\t- `.resources/harnss/src/components/McpToolContent.tsx:83-138` supports SDK-style `mcp__Server__tool` names and ACP-style `Tool: Server/tool` titles in UI rendering.\n104\t\n105\tTransferable points:\n106\t\n107\t- Normalize ACP event data at the boundary into AGH's canonical tool-call observation model.\n108\t- Preserve raw ACP permission options; do not collapse manual allow/deny into \"first allow\" or \"first reject\" if the protocol provides multiple option IDs.\n109\t- Keep rendering names separate from policy names.\n110\t- Pass MCP servers as session bootstrap/load data when AGH chooses per-session MCP support.\n111\t\n112\t### ACPX\n113\t\n114\tACPX is a headless ACP client/orchestrator. Its registry is an agent/adapter registry mapping names like `codex`, `claude`, `gemini`, and others to launch commands. This is not a callable tool registry.\n115\t\n116\tACPX implements client authority callbacks such as filesystem read/write, terminal create/output/wait/kill/release, and `session/request_permission`. 
It parses `mcpServers` from config and passes them through to `session/new` and `session/load`. It does not discover or normalize MCP tools into a registry.\n117\t\n118\tImportant evidence:\n119\t\n120\t- `.resources/acpx/src/agent-registry.ts:38-107` maps adapter names to commands.\n121\t- `.resources/acpx/src/mcp-servers.ts:100-177` parses MCP server configs.\n122\t- `.resources/acpx/src/acp/client.ts:475-538` wires ACP client callbacks and initialize capabilities.\n123\t- `.resources/acpx/src/acp/client.ts:638-693` passes `mcpServers` to `session/new` and `session/load`.\n124\t- `.resources/acpx/src/permissions.ts:98-152` implements coarse permission decisions.\n125\t- `.resources/acpx/src/session/conversation-model.ts:310-353` persists tool events keyed by tool call ID.\n126\t- `.resources/acpx/conformance/cases/021-prompt-post-success-drain.json:1-50` shows late tool updates can arrive after prompt success.\n127\t\n128\tTransferable points:\n129\t\n130\t- Keep agent/provider registries separate from Tool Registry.\n131\t- ACP compatibility includes filesystem and terminal callbacks where advertised; those callbacks must share AGH's registry policy engine or be routed through equivalent approval gates.\n132\t- Preserve distinct identities: AGH record IDs, ACP session IDs, provider-native session IDs, tool call IDs, and registry tool IDs.\n133\t- Add a protocol-aware drain/settle window for late `tool_call_update` events.\n134\t\n135\t### OpenClaw\n136\t\n137\tOpenClaw's main `openclaw acp` bridge is a Gateway-backed ACP server. It forwards prompts to the Gateway and translates Gateway events into ACP session updates. The main bridge advertises MCP HTTP/SSE support as disabled and rejects non-empty per-session `mcpServers`; its docs say MCP should be configured at the Gateway/agent layer.\n138\t\n139\tOpenClaw has rich internal tool catalogs and plugin/MCP surfaces, but the main ACP bridge does not expose them as an ACP tool registry. 
Tool identity in ACP is display/event identity: title formatting plus inferred `ToolKind`.\n140\t\n141\tImportant evidence:\n142\t\n143\t- `.resources/openclaw/src/acp/server.ts:4-13` and `.resources/openclaw/src/acp/server.ts:104-122` bootstrap the ACP stdio server.\n144\t- `.resources/openclaw/src/acp/translator.ts:519-540` advertises ACP capabilities.\n145\t- `.resources/openclaw/src/acp/translator.ts:542-603` handles session creation/loading.\n146\t- `.resources/openclaw/src/acp/translator.ts:1417-1424` rejects non-empty `mcpServers`.\n147\t- `.resources/openclaw/src/acp/translator.ts:848-940` maps Gateway tool events to ACP tool updates.\n148\t- `.resources/openclaw/src/acp/event-mapper.ts:297-342` formats tool titles and infers tool kind.\n149\t- `.resources/openclaw/src/agents/tool-catalog.ts:20-37` and `.resources/openclaw/src/agents/tool-catalog.ts:306-393` define a separate internal tool catalog.\n150\t- `.resources/openclaw/extensions/acpx/src/runtime-internals/mcp-proxy.mjs:33-64` shows the ACPX extension can inject MCP servers into embedded ACP sessions, unlike the main gateway bridge.\n151\t\n152\tTransferable points:\n153\t\n154\t- AGH must explicitly choose whether its ACP bridge accepts per-session `mcpServers` like OpenCode/Harnss/ACPX or rejects them like OpenClaw's gateway bridge.\n155\t- If AGH supports both runtime-managed MCP and ACP-provided MCP servers, precedence and collision rules must be explicit.\n156\t- Do not use substring heuristics for registry policy where explicit tool metadata is available.\n157\t\n158\t### OpenCode\n159\t\n160\tOpenCode implements a native ACP server and has a real internal `ToolRegistry`. 
This is the strongest local reference for how an agent can maintain a rich internal registry while ACP still sees only session lifecycle, MCP bootstrap, tool-call updates, and permission requests.\n161\t\n162\tOpenCode accepts ACP per-session `mcpServers`, stores them in ACP session state, converts them into internal MCP config, and adds them through its SDK. Its internal MCP naming pattern exposes MCP tools as `sanitize(server) + \"_\" + sanitize(tool)` while preserving the original MCP tool name for the actual call.\n163\t\n164\tImportant evidence:\n165\t\n166\t- `.resources/opencode/packages/opencode/src/cli/cmd/acp.ts:23-60` bootstraps `opencode acp`.\n167\t- `.resources/opencode/packages/opencode/src/acp/types.ts:1-16` and `.resources/opencode/packages/opencode/src/acp/session.ts:8-75` store ACP session state, including `mcpServers`.\n168\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:534-578` advertises MCP support.\n169\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:584-687` accepts MCP servers on session creation/loading.\n170\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:1216-1254` converts ACP MCP servers into internal MCP config.\n171\t- `.resources/opencode/packages/opencode/src/mcp/index.ts:115-146` and `.resources/opencode/packages/opencode/src/mcp/index.ts:618-651` implement sanitized server/tool naming while preserving raw MCP names.\n172\t- `.resources/opencode/packages/opencode/src/tool/tool.ts:34-43` and `.resources/opencode/packages/opencode/src/tool/registry.ts:163-207` define internal tool definitions and registry behavior.\n173\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:273-455` emits ACP tool-call lifecycle updates.\n174\t- `.resources/opencode/packages/opencode/src/acp/agent.ts:190-271` bridges internal permission events to ACP `session/request_permission`.\n175\t\n176\tTransferable points:\n177\t\n178\t- Keep AGH's internal registry richer than ACP.\n179\t- Store ACP-provided MCP servers 
in session state, not global daemon config.\n180\t- Preserve raw MCP server/tool names separately from the canonical AGH `ToolID`.\n181\t- Emit a stable ACP lifecycle, preferably `pending -\u003e in_progress -\u003e completed/failed`, even when the underlying runtime first reports a running event.\n182\t- Do not rely on a single-underscore sanitized naming scheme without collision diagnostics.\n183\t\n184\t### Hermes, Multica, GoClaw, Sandbox-Agent, T3Code\n185\t\n186\tThese projects reinforce the same split:\n187\t\n188\t- `.resources/hermes` is relevant because it registers ACP-provided MCP servers into agent state and valid tool names, then maps tool events into ACP kinds/titles. It has useful registry ideas, but ACP remains the session/event layer.\n189\t- `.resources/multica` and `.resources/goclaw` normalize ACP tool titles/kinds for display and permission heuristics; neither shows a protocol-level tool registry.\n190\t- `.resources/sandbox-agent` has ACP HTTP-to-stdio adapter and launch registry logic, but no callable ACP tool registry.\n191\t- `.resources/t3code` provides ACP schema/client/runtime tracking for tool-call events, not a broad registry.\n192\t\n193\t## Design Implications For AGH\n194\t\n195\t1. ACP compatibility is not a reason to avoid a daemon Tool Registry. ACP leaves tool discovery/execution models to the agent/runtime, or to MCP servers supplied to the session.\n196\t\n197\t2. AGH should expose daemon-owned session tools through an AGH-hosted MCP server in the MVP. This matches the accepted ADR-002 direction and aligns with ACP's `mcpServers` field.\n198\t\n199\t3. The registry's canonical ID must not be ACP `title`. Use one stable provider-safe `ToolID` across AGH and hosted MCP.\n200\t\n201\t4. 
The registry should store one canonical callable identity plus metadata:\n202\t - `ToolID`: provider-safe lower snake segments separated by reserved `__`, for example `agh__skill_view`.\n203\t - `DisplayTitle`: user-facing title only.\n204\t - `SourceRef`: structured provenance, for example built-in, MCP server, extension ID, bundle ID, provider ID.\n205\t\n206\t5. Collision handling must be fail-closed:\n207\t - Canonical `ToolID` collision: provider registration error or conflicted diagnostic.\n208\t - Sanitized external-name collision: tool is not exposed to the session until disambiguated.\n209\t - Display title collision: allowed, because titles are not policy identities.\n210\t\n211\t6. Operator and session projections should remain separate:\n212\t - Operator surfaces show unavailable, unauthorized, and conflicted tools with reason codes.\n213\t - Session/model surfaces expose only callable tools after availability, authorization, approval ceiling, and collision checks.\n214\t\n215\t7. ACP permission policy integration must remain ceiling-based:\n216\t - `deny-all` denies by default.\n217\t - `approve-reads` auto-approves only registry-classified read-only tools and ACP read/search callbacks AGH classifies as read-only.\n218\t - `approve-all` skips approval prompts for otherwise allowed tools, but does not bypass registry deny rules, extension grants, session lineage, source trust, availability, hooks, or conflict checks.\n219\t\n220\t8. ACP filesystem and terminal callbacks, if AGH advertises them, must not bypass Tool Registry policy. Either route them through the registry as first-class built-in tools or share the same policy/approval engine with equivalent telemetry and hooks.\n221\t\n222\t9. ACP `ToolKind` should be explicit metadata on AGH descriptors. Heuristics from title/kind are fallback-only for external ACP events that AGH observes but does not own.\n223\t\n224\t10. AGH should persist observed ACP tool calls separately from registry definitions. 
Observations are keyed by `toolCallId` within a session and carry title/kind/status/raw input/output. Registry entries are keyed by canonical `ToolID`.\n225\t\n226\t11. AGH should support late tool-call updates after prompt completion by draining the ACP event stream for a bounded window or until protocol-specific completion conditions are met.\n227\t\n228\t12. AGH should decide explicitly whether to accept third-party ACP `mcpServers` from clients:\n229\t - If accepted, store them as session-scoped tool sources with clear precedence and conflict policy.\n230\t - If rejected, document the OpenClaw-style stance and require MCP sources to be configured through AGH's registry/config lifecycle.\n231\t - For this TechSpec, the safer MVP path is AGH-managed hosted MCP first, with acceptance of client-supplied MCP servers as a compatibility extension only if collision and source-trust rules are implemented.\n232\t\n233\t## Accepted Naming And Collision Recommendation\n234\t\n235\tAdopt one canonical public `ToolID` format:\n236\t\n237\t- Canonical ID: provider-safe lower snake segments separated by reserved double underscore, for example `agh__skill_view`, `agh__tool_search`, `mcp__github__create_issue`, `ext__linear__search`.\n238\t- Hosted MCP `Tool.name`: same as the canonical `ToolID`.\n239\t- Display title: human-readable and non-unique, for example `View Skill`.\n240\t- Source/provenance: structured fields, not inferred solely from name prefixes.\n241\t- No shadowing: providers cannot replace an existing canonical ID unless they are the same source updating the same record.\n242\t- No silent truncation: if sanitization or length rules would collide, registration/session projection marks the tool conflicted and hides it from the session surface.\n243\t- Policy, dispatch, telemetry, hooks, CLI, HTTP, UDS, and hosted MCP all use the same `ToolID`.\n244\t\n245\tThis keeps AGH's internal registry expressive while respecting MCP wire compatibility and ACP's event-oriented 
model.\n246\t\n247\t## Evidence Summary\n248\t\n249\tOfficial protocol sources:\n250\t\n251\t- ACP schema: \u003chttps://agentclientprotocol.com/protocol/schema\u003e\n252\t- ACP agent registry: \u003chttps://agentclientprotocol.com/registry\u003e\n253\t- MCP draft schema: \u003chttps://modelcontextprotocol.io/specification/draft/schema\u003e\n254\t\n255\tPrimary local evidence:\n256\t\n257\t- `.resources/rayclaw/src/acp.rs`\n258\t- `.resources/rayclaw/src/tools/acp.rs`\n259\t- `.resources/rayclaw/tests/acp_integration.rs`\n260\t- `.resources/harnss/electron/src/ipc/acp-sessions.ts`\n261\t- `.resources/harnss/src/hooks/useACP.ts`\n262\t- `.resources/harnss/src/lib/engine/acp-adapter.ts`\n263\t- `.resources/acpx/src/acp/client.ts`\n264\t- `.resources/acpx/src/mcp-servers.ts`\n265\t- `.resources/acpx/src/permissions.ts`\n266\t- `.resources/openclaw/src/acp/translator.ts`\n267\t- `.resources/openclaw/src/acp/event-mapper.ts`\n268\t- `.resources/opencode/packages/opencode/src/acp/agent.ts`\n269\t- `.resources/opencode/packages/opencode/src/tool/registry.ts`\n270\t- `.resources/opencode/packages/opencode/src/mcp/index.ts`\n271\t\n272\tSubagent inventory evidence:\n273\t\n274\t- ACP implementations/usages: `acpx`, `collaborator-ai`, `goclaw`, `harnss`, `hermes`, `multica`, `openclaw`, `opencode`, `paperclip`, `rayclaw`, `sandbox-agent`, `t3code`.\n275\t- No meaningful ACP evidence: `cc-posts`, `chat`, `openfang`, `pi`, `symphony`.\n276\t- Excluded false positive: `claude-code`.\n277\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:23.895287Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01AaNV1bhYHtM9YF7s6A5abu","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01AaNV1bhYHtM9YF7s6A5abu","content":"```\n1\t# Internal Backend (Go)\n2\t\n3\tThe Go runtime — `internal/*` packages composed by `internal/daemon`, plus the API 
transports under `internal/api/*`. ACP subprocess management, SQLite persistence, HTTP/SSE + UDS APIs, autonomy kernel, AGH Network. Entry binary lives in `cmd/agh`.\n4\t\n5\tRepo-wide rules (Critical Rules, Workflow, Build, Commits, Skill Dispatch, Memory \u0026 Skills RFC, CI/Release) live in the **root `CLAUDE.md`**. This file owns architecture, package boundaries, autonomy contracts, security invariants, and `internal/`-specific debugging/forensics.\n6\t\n7\t## Architecture\n8\t\n9\t### Principles\n10\t\n11\t- **Designed for incremental extension** — new capabilities arrive as new packages wired into `daemon/`, without modifying existing packages. Small interfaces + dependency injection. Every capability plan decides which extension points, hooks, capabilities, tools/resources, bundles, registries, bridge SDKs, and docs must be added, updated, or removed.\n12\t- **Pragmatic Flat with Discipline** — packages under `internal/`, API transports grouped under `api/`, no domain/infra split, no event bus.\n13\t- **`daemon/` is the sole composition root** — the only package that imports all others. Reconciliation logic running at boot belongs to composition root and is not \"legacy support\".\n14\t- **No package imports `daemon/`, `api/`, or `cli/`** — dependencies flow downward only.\n15\t- **Interfaces defined where consumed** (Go-style) — `session/` defines `AgentDriver`, `acp/` implements it.\n16\t- **Direct function calls through interfaces** — no event bus, no reflection-based routing, no NATS as inter-package coordination. 
NATS is permitted **only** inside `internal/network` as the embedded wire transport for the AGH Network protocol; daemon packages communicate via interfaces and the Notifier pattern, never by publishing to subjects.\n17\t- **Notifier pattern for fan-out** — typed interface for observability and SSE, not a generic bus.\n18\t- **No back-pointers between packages** — inject callbacks or interfaces.\n19\t- **Functional options for constructors** — `NewManager(opts ...Option)`.\n20\t- **Maps for \u003c10 items** — no registry interfaces for small collections.\n21\t- **File-level organization** within packages — sub-packages only when complexity justifies it.\n22\t- **CI-enforceable boundaries** — `mage Boundaries` rules prevent import cycles. Update `magefile.go` Boundaries() in the same commit that introduces a new `internal/api/*` subpackage.\n23\t- **`internal/api/core` is the canonical handler home.** REST/UDS endpoints exist as shared `BaseHandlers` methods; HTTP and UDS only choose registration and authentication. No transport-duplicated parsing/validation.\n24\t- **Authoritative primitives are exclusive.** When a primitive owns a state transition (`task.Service.ClaimNextRun`, `Spawn`, `EnsureMigration`), no peer package may replicate it. Wake/observe/sweep are allowed; claim/own is not. The mechanical scheduler does not call `ClaimNextRun`.\n25\t- **Hooks are typed dispatch, not an event bus.** Dispatch at the call site that owns the state transition. Never tail event/log tables to fire hooks. Hooks may deny/narrow/annotate but cannot bypass safety primitives (claim tokens, leases, TTL, lineage, spawn caps, permission narrowing).\n26\t- **Agent-manageable by default.** User-visible runtime capabilities must expose stable machine-readable control surfaces for agents: CLI verbs with `-o json`/`-o jsonl` where relevant, HTTP/UDS parity when state crosses the daemon boundary, discoverable status/config output, and docs that describe the agent path. 
UI-only manageability is incomplete.\n27\t- **No partial-surface completions.** Any change touching a public surface closes the loop end-to-end in one pass: contract → HTTP handler → UDS handler → CLI client → CLI command → extension/config/docs surfaces → tests → docs.\n28\t\n29\t### Concurrency\n30\t\n31\tGeneric Go concurrency patterns (goroutine ownership, channels vs mutexes, `select`/`ctx.Done()` discipline, no `time.Sleep` in orchestration) live in `agh-code-guidelines`. Architectural invariants below are load-bearing for design decisions:\n32\t\n33\t- **Goroutines spawned by `internal/session/manager_*.go` MUST be tracked by Manager-owned WaitGroup and joined in Manager shutdown.** Never put goroutine-owned channels in a struct field that another goroutine mutates — use a per-run handle.\n34\t- **Detached execution lifetime.** Any work that outlives an HTTP/UDS request — prompts, network channel sends, automation jobs — MUST detach via `context.WithoutCancel(ctx)`. Never tie execution lifetime to request lifetime. Expose explicit cancel endpoints (e.g., `POST /api/sessions/:id/prompt/cancel`).\n35\t- **`context.WithoutCancel` does NOT preserve deadlines.** Re-attach a deadline if needed.\n36\t- **Subprocess managed-stop** must respect `ctx.Done()` between Shutdown and Wait. Wrap `proc.Wait()` in `select { case \u003c-proc.Done(): case \u003c-ctx.Done(): }`.\n37\t- **Process-group supervision parity.** Unix uses process groups; Windows uses forced-exit fallback. Always cross-build with `GOOS=windows GOARCH=amd64 go build` before claiming subprocess work complete. Centralize signaling helpers in `internal/procutil`.\n38\t\n39\t### Runtime\n40\t\n41\t- Single-binary and local-first. 
Sidecars or external control planes require a written techspec.\n42\t- Keep execution paths deterministic and observable.\n43\t- **Daemon runs in background by default.** No daemon should require a foreground terminal.\n44\t- **`compozy exec` is headless.** `--format text` returns a single string; `--format json` returns a stream of valid JSON objects; the TUI is opt-in via `--tui`. `exec` does not persist artifacts to `.compozy/runs/` unless `--persist` is given.\n45\t- **Agent operations must not depend on the web UI.** If agents need to inspect, configure, start, stop, approve, claim, release, or repair a capability, the spec must provide a CLI/HTTP/UDS path with structured output and deterministic errors.\n46\t\n47\t### Observability\n48\t\n49\t- Every domain operation emits a canonical event with correlation keys (`workspace_id`, `session_id`, `parent_session_id`, `root_session_id`, `agent_name`, `task_id`, `run_id`, `claim_token_hash`, `lease_until`, `workflow_id`, `coordinator_session_id`, `scheduler_reason`, `hook_event`, `hook_name`, `spawn_depth`, `actor_kind`, `actor_id`, `release_reason`).\n50\t- Cover with a coverage matrix test that fails if any required lifecycle path doesn't emit its canonical event.\n51\t- Append-only event store (`runtime.db`) is the canonical operational ledger; session DBs are projections, not authority.\n52\t- Live broadcasters publish only after durable append; reconnect/replay uses `after_seq`.\n53\t\n54\t## Security Invariants\n55\t\n56\t- **`claim_token` redaction is non-negotiable.** Raw `claim_token` (`agh_claim_*`), MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings MUST NEVER appear in logs, status APIs, settings views, error payloads, channel messages, SSE, web UI, or memory. Use hash forms (`claim_token_hash`) over the wire. 
Network layer rejects raw `claim_token` in metadata.\n57\t- **Symlink escape hardening.** Skill sidecars, skill files, managed-extension dependency copies, and bundle install paths MUST verify resolved targets remain inside approved roots. Use `EvalSymlinks` + path-prefix check, not naive joins. Handle macOS `/private/var/folders` quirk (canonicalize source root before containment check).\n58\t- **Path security helpers.** Filesystem helpers resolving user-controlled or agent-controlled paths use the `sanitizePathKey` + `realpathDeepestExisting` pattern (defenses against null-byte, URL-encoded traversal, Unicode normalization, symlink-escape).\n59\t- **Identity proof-stripping defense.** In any signed-message processing path (AGH Network v1), an identity in verified format (`nickname@fingerprint`) without valid `proof` MUST classify as `rejected`, not `unverified`.\n60\t- **External-call timeouts.** Outbound HTTP/network calls MUST use a client with an explicit timeout. `http.DefaultClient` is forbidden in production code paths.\n61\t- **Load-time security scan.** Every non-bundled skill is scanned via `internal/skills.VerifyContent` on every load (not just install). Critical findings block; warning findings log; info findings log silently. 
Bundled skills are exempt because `go:embed` provides immutability.\n62\t\n63\t## Package Layout\n64\t\n65\t| Path | Responsibility |\n66\t| ------------------------------- | ----------------------------------------------------------------------------- |\n67\t| `cmd/agh` | Main entry point, CLI binary |\n68\t| `internal/config` | TOML loading, validation, merge, home paths, agent def parsing |\n69\t| `internal/acp` | ACP client: subprocess spawn, JSON-RPC over stdio |\n70\t| `internal/agentidentity` | Caller-identity inference from `AGH_SESSION_ID`/`AGH_AGENT` |\n71\t| `internal/automation` | Cron, webhook, and scheduled triggers; durable scheduler state |\n72\t| `internal/bridges` | External messaging adapters (Slack, Telegram, etc.) |\n73\t| `internal/bridgesdk` | Bridge SDK / contract types |\n74\t| `internal/bundles` | Bundle activation projector |\n75\t| `internal/cli` | Cobra commands |\n76\t| `internal/codegen` | OpenAPI → TS generator helpers |\n77\t| `internal/coordinator` | Coordinator-agent bootstrap and lifecycle |\n78\t| `internal/daemon` | Composition root, lock, boot, shutdown |\n79\t| `internal/diagnostics` | Diagnostics + health probes |\n80\t| `internal/e2elane` | E2E lane harness wiring |\n81\t| `internal/sandbox` | Sandbox profile resolution and provider runtime |\n82\t| `internal/extension` | Extension manifest, registry, host API, install runtime |\n83\t| `internal/extensiontest` | Extension test harness |\n84\t| `internal/filesnap` | File snapshot utilities |\n85\t| `internal/fileutil` | Shared filesystem helpers |\n86\t| `internal/frontmatter` | YAML frontmatter parsing |\n87\t| `internal/hooks` | Typed hook taxonomy + dispatch |\n88\t| `internal/logger` | Structured logging (slog) |\n89\t| `internal/mcp` | MCP server lifecycle / sidecars |\n90\t| `internal/memory` | Persistent dual-scope memory (global + workspace + agent), provenance, recall |\n91\t| `internal/memory/consolidation` | Dream consolidation runtime (Time → Sessions → Lock gate 
cascade) |\n92\t| `internal/network` | AGH Network channels/peers/wire, NATS profile |\n93\t| `internal/observe` | Event recording, health metrics, query engine |\n94\t| `internal/procutil` | Process utilities, process-group signaling, Windows fallback |\n95\t| `internal/registry` | Skill/agent/capability registry helpers |\n96\t| `internal/resources` | Resource projector / codec / validate |\n97\t| `internal/retry` | Retry primitives |\n98\t| `internal/scheduler` | Mechanical scheduler (idle registry, wakeups, sweep, recovery) |\n99\t| `internal/session` | Session lifecycle, Manager, state machine |\n100\t| `internal/settings` | Settings overlay/projection |\n101\t| `internal/situation` | Situation surface providers (`/agent/context`) |\n102\t| `internal/skills` | Skills catalog, loader, `VerifyContent`, MCP/hook decl, provenance |\n103\t| `internal/skills/bundled` | Bundled skill definitions |\n104\t| `internal/sse` | Shared SSE helpers |\n105\t| `internal/store` | SQLite shared helpers, migrations registry, validation |\n106\t| `internal/store/globaldb` | Global catalog (`agh.db`): sessions, metadata |\n107\t| `internal/store/sessiondb` | Per-session event store (`events.db`) |\n108\t| `internal/subprocess` | Subprocess signaling primitives |\n109\t| `internal/task` | Task domain, `task_runs` ownership, `ClaimNextRun` |\n110\t| `internal/testutil` | Shared test helpers |\n111\t| `internal/api/contract` | Shared daemon/CLI/HTTP contract types |\n112\t| `internal/api/core` | Shared handler types (`BaseHandlers`), error mapping, SSE helpers |\n113\t| `internal/api/httpapi` | HTTP/SSE server (Gin) for web UI |\n114\t| `internal/api/udsapi` | UDS server for CLI IPC |\n115\t| `internal/api/testutil` | Test helpers for the API layer |\n116\t| `internal/toolruntime` | Tool process registry + interrupts |\n117\t| `internal/tools` | Tool definitions and dispatch |\n118\t| `internal/transcript` | Canonical replay message assembly from persisted events |\n119\t| 
`internal/version` | Build metadata |\n120\t| `internal/workref` | Work reference helpers |\n121\t| `internal/workspace` | Workspace resolver and entity management |\n122\t\n123\t## Memory \u0026 Skills Runtime (RFC-backed)\n124\t\n125\t- **Five-layer skill/memory/agent precedence**: Bundled → Marketplace → User → Additional → Workspace, with agent-local overriding all. Higher precedence wins on collision; an audit trail logs every shadow.\n126\t- **Memory taxonomy**: `user | feedback | project | reference` types; scopes `agent | workspace | global`. Default write scope declared per agent in `memory.scope`.\n127\t- **Memory consolidation gates**: Time → Sessions → Lock cascade ordered by computational cost. Default gates: 24h, 5 touched sessions, file-lock. Never replace gates with naive heuristics.\n128\t- **Lifecycle hooks** (`on_session_created`, `on_session_stopped`) execute in hierarchy precedence then alphabetical order; configurable timeout (default 5s); fail-open semantics (errors logged, never block); JSON over stdin.\n129\t\n130\t## Forensic Bug Fixes\n131\t\n132\t- **Bug-fix plans open with confirmed reproduction** (timestamp, command, observed evidence) BEFORE listing changes. 
\"I think\" or \"probably\" is forbidden at the top of a fix plan.\n133\t- **Inactive metadata repair must distinguish startup-pending from crashed.** Sessions in `m.pending` are still starting, not failed.\n134\t- **Stale ACP session ids must be classified, not propagated.** Convert `Resource not found` to fresh-start fallback.\n135\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:28.230818Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:28.232267Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01BnAdN6pUcjCei8sAwQayn3","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01BnAdN6pUcjCei8sAwQayn3","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:33.989328Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BnAdN6pUcjCei8sAwQayn3","blocks":[{"type":"tool_use","id":"toolu_01BnAdN6pUcjCei8sAwQayn3","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude-code.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:33.989735Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01JEbQfnxsUJ2crzRATHarxF","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01JEbQfnxsUJ2crzRATHarxF","name":"Read","title":"Read 
File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:34.103838Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JEbQfnxsUJ2crzRATHarxF","blocks":[{"type":"tool_use","id":"toolu_01JEbQfnxsUJ2crzRATHarxF","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_hermes.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_hermes.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_hermes.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:34.215484Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:34.22034Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BnAdN6pUcjCei8sAwQayn3","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:34.22041Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JEbQfnxsUJ2crzRATHarxF","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:34.222815Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BnAdN6pUcjCei8sAwQayn3","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01BnAdN6pUcjCei8sAwQayn3","content":"```\n1\t# Competitor Analysis: Claude Code Tool System\n2\t\n3\t## Overview\n4\t\n5\tClaude Code does not expose a single mutable \"ToolRegistry\" class in the generic plugin sense. Its effective registry is a layered system:\n6\t\n7\t1. a strongly typed `Tool` contract,\n8\t2. 
a static built-in tool list,\n9\t3. request-time tool pool assembly,\n10\t4. MCP adaptation into the same contract,\n11\t5. deferred discovery through Tool Search,\n12\t6. centralized dispatch and permission handling.\n13\t\n14\tFor AGH, the main lesson is not Claude Code's static `tools.ts` shape. The useful pattern is the separation between canonical definition, contribution adapters, context-specific assembly, permission decision, dispatch, result mapping, and dynamic discovery.\n15\t\n16\t## Mechanisms / Patterns\n17\t\n18\tThe `Tool` interface carries model identity, schemas, runtime behavior, permission hooks, safety metadata, rendering hooks, output mapping, and dynamic discovery flags. `buildTool` applies defaults for omitted fields.\n19\t\n20\tBuilt-in tools are imported statically and exposed through `getAllBaseTools()`. `getTools(permissionContext)` filters this base list by mode, deny rules, special/internal tool rules, REPL visibility, and per-tool availability. `assembleToolPool()` then merges built-ins with MCP tools, sorts for prompt-cache stability, and deduplicates by name with built-ins winning.\n21\t\n22\tMCP tools are adapted into the same `Tool` contract. Claude Code calls MCP `tools/list`, maps schemas and annotations, preserves server/tool provenance in `mcpInfo`, and refreshes on `tools/list_changed`.\n23\t\n24\tDeferred discovery is a key pattern. Deferred tools are indexed by name/search hints and hidden from the initial API request. The model can call `ToolSearchTool`, which returns `tool_reference` blocks that cause the full schemas to be included later. Claude Code also has fallback delta/message paths when provider-native dynamic discovery is unavailable.\n25\t\n26\tPermissions are evaluated in an ordered policy pipeline. Deny rules can hide tools from visibility and block dispatch. 
Runtime permission checks consider explicit allow/deny/ask rules, tool-specific checks, user interaction requirements, safety classification, headless behavior, mode bypasses, and hooks.\n27\t\n28\tDispatch is centralized. `runToolUse` resolves the tool, validates schema input, runs optional validation, prepares observable input, executes pre-tool hooks, resolves permission, calls the handler, maps results, and handles result-size/storage behavior. `StreamingToolExecutor` uses `isConcurrencySafe` to parallelize safe tools while serializing unsafe ones.\n29\t\n30\tPlugins contribute tool-like behavior primarily through MCP servers, skills, commands, agents, hooks, and settings rather than native in-process `Tool` objects. This is a useful extension boundary for AGH because it keeps third-party tools behind a protocol/process adapter.\n31\t\n32\t## Relevant Code Paths\n33\t\n34\t- `.resources/claude-code/Tool.ts:362-520`: canonical `Tool` fields including schemas, `call`, availability, read-only/destructive/concurrency metadata, user-interaction requirements, deferred-loading flags, MCP metadata, validation, and permissions.\n35\t- `.resources/claude-code/Tool.ts:701-783`: `ToolDef`, defaults, and `buildTool`.\n36\t- `.resources/claude-code/tools.ts:161-193`: built-in tool source of truth.\n37\t- `.resources/claude-code/tools.ts:262-326`: runtime filtering by deny rules, special tools, REPL visibility, and `isEnabled`.\n38\t- `.resources/claude-code/tools.ts:345-389`: built-in + MCP tool pool assembly and deduplication.\n39\t- `.resources/claude-code/utils/api.ts:123-234`: conversion to API tool schemas and deferred-loading fields.\n40\t- `.resources/claude-code/utils/toolSearch.ts:155-197`: Tool Search modes.\n41\t- `.resources/claude-code/utils/toolSearch.ts:270-385`: provider/model capability checks for Tool Search.\n42\t- `.resources/claude-code/utils/toolSearch.ts:525-646`: discovered deferred tool extraction and delta computation.\n43\t- 
`.resources/claude-code/tools/ToolSearchTool/ToolSearchTool.ts:167-471`: search scoring and `tool_reference` outputs.\n44\t- `.resources/claude-code/services/api/claude.ts:1120-1339`: request-time Tool Search enablement and fallback injection.\n45\t- `.resources/claude-code/services/tools/toolExecution.ts:340-1297`: central dispatch.\n46\t- `.resources/claude-code/services/tools/streamingToolExecutor.ts:35-391`: concurrency-safe scheduling.\n47\t- `.resources/claude-code/utils/permissions/permissions.ts:236-362`, `1067-1312`: rule matching and ordered permission engine.\n48\t- `.resources/claude-code/services/mcp/client.ts:1738-2010`, `2160-2195`, `3020-3075`: MCP tools/list ingestion, adaptation, refresh, and call execution.\n49\t- `.resources/claude-code/services/mcp/useManageMCPConnections.ts:600-690`: `tools/list_changed` handling.\n50\t- `.resources/claude-code/utils/plugins/mcpPluginIntegration.ts:100-634`: plugin MCP server extraction, scoping, env resolution, and contribution adapter.\n51\t- `.resources/claude-code/types/plugin.ts:14-67`: plugin shape.\n52\t- `.resources/claude-code/skills/loadSkillsDir.ts:185-335`: skill frontmatter, `allowed-tools`, and user-invocable visibility.\n53\t- `.resources/claude-code/tools/AgentTool/runAgent.ts:440-690`: agent-specific permission scoping and merged tool pools.\n54\t\n55\t## Transferable Patterns\n56\t\n57\tAGH should define a first-class tool definition contract that attaches identity, schema, provenance, visibility, availability, permission requirements, dispatch handler, output policy, and observability metadata.\n58\t\n59\tAGH should treat tool pool assembly as separate from registration. Registration collects contributions; assembly produces a context-specific view for a workspace, user, agent, session, provider, mode, runtime health, and permission policy.\n60\t\n61\tAGH should model visibility explicitly instead of scattering \"hidden\" flags. 
Candidate tiers include internal-only, daemon-manageable, CLI/HTTP-visible, agent-visible, model-visible, deferred-discoverable, user-command-only, and extension-private.\n62\t\n63\tAGH should support provider-neutral search and deferred schema loading even if Anthropic-specific `tool_reference` blocks are not portable. A registry search index and `agh__tool_search` are useful independently.\n64\t\n65\tAGH should keep permissions as an ordered pipeline. Discovery-time filtering improves UX, but dispatch must revalidate deny/ask/allow rules, tool-specific requirements, session permissions, workspace permissions, and hooks.\n66\t\n67\tAGH should map MCP annotations into local metadata at adapter boundaries: read-only, destructive, open-world, title, schema, search hints, and provenance.\n68\t\n69\tAGH should carry `IsConcurrencySafe` or equivalent execution metadata and enforce it centrally.\n70\t\n71\t## Risks / Mismatches\n72\t\n73\tClaude Code makes LLM API calls directly and can use provider-specific dynamic tool features. AGH usually manages external ACP-compatible runtimes, so it cannot assume direct control of model API request payloads.\n74\t\n75\tClaude Code's built-in registry is a static import list. AGH needs an extensible daemon registry with contribution adapters, not a monolithic static list.\n76\t\n77\t`buildTool` defaults some permission behavior in a way that is acceptable for controlled built-ins but too permissive for third-party extension tools. AGH should default untrusted external tools to disabled, deny, or ask until policy grants them.\n78\t\n79\tClaude Code uses naming conventions such as MCP prefixes for some behavior. AGH should use structured provenance and namespaces instead.\n80\t\n81\tClaude Code's plugin tools primarily flow through MCP. This is a good MVP boundary, but AGH may also want a native extension Host API for richer lifecycle and local-resource management.\n82\t\n83\t## Open Questions\n84\t\n85\t1. 
Should AGH expose native registry tools to ACP runtimes as hosted MCP tools, direct ACP tools, or CLI/UDS-callable commands?\n86\t2. Should AGH persist deferred-tool discovery state per session, or keep search stateless and recompute on each call?\n87\t3. What should happen when an ACP runtime already has a tool with the same short name as an AGH-native tool?\n88\t4. Should extension/plugin tool contributions be MCP-only for MVP?\n89\t\n90\t## Evidence\n91\t\n92\t- `.resources/claude-code/Tool.ts:362-520`, `701-783`: canonical tool shape and defaults.\n93\t- `.resources/claude-code/tools.ts:161-389`: base registry, filtering, MCP merge, sorting, dedupe.\n94\t- `.resources/claude-code/utils/api.ts:123-234`: schema projection and deferred-loading fields.\n95\t- `.resources/claude-code/utils/toolSearch.ts:155-646`: dynamic discovery logic.\n96\t- `.resources/claude-code/tools/ToolSearchTool/ToolSearchTool.ts:167-471`: search scoring and references.\n97\t- `.resources/claude-code/services/tools/toolExecution.ts:340-1297`: central dispatch and result mapping.\n98\t- `.resources/claude-code/services/tools/streamingToolExecutor.ts:35-391`: concurrency-safe scheduling.\n99\t- `.resources/claude-code/utils/permissions/permissions.ts:236-362`, `1067-1312`: permission rule engine.\n100\t- `.resources/claude-code/services/mcp/client.ts:1738-2010`, `2160-2195`, `3020-3075`: MCP adapter.\n101\t- `.resources/claude-code/utils/plugins/mcpPluginIntegration.ts:100-634`: plugin-to-MCP contribution adapter.\n102\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:34.223256Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JEbQfnxsUJ2crzRATHarxF","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01JEbQfnxsUJ2crzRATHarxF","content":"```\n1\t# Competitor Analysis: Hermes Tool Registry\n2\t\n3\t## Overview\n4\t\n5\tHermes has the clearest direct 
precedent for the \"ToolRegistry\" name. Its tool system centers on a single Python registry that collects tool definitions, schemas, handlers, availability checks, toolset membership, MCP adapters, plugin contributions, and dispatch metadata.\n6\t\n7\tThe core pattern is powerful but too global for AGH to copy literally. The transferable part is the product shape: every tool contribution normalizes into one registry contract, the model-visible tool list is filtered by availability and toolset policy, and tool calls flow through one dispatch path.\n8\t\n9\t## Mechanisms / Patterns\n10\t\n11\tHermes tools self-register at import time. A tool module imports the singleton registry and calls `registry.register(...)` with a name, toolset, JSON schema, handler, optional `check_fn`, env requirements, display metadata, async flag, and result budget.\n12\t\n13\tThe registry provides:\n14\t\n15\t- `register`: add a tool definition and reject most non-MCP collisions.\n16\t- `get`: resolve a tool entry.\n17\t- `dispatch`: call the registered handler.\n18\t- `get_available_tools`: project the model-visible list after toolset and availability filters.\n19\t\n20\tToolsets are recursive bundles. Named toolsets can compose other toolsets, and broad aliases such as `all` / `*` expand across registered tools. This is the most relevant pattern for AGH's agent-role tool policy.\n21\t\n22\tAvailability is attached to the tool definition through `check_fn` and `requires_env`. Hermes filters unavailable tools before presenting definitions to the model. This is a critical reliability property, but Hermes does not consistently treat it as a dispatch-time security boundary.\n23\t\n24\tMCP tools are adapted into the same registry. Hermes discovers MCP tools, normalizes schemas, prefixes/organizes names, refreshes on MCP `tools/list_changed`, and registers each remote tool as a local registry entry. 
Dynamic MCP refresh is tested.\n25\t\n26\tPlugins can contribute tools, but the exact dispatch path can bypass parts of the normal `handle_function_call` pipeline. AGH should avoid this split by making the registry dispatch path mandatory for every executable AGH-owned tool.\n27\t\n28\t## Relevant Code Paths\n29\t\n30\t- `.resources/hermes/tools/registry.py:1-14`: registry purpose and top-level contract.\n31\t- `.resources/hermes/tools/registry.py:23-64`: `ToolEntry` shape.\n32\t- `.resources/hermes/tools/registry.py:176-228`: singleton registry operations.\n33\t- `.resources/hermes/tools/registry.py:260-327`: availability filtering and definitions.\n34\t- `.resources/hermes/tools/registry.py:352-433`: dispatch and result handling.\n35\t- `.resources/hermes/model_tools.py:141-153`: built-ins are imported to trigger registration.\n36\t- `.resources/hermes/model_tools.py:209-370`: tool definition projection for model calls.\n37\t- `.resources/hermes/model_tools.py:389-528`: function-call handling.\n38\t- `.resources/hermes/model_tools.py:529-705`: result transformation and tool-call lifecycle.\n39\t- `.resources/hermes/toolsets.py:483-692`: recursive toolset composition.\n40\t- `.resources/hermes/hermes_cli/tools_config.py:681-849`: operator configuration for tools.\n41\t- `.resources/hermes/hermes_cli/plugins.py:210-380`: plugin load and metadata paths.\n42\t- `.resources/hermes/hermes_cli/plugins.py:518-646`: plugin tool dispatch path.\n43\t- `.resources/hermes/tools/mcp_tool.py:860-1038`: MCP discovery and schema adaptation.\n44\t- `.resources/hermes/tools/mcp_tool.py:1058-1296`: MCP tool registration details.\n45\t- `.resources/hermes/tools/mcp_tool.py:1850-2108`: MCP refresh and runtime paths.\n46\t- `.resources/hermes/tools/mcp_tool.py:2508-2770`: dynamic discovery integration.\n47\t- `.resources/hermes/tests/tools/test_mcp_dynamic_discovery.py:1-160`: tests for MCP dynamic tool refresh.\n48\t- `.resources/hermes/tools/process_registry.py:1-21`, `465-690`: 
process registry and scoped runtime management.\n49\t\n50\t## Transferable Patterns\n51\t\n52\tAGH should build one registry contract that all tool sources normalize into: built-in Go tools, extension manifest tools, extension sidecar tools, MCP tools, and future dynamic tools.\n53\t\n54\tAGH should separate toolset policy from tool definitions. Toolsets should be named bundles resolved recursively at list/dispatch time, not hardcoded into each provider.\n55\t\n56\tAGH should fail closed during discovery when required env vars, binaries, MCP servers, or extension sidecars are missing. The discovery surface should explain why a tool is unavailable to operators, while the agent-visible surface should omit or mark tools according to policy.\n57\t\n58\tAGH should route MCP tools through the same registry dispatch path as native tools. MCP is an adapter, not a separate tool universe.\n59\t\n60\tAGH should treat dynamic refresh as a first-class event. Hermes' MCP `tools/list_changed` path is a useful precedent for a registry generation counter and catalog delta notification.\n61\t\n62\tAGH should include result budgets and transformation at the registry boundary. Large outputs should be persisted or summarized consistently rather than left to individual handlers.\n63\t\n64\t## Risks / Mismatches\n65\t\n66\tHermes relies heavily on a process-wide singleton and import-time registration. AGH should prefer explicit composition-root registration because daemon boot already wires skills, extensions, resource stores, hooks, API services, and session managers.\n67\t\n68\tHermes uses permissive in-process plugin execution. AGH should not load third-party executable handlers into the daemon process for MVP. Out-of-process MCP or extension sidecar execution is a better fit for AGH's security and observability model.\n69\t\n70\tHermes availability filtering is strong for model-visible definitions, but AGH must also recheck availability at dispatch. 
Discovery filtering alone is not a security boundary.\n71\t\n72\tHermes has some name-prefix and collision behavior around MCP tools. AGH should use structured namespaces and provenance instead of deriving security meaning from string prefixes.\n73\t\n74\tHermes' plugin dispatch split is a warning. If AGH has CLI, HTTP, UDS, session, MCP, and extension entry points, all of them must call the same `Registry.Call` pipeline.\n75\t\n76\t## Open Questions\n77\t\n78\t1. Should AGH expose unavailable tools to operators with reasons while hiding them from model-visible surfaces?\n79\t2. Should AGH support recursive toolsets as resources, config fields, or both?\n80\t3. Should MCP `tools/list_changed` cause live session deltas, or only refresh the next model-visible catalog query?\n81\t4. What conflict policy should AGH use when multiple providers contribute the same short tool name?\n82\t\n83\t## Evidence\n84\t\n85\t- Hermes registry: `.resources/hermes/tools/registry.py:23-64`, `176-228`, `260-327`, `352-433`.\n86\t- Hermes request pipeline: `.resources/hermes/model_tools.py:141-705`.\n87\t- Hermes toolsets: `.resources/hermes/toolsets.py:483-692`.\n88\t- Hermes MCP adapter: `.resources/hermes/tools/mcp_tool.py:860-1296`, `1850-2108`, `2508-2770`.\n89\t- Hermes plugin loader/dispatch: `.resources/hermes/hermes_cli/plugins.py:210-380`, `518-646`.\n90\t- Hermes dynamic discovery tests: `.resources/hermes/tests/tools/test_mcp_dynamic_discovery.py:1-160`.\n91\t- Hermes process registry: `.resources/hermes/tools/process_registry.py:1-21`, `465-690`.\n92\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:36.320865Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:36.321349Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01WvVniw7b8XVyGgat3FLPfZ","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01WvVniw7b8XVyGgat3FLPfZ","name":"Bash","title":"Terminal","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:37.325842Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WvVniw7b8XVyGgat3FLPfZ","blocks":[{"type":"tool_use","id":"toolu_01WvVniw7b8XVyGgat3FLPfZ","name":"Task","title":"ls /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/ 2\u003e\u00261 | head -30","input":{"prompt":"List lessons directory"},"rawInput":{"command":"ls /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/ 2\u003e\u00261 | head -30","description":"List lessons directory"}},{"type":"tool_result","toolUseId":"toolu_01WvVniw7b8XVyGgat3FLPfZ","content":"List lessons directory"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:37.350619Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:37.412025Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WvVniw7b8XVyGgat3FLPfZ","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:37.413275Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01WvVniw7b8XVyGgat3FLPfZ","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01WvVniw7b8XVyGgat3FLPfZ","content":"```console\nL-001-detached-prompt-lifetime.md\nL-002-tparallel-vs-tsetenv.md\nL-003-task-runs-single-queue.md\nL-004-manual-equals-peer.md\nL-005-authoritative-primitive-exclusivity.md\nL-006-greenfield-delete-not-adapt.md\nL-007-e2e-follows-runtime-contract.md\nL-008-schema-migrations-mandatory.md\nL-009-concurrent-worktree-deadlock.md\nL-010-model-name-validation.md\nL-011-fraco-test-coverage-pushback.md\nL-012-techspec-prose-only-rework.md\nL-013-prd-must-not-name-implementation.md\nL-014-sandbox-vocabulary-drift.md\nREADME.md\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:39.670007Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:39.670273Z","update":{"kind":"tool_call_started","toolCallId":"toolu_015XFAbREQF3ZzFqKwVxM8hc","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_015XFAbREQF3ZzFqKwVxM8hc","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:39.71831Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015XFAbREQF3ZzFqKwVxM8hc","blocks":[{"type":"tool_use","id":"toolu_015XFAbREQF3ZzFqKwVxM8hc","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-005-authoritative-primitive-exclusivity.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-005-authoritative-primitive-exclusivity.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-005-authoritative-primitive-exclusivity.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:39.718527Z","update":{"kind":"tool_call_started","toolCallId":"toolu_017CsLEXa42xccUdxXabJXsr","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_017CsLEXa42xccUdxXabJXsr","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:40.458609Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_017CsLEXa42xccUdxXabJXsr","blocks":[{"type":"tool_use","id":"toolu_017CsLEXa42xccUdxXabJXsr","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-006-greenfield-delete-not-adapt.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-006-greenfield-delete-not-adapt.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-006-greenfield-delete-not-adapt.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:40.458912Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:40.862616Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","blocks":[{"type":"tool_use","id":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-008-schema-migrations-mandatory.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-008-schema-migrations-mandatory.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-008-schema-migrations-mandatory.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:40.86307Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:41.669326Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","blocks":[{"type":"tool_use","id":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-012-techspec-prose-only-rework.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-012-techspec-prose-only-rework.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-012-techspec-prose-only-rework.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:41.670061Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01JKhk8G1oyykbtHRqJdkaBS","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01JKhk8G1oyykbtHRqJdkaBS","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.047761Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JKhk8G1oyykbtHRqJdkaBS","blocks":[{"type":"tool_use","id":"toolu_01JKhk8G1oyykbtHRqJdkaBS","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-007-e2e-follows-runtime-contract.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-007-e2e-follows-runtime-contract.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-007-e2e-follows-runtime-contract.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.04831Z","update":{"kind":"tool_call_started","toolCallId":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.605175Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","blocks":[{"type":"tool_use","id":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-014-sandbox-vocabulary-drift.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-014-sandbox-vocabulary-drift.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-014-sandbox-vocabulary-drift.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.634795Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.638257Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.63835Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015XFAbREQF3ZzFqKwVxM8hc","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.6384Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.63864Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_017CsLEXa42xccUdxXabJXsr","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.63869Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JKhk8G1oyykbtHRqJdkaBS","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.638786Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.639815Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01JcDAYpsZ3r1L8JfCZap5kx","content":"```\n1\t# L-012 — TechSpec without Go interface signatures triggers heavy review rework\n2\t\n3\t**Class:** Spec authoring\n4\t**Date discovered:** 2026-04-25 / 2026-04-26 (autonomy techspec vs. release-adjustments comparison)\n5\t**Evidence sources:** `../analysis/analysis_compozy_tasks.md` §PRD/TechSpec Quality Patterns\n6\t\n7\t## Context\n8\t\n9\tTwo TechSpecs from the same period delivered radically different review trajectories:\n10\t\n11\t- **Autonomy techspec** (`.compozy/tasks/autonomous/_techspec.md`, 706 lines): MVP boundary at top, listed Architectural Boundaries, Go interface signatures pasted as code blocks (`ClaimCriteria`, `ClaimedRun`, `TaskClaimer`, `SpawnOpts`, `PermissionNarrower`), data-model fields with rationale, side-table-vs-JSON decisions explicit, lease invariants enumerated as a numbered list. Eighteen tasks executed cleanly with **one** review round.\n12\t- **Release-adjustments / qa-review** (no `_techspec.md`, just review-only directories): unresolved review queues persisted across multiple PRs. Tasks 07-09 of autonomy that touched contract-laden interfaces had **exactly one** round of fixes because the techspec gave the implementer no contract ambiguity.\n13\t\n14\tThe differentiator was not length — it was concreteness. Specs that paste signatures, list fields with rationale, and enumerate invariants leave nothing to interpretation. Specs that describe the same mechanics in prose force the implementer to invent shapes that reviewers then reject.\n15\t\n16\t## Root cause\n17\t\n18\tProse-only descriptions produce N implementations, where N is the number of agents that read the spec. 
Reviewers then converge each implementation toward the implicit intent through review rounds — that is the rework. Code blocks (interface signatures, struct fields, SQL DDL, enum values) are unambiguous; reviewers either approve or reject specific tokens, and the spec author resolves the ambiguity once instead of N times.\n19\t\n20\t## Rule\n21\t\n22\t\u003e A TechSpec is not ready for review until it carries the **six quality markers**:\n23\t\u003e\n24\t\u003e 1. MVP boundary statement at top.\n25\t\u003e 2. Architectural Boundaries section.\n26\t\u003e 3. Concrete Go interface signatures pasted as code blocks (not prose).\n27\t\u003e 4. Data-model field rationale (purpose + shape per new column / frontmatter field / config key).\n28\t\u003e 5. Side-table-vs-JSON decision stated for every new domain entity.\n29\t\u003e 6. Lease / safety invariants as a numbered list.\n30\t\u003e\n31\t\u003e Specs without these markers are pre-rejected — they will need multiple review rounds.\n32\t\n33\t## Operationalization\n34\t\n35\t`cy-spec-peer-review` invokes Opus with a six-marker checklist embedded in the prompt. 
`cy-spec-preflight` blocks `cy-create-techspec` from completing until the six markers are present.\n36\t\n37\tWhen a spec is missing a marker, fix the spec — do not start tasks against the gap.\n38\t\n39\t## Anti-patterns\n40\t\n41\t- \"The function will accept the relevant config and return the result.\" (no signature)\n42\t- \"Add a column for ownership tracking.\" (no rationale, no name, no type)\n43\t- \"Choose the appropriate storage shape.\" (no decision)\n44\t- \"Ensure the lease is held safely.\" (no invariants)\n45\t- \"We'll figure out the interface during implementation.\" (defers the ambiguity)\n46\t\n47\t## Source\n48\t\n49\t- `.compozy/tasks/autonomous/_techspec.md` — the cleanest exemplar\n50\t- `.compozy/tasks/autonomous/adrs/adr-003.md` (lease invariants numbered)\n51\t- `.compozy/tasks/autonomous/adrs/adr-006.md` (safe spawn caps numbered)\n52\t- `.compozy/tasks/release-adjustments/` and `.compozy/tasks/qa-review/` — review-only dirs without TechSpecs, persistent unresolved review queues\n53\t- `../analysis/analysis_compozy_tasks.md` §\"Markers of 'good enough to execute'\" and §\"Markers of trouble\"\n54\t- `docs/_memory/_synthesis.md` skill candidate S-M9 `agh-techspec-quality-gate`\n55\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.640606Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_015XFAbREQF3ZzFqKwVxM8hc","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_015XFAbREQF3ZzFqKwVxM8hc","content":"```\n1\t# L-005 — Authoritative primitives are exclusive\n2\t\n3\t**Class:** Architecture\n4\t**Date discovered:** 2026-04-25 (autonomy ADR-004); reinforced 2026-04-26 (autonomy task_11)\n5\t**Evidence sources:** ADR-004 + task_11 memory + global_runs analysis\n6\t\n7\t## Context\n8\t\n9\tThe mechanical scheduler in `internal/scheduler` was tempted to claim runs directly during sweeps and recoveries. 
That would have collapsed the agent-pull model into a daemon-push model and given two components authority over the same state transition.\n10\t\n11\t## Root cause\n12\t\n13\tWhen two components can perform the same authoritative state transition (claim, spawn, migrate, narrow), they will eventually disagree. Either you serialize them (introducing locks, latency, and complexity), or you accept two sources of truth (introducing races and recovery bugs). The clean answer is to pick one authority per transition and force everything else to _observe and notify_ rather than _act_.\n14\t\n15\t## Rule\n16\t\n17\t\u003e When an authoritative primitive owns a state transition (`task.Service.ClaimNextRun`, `Spawn`, `EnsureMigration`), no peer package may replicate the transition. Wake/observe/sweep are allowed; claim/own is not.\n18\t\n19\t## Examples (canonical authorities in AGH)\n20\t\n21\t| Transition | Authority | Allowed peers |\n22\t| ----------------------------- | ------------------------------------ | -------------------------------------------------------------- |\n23\t| Claim a `task_run` | `task.Service.ClaimNextRun` | `internal/scheduler` may wake idle agents; never claims itself |\n24\t| Spawn a child session | Daemon-managed safe-spawn API | Coordinator submits requests; the daemon decides |\n25\t| Apply a schema migration | `internal/store` migrations registry | `EnsureSchema` is forbidden for column changes |\n26\t| Mutate session terminal state | Session manager | Channels, hooks, observability emit events but cannot mutate |\n27\t| Approve / publish a task | Operator + manual API surface | Coordinators receive enqueues; do not auto-approve |\n28\t\n29\t## Operationalization\n30\t\n31\t- **Scheduler can wake and sweep, but cannot claim.** `internal/scheduler` issues `scheduler.wake.count`/`no_match`/`lease_sweep.count`/`error` metrics — never `task.run.claim.success`.\n32\t- **Hooks can deny/narrow/annotate but cannot bypass safety invariants** (claim 
tokens, leases, TTL, lineage, spawn caps, permission narrowing).\n33\t- **Coordination channels are NEVER an ownership/status authority.** Channel `status`/`result` messages cannot mutate ownership/terminal state.\n34\t\n35\t## Anti-pattern\n36\t\n37\t- Adding a \"scheduler.\\*\" hook taxonomy that lets external code claim runs.\n38\t- Letting the coordinator bypass `ClaimNextRun` through a \"fast-path\" for trusted runs.\n39\t- Allowing the network layer to write terminal state via channel messages.\n40\t\n41\t## Source\n42\t\n43\t- `.compozy/tasks/autonomous/adrs/adr-004.md` — Split Semantic vs Mechanical Scheduling\n44\t- `.compozy/tasks/autonomous/adrs/adr-009.md` — Hooks First-Class (forbids hook-as-claim)\n45\t- `.compozy/tasks/autonomous/memory/task_11.md` — scheduler scope-creep avoided\n46\t- `../analysis/analysis_global_runs.md` lesson L4\n47\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.641202Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_017CsLEXa42xccUdxXabJXsr","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_017CsLEXa42xccUdxXabJXsr","content":"```\n1\t# L-006 — Greenfield + zero-legacy means _delete_, not _adapt_\n2\t\n3\t**Class:** Project posture\n4\t**Date discovered:** 2026-04-17 (harness TechSpec review, Portuguese-language reviewer)\n5\t**Evidence sources:** Harness review + `remove-legacy-alpha.md` standing directive + multiple ADRs\n6\t\n7\t## Context\n8\t\n9\tThe harness TechSpec proposed migrating an `inputAugmenter` callback to a `TurnAugmenter` pipeline. The spec did not say whether the old callback was deleted, kept as an adapter, or coexisting. 
The reviewer (in Portuguese) flagged this directly: _\"política zero-legacy exige declarar 'delete'\"_ — the zero-legacy policy _requires_ the spec to declare what is deleted.\n10\t\n11\tThis is a stronger application of the CLAUDE.md \"Greenfield Alpha — Zero Legacy Tolerance\" rule: it's not enough to _allow_ deletion; specs must _enumerate_ what is deleted.\n12\t\n13\t## Root cause\n14\t\n15\tWhen a spec says \"we are migrating to X\" without naming the delete-target, agents default to keeping both. Compatibility shims, adapters, and \"preserve old behavior\" branches accumulate as technical debt. Greenfield discipline only works if every breaking-change spec explicitly names what disappears.\n16\t\n17\t## Rule\n18\t\n19\t\u003e Every breaking-change techspec MUST explicitly name its delete targets. \"Delete the old thing\" is not a default; it is a checklist item that must be enumerated.\n20\t\n21\t## Operationalization\n22\t\n23\tIn every TechSpec that changes a public surface (or any meaningful internal contract), include a section like:\n24\t\n25\t```markdown\n26\t## Delete Targets\n27\t\n28\t- `internal/foo.OldType` (replaced by `internal/foo.NewType` in step 3)\n29\t- `pkg/bar.LegacyAdapter` (no callers after migration; remove in step 5)\n30\t- TOML key `[old.section]` (renamed; no backward alias)\n31\t- HTTP endpoint `/v0/old/path` (replaced by `/v1/new/path`; no redirect)\n32\t```\n33\t\n34\tRenames sweep code, storage, APIs, CLI, extensions, specs, RFCs, AND `.compozy/tasks/*` artifacts in the same change. No aliases, no dual fields, no migration code.\n35\t\n36\t## Allowed exception (single-pass repair)\n37\t\n38\tWhen the cost of \"delete the old thing\" is \"every developer rebuilds their local SQLite,\" in-place ALTER + one-shot repair is allowed if and only if:\n39\t\n40\t1. Repair is bounded to a single boot.\n41\t2. Strict semantics resume immediately after repair.\n42\t3. 
The exception is documented in an ADR.\n43\t\n44\tReference: `session-driver-override/adrs/adr-005.md`.\n45\t\n46\t## Source\n47\t\n48\t- `.codex/plans/remove-legacy-alpha.md` (standing directive)\n49\t- `.codex/sessions/2026/04/17/.../exec-20260417-232547-929722000/turns/0001/response.txt` (harness review, Portuguese)\n50\t- `.compozy/tasks/autonomous/adrs/adr-001.md`\n51\t- Multiple `network-rename`, `assistant-ui-hard-cut`, `workspace-menu-hardcut` plans in `.codex/plans/`\n52\t- `../analysis/analysis_local_runs.md` lesson LL-1\n53\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.641729Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01BWtfgH1KcBSgmAA6ai9kmR","content":"```\n1\t# L-008 — Schema migrations are required even on fresh DBs\n2\t\n3\t**Class:** Persistence\n4\t**Date discovered:** 2026-04-25 (Hermes BUG-002, Critical)\n5\t**Evidence sources:** Hermes BUG-002 + multiple Hermes/autonomy review issues\n6\t\n7\t## Context\n8\t\n9\tThe Hermes track widened the `memory_operation_log` table to add `scope`, `workspace_root`, `filename` columns. The change worked on fresh installs because `internal/memory/catalog.go` boots through `storepkg.EnsureSchema`, which created the new shape. But existing databases kept the old five-column table — `EnsureSchema` skipped existing tables — and `agh memory write` failed on upgrade with `no such column: scope`.\n10\t\n11\tCodeRabbit flagged it as Critical. The fix: schema migration v6 added through the deterministic migrations runner.\n12\t\n13\t## Root cause\n14\t\n15\t`EnsureSchema`-style boot reconciliation has a fundamental gap: it creates tables that don't exist but does not mutate tables that do. 
Any column/index/constraint addition needs a real migration; a migration is required _even when fresh installs already work_, because upgrade is a first-class scenario in alpha.\n16\t\n17\tA second contributor: AGH had two schema paths — the global DB used a real migrations registry, while the catalog DB used `EnsureSchema`. Drift between these paths produced the bug.\n18\t\n19\t## Rule\n20\t\n21\t\u003e Any change to a SQLite column, index, or constraint MUST add a versioned migration in the migrations registry. `EnsureSchema`-style boot reconciliation is forbidden for column changes. Test fresh-DB AND reopen-after-restart paths.\n22\t\n23\t## Operationalization\n24\t\n25\t- **One schema migration primitive shared by all SQLite databases** (`agh.db`, `events.db`, catalog DBs, automation scheduler state, memory operation log).\n26\t- **Numbered migrations** are recorded in `schema_migrations`. Idempotent. Wrapped in transactions.\n27\t- **Test the upgrade path:** every schema change must include a `Test*FreshDB` test AND a `Test*ReopenAfterRestart` test.\n28\t- **`SQLite recovery code paths must rename or remove `-wal`and`-shm`companions, not only the`.db` file.** (Cross-cutting hygiene: see refac-v2 issue 001 Critical WAL recovery bug.)\n29\t- **Schema-version bump** lives in the migrations registry — do not encode it in code constants.\n30\t\n31\t## Allowed exception\n32\t\n33\tIn greenfield alpha, a hard-cut rename + table rewrite without compat migration is allowed when:\n34\t\n35\t1. The change is documented in the techspec's \"Delete Targets\" section.\n36\t2. All callers of the old shape are deleted in the same change.\n37\t3. 
Per-developer wipe of local SQLite is acceptable cost.\n38\t\n39\t## Anti-pattern\n40\t\n41\t- `CREATE TABLE IF NOT EXISTS new_columns ...` then expecting the table to grow.\n42\t- Schema-version constants in Go code that don't match the migrations registry.\n43\t- Tests that only cover fresh-DB.\n44\t\n45\t## Source\n46\t\n47\t- `.codex/ledger/2026-04-25-MEMORY-hermes-qa-execution.md` (BUG-002)\n48\t- `.compozy/tasks/hermes/reviews-001/issue_020.md` (Critical)\n49\t- `.compozy/tasks/refac-v2/reviews-001/issue_001.md` (WAL/SHM Critical)\n50\t- `.compozy/tasks/autonomous/memory/task_07.md` (claim/lease schema v7)\n51\t- `../analysis/analysis_global_runs.md` lesson L1, `../analysis/analysis_local_runs.md` lesson LL-2\n52\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.64211Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01JKhk8G1oyykbtHRqJdkaBS","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01JKhk8G1oyykbtHRqJdkaBS","content":"```\n1\t# L-007 — E2E harness regressions follow runtime contract changes\n2\t\n3\t**Class:** Testing\n4\t**Date discovered:** 2026-04-26 (autonomy task_18 QA)\n5\t**Evidence sources:** task_18 BUG-001/002/003 + global_runs\n6\t\n7\t## Context\n8\t\n9\tThe autonomy MVP `make verify` passed. Real-scenario QA via `real-scenario-qa` then surfaced three Playwright/E2E regressions:\n10\t\n11\t- **BUG-001** — workspace onboarding race in the web E2E `TC-AUTO-015` test; absent shared workspace-onboarding wait helper.\n12\t- **BUG-002** — `acpmock` exact-match canonicalization for situation-augmented prompts. 
Task 04 added a situation-context augmenter that changed the rendered prompt; the deterministic ACP mock fixture matcher still expected the pre-Task-04 shape.\n13\t- **BUG-003** — Tasks browser E2E asserting an empty Agents-panel state; manual-first publish actually rendered an active run.\n14\t\n15\tAll three were rooted in tests written against an _older_ runtime contract. None was a production bug.\n16\t\n17\t## Root cause\n18\t\n19\tWhen a runtime contract changes — a new prompt augmenter, a different fixture canonicalization, a new manual-first UI state — the deterministic test infrastructure (acpmock fixtures, Playwright selectors, browser fixtures) embeds the _old_ contract. `make verify` passes because tests still hit their old expectations. Real-scenario QA exposes the drift.\n20\t\n21\t## Rule\n22\t\n23\t\u003e When a runtime contract changes (prompt augmenter, situation context, fixture format, manual-first UI state), the E2E mock and matchers ship in the same PR. Do not let the test infrastructure encode a stale contract.\n24\t\n25\t## Operationalization\n26\t\n27\t- For ACP fixture work: replace fragile string-matching with structured prompt metadata. acpmock uses typed metadata, not rendered prompt substrings.\n28\t- For Playwright E2E: add shared wait helpers (`web/e2e/fixtures/selectors.ts`) for workspace onboarding, session creation, manual-first publish. New runtime states require helper updates in the same PR.\n29\t- Real-scenario QA is the canonical regression net. `make verify` is necessary but not sufficient.\n30\t- E2E regressions surfaced in the QA pass are NOT production bugs unless they reveal divergent runtime behavior. 
Fix the test infrastructure, not the runtime.\n31\t\n32\t## Anti-pattern\n33\t\n34\t- Adding a `time.Sleep(2 * time.Second)` to \"stabilize\" a flaky Playwright spec.\n35\t- Loosening an acpmock matcher to substring instead of metadata.\n36\t- Skipping E2E in the QA pass because \"the unit tests cover it.\"\n37\t\n38\t## Source\n39\t\n40\t- `.compozy/tasks/autonomous/memory/task_18.md`\n41\t- `.compozy/tasks/autonomous/qa/verification-report.md`\n42\t- `.compozy/tasks/autonomous/qa/issues/BUG-001.md`, `BUG-002.md`, `BUG-003.md`\n43\t- `.codex/plans/2026-04-17-e2e-confidence-hardening.md` — root-cause plan for the structured-metadata switch\n44\t- `../analysis/analysis_compozy_tasks.md` (task_18 findings), `../analysis/analysis_global_runs.md` (autonomy QA section)\n45\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:42.642713Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_0197qmzs5CcF8J7Dqn4uU3t2","content":"```\n1\t# L-014: Runtime Vocabulary Must Match Public Contracts\n2\t\n3\t**Class:** Architecture / Product vocabulary\n4\t\n5\t## Incident\n6\t\n7\tThe execution isolation feature was implemented and exposed as \"environments\" even though the product concept was Sandbox. 
The mismatch appeared across internal packages, config keys, database columns, API fields, CLI flags, hook topics, extension Host API methods, web navigation, settings UI, generated docs, and task artifacts.\n8\t\n9\tThe feature was also under-documented: the landing page did not explain Sandbox, runtime docs did not have a dedicated Sandbox section, and the web UI hid the feature under Settings instead of giving it primary navigation.\n10\t\n11\t## Root cause\n12\t\n13\tThe first implementation used an overloaded infrastructure term and let it harden into contracts before product vocabulary was settled. Once code, docs, generated references, and QA artifacts diverged, the feature became difficult to discover and easy to misrepresent.\n14\t\n15\t## Fix / Rule\n16\t\n17\tPublic runtime concepts need one canonical noun before they reach contracts. If the noun changes during greenfield alpha, perform one hard cut across every public and internal surface in the same change:\n18\t\n19\t- runtime packages, storage columns, config keys, generated contracts, CLI flags, hook names, Host API methods, logs, and tests\n20\t- web routes, navigation, page copy, mocks, route tests, and generated client types\n21\t- landing pages, docs navigation, runtime guides, generated CLI docs, and examples\n22\t- `CLAUDE.md` / `AGENTS.md`, `.compozy/tasks/*` artifacts, glossary, and lessons\n23\t\n24\tDo not keep aliases, redirects, dual fields, or fallback parsing for the obsolete product noun. 
Generic operating-system terminology remains valid when it describes process environment variables or host context.\n25\t\n26\t## Evidence\n27\t\n28\t- Accepted implementation plan: `.codex/plans/2026-04-28-sandbox-hard-cut.md`\n29\t- Runtime package hard cut: `internal/sandbox/`\n30\t- Public API/schema source: `internal/api/spec/spec.go`\n31\t- Web IA hard cut: `web/src/routes/_app/sandbox.tsx` and `web/src/components/app-sidebar.tsx`\n32\t- Dedicated docs: `packages/site/content/runtime/core/sandbox/index.mdx`\n33\t- Landing page surface: `packages/site/components/landing/sandbox-section.tsx`\n34\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:46.28366Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:46.284734Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:46.405398Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","blocks":[{"type":"tool_use","id":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-013-prd-must-not-name-implementation.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-013-prd-must-not-name-implementation.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-013-prd-must-not-name-implementation.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:46.406379Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01EHxNqmsB4NoPZJERTUGzQz","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01EHxNqmsB4NoPZJERTUGzQz","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:47.172428Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01EHxNqmsB4NoPZJERTUGzQz","blocks":[{"type":"tool_use","id":"toolu_01EHxNqmsB4NoPZJERTUGzQz","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-003-task-runs-single-queue.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-003-task-runs-single-queue.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-003-task-runs-single-queue.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:47.172697Z","update":{"kind":"tool_call_started","toolCallId":"toolu_018Bxjmvf44G6APkJ9vbFMEu","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_018Bxjmvf44G6APkJ9vbFMEu","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:47.831569Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_018Bxjmvf44G6APkJ9vbFMEu","blocks":[{"type":"tool_use","id":"toolu_018Bxjmvf44G6APkJ9vbFMEu","name":"Read","title":"Read 
/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-004-manual-equals-peer.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-004-manual-equals-peer.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/docs/_memory/lessons/L-004-manual-equals-peer.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:47.832228Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01KGovCozwko6ybBy9K98eZD","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01KGovCozwko6ybBy9K98eZD","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.886023Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KGovCozwko6ybBy9K98eZD","blocks":[{"type":"tool_use","id":"toolu_01KGovCozwko6ybBy9K98eZD","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_claude_code_ideas.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.886066Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.888309Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.888413Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_018Bxjmvf44G6APkJ9vbFMEu","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.888512Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01EHxNqmsB4NoPZJERTUGzQz","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.88876Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KGovCozwko6ybBy9K98eZD","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.890036Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01PHh1ZwBNniXzDCVzDAdFV5","content":"```\n1\t# L-013 — PRD must not name frameworks, storage, error codes, or file formats\n2\t\n3\t**Class:** Spec authoring\n4\t**Date discovered:** 2026-04-18 (todo-api smux pairing run)\n5\t**Evidence sources:** Codex orchestrator prompt template + analysis_codex_sessions\n6\t\n7\t## Context\n8\t\n9\tIn the `todo-api` smux pairing experiment, Pedro built an orchestrator role for one Claude pane that explicitly inspects PRDs and rejects any document that surfaces implementation choices. The orchestrator instruction was: _\"PRD naming frameworks/storage engines/file formats — strip, push to TechSpec.\"_ Pedro adopted this as a generally-applied rule across AGH spec authoring.\n10\t\n11\tA PRD that names `PostgreSQL`, `react-query`, `OAuth 2.0`, `JWT`, `gRPC`, or specific HTTP error codes leaks implementation into the vision document. 
The implementer reads the PRD with framework constraints already locked in and stops asking \"is this the right shape?\" The TechSpec phase exists exactly to make those decisions — moving them earlier collapses two phases into one and removes the option to choose differently when the architecture surface comes into focus.\n12\t\n13\t## Root cause\n14\t\n15\tLLM-authored PRDs default to \"concrete and useful\" framing because that's how product writing reads online. Real product writing is meant to ship a feature; AGH PRDs feed into a TechSpec that an architecture-aware reviewer will pressure-test. The PRD's job is to constrain the _user-observable_ surface, not the _implementation_ surface.\n16\t\n17\t## Rule\n18\t\n19\t\u003e PRDs frame **what** and **why**, never **how**. PRDs MUST NOT name:\n20\t\u003e\n21\t\u003e - Frameworks (`react`, `next.js`, `tanstack-query`, `gin`, `cobra`, `gorm`).\n22\t\u003e - Storage engines (`PostgreSQL`, `SQLite`, `Redis`, `S3`).\n23\t\u003e - Wire protocols (`gRPC`, `JSON-RPC`, `WebSocket`).\n24\t\u003e - Auth standards (`OAuth 2.0`, `JWT`, `mTLS`, `PKCE`).\n25\t\u003e - File formats (`YAML`, `JSON`, `TOML`, `Protobuf`).\n26\t\u003e - HTTP error codes / status numbers.\n27\t\u003e - SQL schema or column names.\n28\t\u003e - Specific tools (`bun`, `mise`, `goreleaser`).\n29\t\u003e\n30\t\u003e Strip and push to the TechSpec. Approval owner: PRD author.\n31\t\n32\t## Operationalization\n33\t\n34\t`cy-spec-preflight` runs a regex pass over the PRD draft and surfaces any matching tokens. Items found are listed for the author to either justify (rare exception, e.g., when the PRD is _about_ AGH Network's wire format) or strip.\n35\t\n36\tPRDs may name **product surfaces** (CLI verb, web route, doc page) when those are user-observable. 
They may not name the implementation behind those surfaces.\n37\t\n38\t## Anti-patterns\n39\t\n40\t- \"Use PostgreSQL for the durable queue.\" → strip; TechSpec decides.\n41\t- \"Return 422 when validation fails.\" → strip; TechSpec decides.\n42\t- \"JWT-based session tokens.\" → strip; TechSpec decides.\n43\t- \"React-query mutation for the publish action.\" → strip; TechSpec decides.\n44\t- \"Store config in a YAML file.\" → strip; TechSpec decides whether YAML, TOML, or sidecar JSON.\n45\t\n46\t## Allowed exceptions\n47\t\n48\t- AGH Network protocol PRDs that are _about_ wire format (capability envelopes, NATS subjects). The protocol IS the user-observable surface for that PRD.\n49\t- AGENT.md / MEMORY.md / SKILL.md PRDs where the file format is the product.\n50\t- PRDs scoped to a specific framework's ergonomics (e.g., a PRD about TanStack Query usage patterns inside `web/`).\n51\t\n52\t## Source\n53\t\n54\t- `~/.codex/sessions/2026/04/18/19-27-52` (smux pairing orchestrator instruction)\n55\t- `../analysis/analysis_codex_sessions.md` §Anti-Patterns (todo-api smux rule)\n56\t- `docs/_memory/_synthesis.md` Top-level Finding 3\n57\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.890358Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01EHxNqmsB4NoPZJERTUGzQz","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01EHxNqmsB4NoPZJERTUGzQz","content":"```\n1\t# L-003 — `task_runs` is the single durable work queue\n2\t\n3\t**Class:** Architecture / Autonomy\n4\t**Date discovered:** 2026-04-25 (autonomy techspec, ADR-003)\n5\t**Evidence sources:** 3 ADRs forbid duplication\n6\t\n7\t## Context\n8\t\n9\tDesigning the autonomy MVP, several alternatives proposed a parallel scheduler-owned queue (\"scheduler claims, then asks task service to assign\"). 
All were rejected.\n10\t\n11\t## Root cause\n12\t\n13\tTwo durable queues for the same work creates two sources of truth for ownership state. Any divergence (lease vs. claim, sweep vs. heartbeat, recovery vs. boot) becomes a race. The existing `task_runs` table already carries `status`, `attempt`, `idempotency_key`, `origin_kind/ref` — adding state via columns and side tables is strictly cheaper than adding a parallel table.\n14\t\n15\t## Rule\n16\t\n17\t\u003e `task_runs` is the single durable work queue. Do not introduce a parallel queue or actor table. Add new ownership/state via columns + side tables on `task_runs`.\n18\t\n19\t## Lease invariants (operationalization)\n20\t\n21\t1. Exactly one active claim token per non-terminal run.\n22\t2. Heartbeat/complete/fail/release compare run owner + claim token.\n23\t3. Stale/late after recovery fails explicitly (no silent reassignment).\n24\t4. Sweep + heartbeat serialize via SQLite tx (`BEGIN IMMEDIATE`).\n25\t5. Boot recovery runs BEFORE the scheduler accepts wake/claim traffic.\n26\t6. Lease extension is bounded by config.\n27\t7. 
One active lease per session in MVP.\n28\t\n29\t## Side-table strategy\n30\t\n31\t- Capability matching = exact-match rows in `task_run_required_capabilities` and `task_run_preferred_capabilities`.\n32\t- Coordination channels: `coordination_channel_id` column on `task_runs`.\n33\t- Permission narrowing data: side table indexed by `task_run_id`.\n34\t- **Never** stuff dynamic ownership/match state into a JSON metadata blob.\n35\t\n36\t## Source\n37\t\n38\t- `.compozy/tasks/autonomous/adrs/adr-003.md` — Extend Task Runs for Atomic Claim/Lease\n39\t- `.compozy/tasks/autonomous/adrs/adr-004.md` — Split Semantic vs Mechanical Scheduling\n40\t- `.compozy/tasks/autonomous/adrs/adr-010.md` — Manual Operator Control First-Class\n41\t- `../analysis/analysis_compozy_tasks.md` (lesson 1, with 3 ADRs cited)\n42\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.890652Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_018Bxjmvf44G6APkJ9vbFMEu","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_018Bxjmvf44G6APkJ9vbFMEu","content":"```\n1\t# L-004 — Manual operator paths converge with autonomous on the same primitives\n2\t\n3\t**Class:** Architecture / Autonomy\n4\t**Date discovered:** 2026-04-25 (autonomy techspec, ADR-010 — most-repeated rule)\n5\t**Evidence sources:** ADR-010 referenced 7+ times across `_techspec.md`\n6\t\n7\t## Context\n8\t\n9\tEarly autonomy drafts treated user-driven flows and agent-spawned flows as separate code paths. Pedro pushed back: \"autonomy is additive, never replacement.\"\n10\t\n11\t## Root cause\n12\t\n13\tSplitting manual and autonomous into \"user mode\" and \"agent mode\" creates two implementations of every safety primitive (claim, lease, heartbeat, complete, fail, release, narrow). Inevitably they drift. 
Operators end up with weaker invariants than agents (or vice versa), and the system loses the property that operator and agent flows can interleave safely.\n14\t\n15\t## Rule\n16\t\n17\t\u003e Manual operator paths and autonomous paths converge on the same primitives. User-created tasks, automation-created tasks, coordinator-created tasks, and agent-spawned child tasks all use the same task/run model and the same claim-token/lease/heartbeat/complete/fail/release rules.\n18\t\n19\t## Operationalization\n20\t\n21\t- **Task creation alone NEVER enqueues claimable work or starts the coordinator.** Publish/start/approval is the run-enqueue boundary that triggers coordinator bootstrap.\n22\t- **Operator commands are identity-explicit; agent commands are identity-implicit.** Operator endpoints MUST NOT infer agent identity from environment variables.\n23\t- **No separate manual/autonomous/coordinator queues.** All converge on `task_runs` with `actor_kind` differentiating origin.\n24\t- **E2E coverage MUST include both manual-first bookends:**\n25\t 1. `user create → publish → coordinated execution`\n26\t 2. `user-start session → direct prompt without coordinator`\n27\t- **Operator UI must visually distinguish creation vs. publish/approval vs. run enqueue vs. 
coordinator spawn.**\n28\t\n29\t## Source\n30\t\n31\t- `.compozy/tasks/autonomous/adrs/adr-010.md` — most load-bearing autonomy ADR\n32\t- `.compozy/tasks/autonomous/adrs/adr-005.md` — coordinator triggers (publish/start/approval)\n33\t- `.compozy/tasks/autonomous/adrs/adr-002.md` — agent-facing CLI before MCP\n34\t- `.compozy/tasks/autonomous/_techspec.md` (Manual Control Contract section)\n35\t- `../analysis/analysis_compozy_tasks.md`, `../analysis/analysis_existing_surfaces.md`\n36\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:48.891306Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01KGovCozwko6ybBy9K98eZD","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01KGovCozwko6ybBy9K98eZD","content":"```\n1\t# Local Ideas Cross-Reference: docs/ideas/from-claude-code\n2\t\n3\t## Overview\n4\t\n5\tThe `docs/ideas/from-claude-code` folder contains prior Claude Code analyses and a filtered recommendation document. The most important point is a tension:\n6\t\n7\t- Earlier filtering said Claude Code Tool Search and streaming tool execution were \"not relevant\" because AGH manages external ACP runtimes and does not make LLM API calls directly.\n8\t- A Tool Registry feature changes part of that conclusion. If AGH introduces daemon-owned, agent-callable tools, then search, progressive schema disclosure, permission ordering, result budgets, and tool metadata become relevant again.\n9\t\n10\tThe TechSpec should be explicit about this boundary. 
AGH should not pretend it controls every provider's internal tool loop, but it can own a registry for AGH-native tools and extension-provided tools exposed through AGH surfaces.\n11\t\n12\t## Relevant Ideas\n13\t\n14\t### Canonical tool contract\n15\t\n16\t`analysis_tool_system.md` proposes a Go `Tool` shape with name, description, schema, permission checks, execution, classifier input, and result limits. This aligns with the competitor research, but AGH should avoid making security classifier behavior mandatory in the first registry layer.\n17\t\n18\tThe stronger AGH version should split:\n19\t\n20\t- descriptor fields used for discovery and schema projection,\n21\t- policy metadata used for authorization,\n22\t- availability checks used for live health,\n23\t- handler/adapters used for dispatch,\n24\t- result policy used for truncation, persistence, redaction, and telemetry.\n25\t\n26\t### Tool Search and deferred loading\n27\t\n28\tThe document proposes an `eager`, `deferred`, and `discovered` registry with `Search(query)` returning tool references. That is useful if AGH has large tool universes from MCP, extensions, skills, network peers, and built-ins.\n29\t\n30\tAGH should adapt this provider-neutrally:\n31\t\n32\t- `agh__tool_search` returns descriptors and optional schema handles.\n33\t- CLI/HTTP/UDS search returns the same data.\n34\t- ACP/model-specific `tool_reference` integration is optional and provider-dependent.\n35\t- The registry can still compute deltas and persist \"discovered in this session\" state later.\n36\t\n37\t### Result persistence and budgets\n38\t\n39\tThe Claude Code analysis highlights per-tool max result sizes and disk persistence for large outputs. 
AGH needs the same concept because tool results may be delivered into session transcripts, HTTP responses, UDS clients, or agent-visible messages.\n40\t\n41\tThe registry should define:\n42\t\n43\t- `MaxResultBytes` or a default by risk/source class,\n44\t- preview strategy,\n45\t- artifact persistence target,\n46\t- redaction path,\n47\t- telemetry fields for result bytes and persisted artifact id.\n48\t\n49\t### Permission and security validators\n50\t\n51\tThe local idea files discuss command-specific validators, classifier input projection, dangerous pattern registries, and bash-specific semantics. Those are important for shell tools, but they should not block the Tool Registry foundation.\n52\t\n53\tFor MVP, the registry should provide a hook point:\n54\t\n55\t- a tool can declare `RiskClass`, `OpenWorld`, `Destructive`, and `RequiresUserInteraction`;\n56\t- a policy engine can decide allow/deny/ask;\n57\t- specialized tools such as shell/url/browser can later plug in validators.\n58\t\n59\t### Prompt and catalog deltas\n60\t\n61\t`analysis_prompt_architecture.md` discusses enabled-tools-aware prompt sections and delta attachment patterns. The relevant AGH takeaway is that catalog changes should be incremental and explicit. For ACP runtimes that cannot accept live tool deltas, AGH should clearly fall back to \"visible on next session.\"\n62\t\n63\t### Streaming executor and concurrency\n64\t\n65\t`analysis_query_engine.md` includes concurrency-safe vs exclusive tool execution. AGH should keep the metadata and enforce it at registry dispatch, but not copy a direct model streaming executor unless AGH owns a provider's query loop.\n66\t\n67\t### Plugin system references\n68\t\n69\t`analysis_services_infra.md` describes plugin refresh, availability, hooks, and plugin error taxonomy. This supports an AGH registry model where extension tools have lifecycle state and refresh reasons. 
AGH should convert extension sidecar and MCP health into availability reasons rather than exposing raw plugin errors to agents.\n70\t\n71\t## Filtered Recommendation Reversal\n72\t\n73\t`filtered_recommendations.md` says Tool Search, streaming execution, bash classifiers, and API tool loops are not relevant because AGH does not make LLM API calls. That remains true for driver-internal tools.\n74\t\n75\tHowever, the Tool Registry feature is not about controlling Claude Code's own tools. It is about creating AGH-owned tools that are:\n76\t\n77\t- discoverable through AGH,\n78\t- governed by AGH,\n79\t- executable through AGH,\n80\t- extensible by AGH extensions,\n81\t- visible to agents regardless of ACP runtime when an adapter exists.\n82\t\n83\tTherefore:\n84\t\n85\t- Do not copy Claude Code's provider API request mechanics into the MVP.\n86\t- Do copy the registry/search/permission/result architecture where AGH owns the tool surface.\n87\t\n88\t## Relevant Code / Document Paths\n89\t\n90\t- `docs/ideas/from-claude-code/analysis_tool_system.md:450-620`: deferred registry, security validator pipeline, classifier input projection, dangerous pattern registry, and key file references.\n91\t- `docs/ideas/from-claude-code/filtered_recommendations.md:1-38`: architectural warning that AGH is an orchestration kernel, not a direct LLM API loop.\n92\t- `docs/ideas/from-claude-code/analysis_prompt_architecture.md`: prompt sections and tool-aware catalog deltas.\n93\t- `docs/ideas/from-claude-code/analysis_query_engine.md`: tool execution concurrency and exclusive scheduling references.\n94\t- `docs/ideas/from-claude-code/analysis_services_infra.md`: plugin refresh and availability ideas.\n95\t\n96\t## Transferable Patterns\n97\t\n98\t1. Build a provider-neutral registry search API first.\n99\t2. Add deferred schema loading as an AGH catalog behavior, not as an Anthropic-only assumption.\n100\t3. Track per-tool result budgets and persisted artifacts.\n101\t4. 
Keep security validators pluggable by tool family.\n102\t5. Model concurrency metadata even before advanced scheduling.\n103\t6. Treat live catalog deltas as optional driver capabilities with a fallback.\n104\t\n105\t## Risks / Mismatches\n106\t\n107\tAGH should not duplicate the ACP runtime's internal shell/browser/file tools unless there is a clear cross-runtime AGH-owned reason.\n108\t\n109\tAGH should not depend on Claude Code-only `tool_reference` wire formats for the core registry contract.\n110\t\n111\tAGH should not make shell command classifiers part of the foundation unless the MVP includes an AGH-owned shell tool.\n112\t\n113\tAGH should not over-inject tool catalogs into prompts. Progressive disclosure and search should be preferred once the tool universe grows.\n114\t\n115\t## Open Questions\n116\t\n117\t1. Should AGH's first registry search surface be `agh__tool_search` as an agent-callable tool, `agh tool search` as CLI, or both?\n118\t2. Should catalog delta support be designed now even if only a subset of drivers implement it?\n119\t3. 
Should result persistence share AGH's session event/artifact store or get a dedicated tool-result artifact store?\n120\t\n121\t## Evidence\n122\t\n123\t- `docs/ideas/from-claude-code/analysis_tool_system.md:450-620`: local implementation sketches for deferred registry, validators, result storage, and key Claude Code paths.\n124\t- `docs/ideas/from-claude-code/filtered_recommendations.md:1-38`: explicit \"AGH is not the LLM API loop\" constraint.\n125\t- `docs/ideas/from-claude-code/filtered_recommendations.md:96-134`: skills activation and prompt assembly ideas that intersect with registry progressive disclosure.\n126\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:51.016521Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:51.021273Z","update":{"kind":"tool_call_started","toolCallId":"toolu_019shNExKAgUgEkJEh5hx6x5","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_019shNExKAgUgEkJEh5hx6x5","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.534006Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_019shNExKAgUgEkJEh5hx6x5","blocks":[{"type":"tool_use","id":"toolu_019shNExKAgUgEkJEh5hx6x5","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_openclaw.md"}}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.535527Z","update":{"kind":"tool_call_started","toolCallId":"toolu_01QMu2rnVmRYQbQyktjz2bFv","toolCallState":"pending","blocks":[{"type":"tool_use","id":"toolu_01QMu2rnVmRYQbQyktjz2bFv","name":"Read","title":"Read File","rawInput":{}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.881646Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QMu2rnVmRYQbQyktjz2bFv","blocks":[{"type":"tool_use","id":"toolu_01QMu2rnVmRYQbQyktjz2bFv","name":"Read","title":"Read /Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md","input":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md"},"rawInput":{"file_path":"/Users/pedronauck/Dev/compozy/agh/.compozy/tasks/tools-registry/analysis/analysis_goclaw.md"}}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.881688Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.882891Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_019shNExKAgUgEkJEh5hx6x5","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.883092Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QMu2rnVmRYQbQyktjz2bFv","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.884457Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_019shNExKAgUgEkJEh5hx6x5","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_019shNExKAgUgEkJEh5hx6x5","content":"```\n1\t# Competitor Analysis: OpenClaw Tool and Plugin Registry\n2\t\n3\t## Overview\n4\t\n5\tOpenClaw's strongest contribution is its two-phase extension model: manifest-first static discovery followed by runtime materialization. Plugin manifests can declare tool contracts, configuration, activation hints, and ownership metadata without immediately executing plugin code. Runtime tool registration then materializes concrete tools through plugin APIs or MCP adapters.\n6\t\n7\tThis is highly relevant to AGH because AGH already lets extension manifests publish static `resources.tools`, but lacks a runtime execution boundary and a registry that can connect static declarations to callable backends.\n8\t\n9\t## Mechanisms / Patterns\n10\t\n11\tOpenClaw reads plugin manifests first. Static `contracts.tools`, config schema, activation hints, ownership, and duplicate diagnostics are available before runtime code is loaded. This supports cheap discovery, trust decisions, and policy projection.\n12\t\n13\tRuntime tools are registered through `api.registerTool` as either concrete tools or factories receiving a plugin tool context. This avoids global singleton state and gives each tool access to scoped runtime services.\n14\t\n15\tTool materialization is policy-filtered. Core and plugin tools are assembled, then filtered by profile/provider/global/agent/group/sandbox/subagent rules. Plugin owner metadata supports policy by plugin id or broad plugin groups.\n16\t\n17\tOpenClaw treats MCP as a provider backend. 
Bundle/user MCP configs are connected over stdio, SSE, or streamable HTTP, tools are listed and sanitized, calls are wrapped, and per-session runtime instances have idle TTL and fingerprint invalidation.\n18\t\n19\tOpenClaw also exposes a direct HTTP gateway for tool invocation. This is useful as a manageability pattern, but AGH should only expose direct invoke through strict local authorization and the same registry dispatch pipeline.\n20\t\n21\t## Relevant Code Paths\n22\t\n23\t- `.resources/openclaw/src/plugins/manifest.ts:250-367`: manifest contract shape.\n24\t- `.resources/openclaw/src/plugins/manifest.ts:539-583`: manifest validation and duplicate handling.\n25\t- `.resources/openclaw/src/plugins/manifest.ts:1161-1251`: manifest registry integration.\n26\t- `.resources/openclaw/src/plugins/manifest-registry.ts:303-379`: plugin manifest discovery.\n27\t- `.resources/openclaw/src/plugins/manifest-registry.ts:640-805`: precedence/diagnostics behavior.\n28\t- `.resources/openclaw/src/plugins/types.ts:2209-2353`: plugin API and tool registration types.\n29\t- `.resources/openclaw/src/plugins/tool-types.ts:8-45`: tool type definitions.\n30\t- `.resources/openclaw/src/plugins/registry.ts:421-446`: plugin registry access patterns.\n31\t- `.resources/openclaw/src/plugins/registry.ts:1464-1557`: activation/materialization path.\n32\t- `.resources/openclaw/src/plugins/tools.ts:111-239`: tool assembly from plugins.\n33\t- `.resources/openclaw/src/agents/pi-tools.ts:585-656`, `673-727`: policy-filtered agent tool projection.\n34\t- `.resources/openclaw/src/agents/tool-policy.ts:102-164`: tool policy model.\n35\t- `.resources/openclaw/src/agents/tool-policy-pipeline.ts:36-147`: policy pipeline.\n36\t- `.resources/openclaw/src/agents/pi-bundle-mcp-runtime.ts:181-575`: MCP runtime sessions and lifecycle.\n37\t- `.resources/openclaw/src/agents/pi-bundle-mcp-materialize.ts:64-174`: MCP tool materialization.\n38\t- 
`.resources/openclaw/docs/gateway/tools-invoke-http-api.md:11-146`: direct tool invocation gateway.\n39\t\n40\t## Transferable Patterns\n41\t\n42\tAGH should keep manifest-first discovery. Extension `resources.tools` should remain static and cheap to inspect. Runtime code should not be needed to list declared tools.\n43\t\n44\tAGH should attach structured owner/provenance metadata to every tool: source kind, source id, namespace, extension id, MCP server id, bundle id, trust tier, and conflict state.\n45\t\n46\tAGH should materialize tools through factories/adapters with a scoped context rather than globals. Built-ins receive daemon services; MCP tools receive server clients; extension tools receive a Host API or sidecar client.\n47\t\n48\tAGH should support extension-level grants and expand them into explicit tool permissions. \"Allow extension X\" should resolve to the tool ids owned by extension X at a specific registry generation.\n49\t\n50\tAGH should isolate failures. If an extension sidecar is unhealthy, its tools should become unavailable with reasons without breaking the full registry.\n51\t\n52\tAGH should cache materialized context-specific views. Tool projection can be hot-path work for session starts, catalog queries, and live deltas.\n53\t\n54\tAGH should expose direct invocation only through the same registry dispatch pipeline and only on local trusted surfaces such as UDS/daemon-authenticated HTTP.\n55\t\n56\t## Risks / Mismatches\n57\t\n58\tOpenClaw manifests are broad. AGH should keep the MVP manifest addition small and avoid a general plugin DSL inside the tool registry workstream.\n59\t\n60\tSome OpenClaw discovery modes may still execute plugin code. AGH should make manifest-first discovery a hard rule for untrusted extensions.\n61\t\n62\tGlobal tool names create collision pressure. AGH should require stable namespaced ids and optionally expose short display names.\n63\t\n64\t`optional: true` style availability is too coarse for AGH. 
Availability should distinguish not installed, disabled, unauthorized, unhealthy, dependency missing, config missing, sandbox denied, and conflict.\n65\t\n66\tTrusted bundled-only policies are insufficient for AGH's extension story. Trust tier and execution boundary should both be explicit.\n67\t\n68\t## Open Questions\n69\t\n70\t1. Should AGH extension tools declare a backend kind in the manifest, such as `mcp`, `host_api`, or `subprocess`?\n71\t2. Should extension-owned tools be disabled until an operator grants the extension's requested tool family?\n72\t3. Should AGH allow extension-private tools that only that extension can call?\n73\t4. How should AGH represent duplicate names: hard error, namespaced id only, or visible conflict diagnostics?\n74\t\n75\t## Evidence\n76\t\n77\t- `.resources/openclaw/src/plugins/manifest.ts:250-367`, `539-583`, `1161-1251`: manifest-first contracts and validation.\n78\t- `.resources/openclaw/src/plugins/manifest-registry.ts:303-379`, `640-805`: discovery and precedence diagnostics.\n79\t- `.resources/openclaw/src/plugins/types.ts:2209-2353`: runtime plugin tool registration API.\n80\t- `.resources/openclaw/src/plugins/tool-types.ts:8-45`: tool type shape.\n81\t- `.resources/openclaw/src/plugins/tools.ts:111-239`: plugin tool assembly.\n82\t- `.resources/openclaw/src/agents/pi-tools.ts:585-656`, `673-727`: agent projection.\n83\t- `.resources/openclaw/src/agents/tool-policy.ts:102-164`: policy model.\n84\t- `.resources/openclaw/src/agents/tool-policy-pipeline.ts:36-147`: policy pipeline.\n85\t- `.resources/openclaw/src/agents/pi-bundle-mcp-runtime.ts:181-575`: MCP runtime.\n86\t- `.resources/openclaw/src/agents/pi-bundle-mcp-materialize.ts:64-174`: MCP materialization.\n87\t- `.resources/openclaw/docs/gateway/tools-invoke-http-api.md:11-146`: direct invoke gateway.\n88\t\n```"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:52.884857Z","update":{"kind":"tool_call_updated","toolCallId":"toolu_01QMu2rnVmRYQbQyktjz2bFv","toolCallState":"completed","blocks":[{"type":"tool_result","toolUseId":"toolu_01QMu2rnVmRYQbQyktjz2bFv","content":"```\n1\t# Competitor Analysis: GoClaw Tool Registry\n2\t\n3\t## Overview\n4\t\n5\tGoClaw is the closest Go-native reference. It has a runtime `tools.Tool` interface with executable behavior, a registry that owns aliases, metadata, disabled state, rate limiting, grouping, deferred activation, and an executor/policy layer. It also adapts MCP tools into the same local interface.\n6\t\n7\tThe useful pattern for AGH is not GoClaw's exact `map[string]any` API. It is the separation between executable tool contract, registry metadata, run-scoped context, policy filtering, MCP adaptation, and dispatch hooks.\n8\t\n9\t## Mechanisms / Patterns\n10\t\n11\tGoClaw's core tool contract includes `Name`, `Description`, `Parameters`, and `Execute(context.Context, map[string]any) *Result`. This is weaker than the typed API AGH should build, but it proves the right direction: tools are executable handles, not only metadata records.\n12\t\n13\tThe registry owns tools, metadata, aliases, disabled state, rate limiting, result scrubbing, groups, and deferred activation. `ExecuteWithContext` injects run-scoped data through `context.Context` instead of mutating shared tool instances.\n14\t\n15\tThe policy engine filters tools through global, provider, agent, group, capability, subagent, and sandbox rules. Lazy/deferred activation rechecks policy before exposing or using tools.\n16\t\n17\tMCP bridge tools implement the same interface. The MCP adapter converts remote tools into local handles, rechecks grants at runtime, and normalizes results.\n18\t\n19\tHooks are lifecycle gates, not tools. 
`pre_tool_use` can block tool execution, while post hooks can observe and mutate limited fields. This maps well to AGH's existing hook payloads.\n20\t\n21\tSkills are exposed partly through searchable artifacts and marker/no-op tools such as skill search/use. AGH should be cautious here: \"use skill\" can be useful telemetry, but skill content loading should be a real catalog operation, not just a marker.\n22\t\n23\t## Relevant Code Paths\n24\t\n25\t- `.resources/goclaw/internal/tools/types.go:14-129`: core executable tool interface and result types.\n26\t- `.resources/goclaw/internal/tools/registry.go:18-459`: registry, aliases, groups, disabled state, rate limiting, scrubber, and deferred activation.\n27\t- `.resources/goclaw/internal/tools/executor.go`: central execution support.\n28\t- `.resources/goclaw/internal/tools/policy.go:13-520`: multi-layer tool policy and filtering.\n29\t- `.resources/goclaw/internal/tools/capability.go`: capability inference and policy integration.\n30\t- `.resources/goclaw/internal/tools/result.go`: result model.\n31\t- `.resources/goclaw/internal/mcp/bridge_tool.go:42-155`: MCP bridge implements local tool contract.\n32\t- `.resources/goclaw/internal/mcp/manager.go:318-515`: MCP manager and tool lifecycle.\n33\t- `.resources/goclaw/internal/mcp/grant_checker.go:46-129`: runtime grant checks.\n34\t- `.resources/goclaw/internal/mcp/mcp_tool_search.go:67-101`: MCP tool search support.\n35\t- `.resources/goclaw/internal/agent/loop_tool_filter.go:22-96`: agent loop filtering.\n36\t- `.resources/goclaw/internal/pipeline/tool_stage.go:51-152`: pipeline stage around tool execution.\n37\t- `.resources/goclaw/internal/hooks/types.go:19-44`: hook types.\n38\t- `.resources/goclaw/internal/hooks/dispatcher.go:153-318`: hook dispatch and mutation.\n39\t- `.resources/goclaw/migrations/000001...:478-499`, `000027...:230-245`: custom tool storage history.\n40\t\n41\t## Transferable Patterns\n42\t\n43\tAGH should promote `tools.Tool` from a record 
to a runtime contract, but with stronger types than GoClaw. Prefer `json.RawMessage` plus schema validation and typed `ToolResult` over unconstrained `map[string]any` crossing every boundary.\n44\t\n45\tAGH should pass per-call/session/workspace/user information through a `ToolCallContext` or context-bound immutable values, not by mutating registry entries.\n46\t\n47\tAGH should keep external adapters under the same registry: MCP tools, extension sidecar tools, and future bridge tools should all be executable through `Registry.Call`.\n48\t\n49\tAGH should implement dynamic groups/toolsets as policy inputs, not as separate registries.\n50\t\n51\tAGH should recheck grants at runtime even when discovery already filtered a tool.\n52\t\n53\tAGH should treat hooks as gates around dispatch, not as an alternative dispatch surface.\n54\t\n55\tAGH should persist metadata separately from executable code. Installed or extension-provided tool records can remain in resource storage while executable backends are resolved at runtime.\n56\t\n57\t## Risks / Mismatches\n58\t\n59\tGoClaw's `map[string]any` API is weak for AGH. AGH already has JSON schema resources and generated API contracts, so it should preserve schema validation and raw JSON boundaries.\n60\t\n61\tSome GoClaw paths can bypass pre-tool hooks in parallel execution. AGH should enforce one central dispatch pipeline regardless of whether tools run concurrently.\n62\t\n63\tShared registry state can leak per-user or per-session MCP availability if not scoped carefully. AGH should compute context-specific views rather than storing \"available for everyone\" as a global truth.\n64\t\n65\tCapability inference by tool name is brittle. AGH should use explicit metadata and namespaces.\n66\t\n67\tIndividual tool bodies should not own all security logic. 
The registry must own common gates: permission, availability, schema validation, risk class, hooks, result budget, redaction, telemetry, and concurrency.\n68\t\n69\t## Open Questions\n70\t\n71\t1. Should AGH's runtime registry be global with context-specific projections, or per-session snapshots derived from a global registry?\n72\t2. Should tool concurrency be enforced by a registry scheduler or by individual handlers returning metadata?\n73\t3. How should AGH persist disabled state and operator overrides for extension tools?\n74\t4. Should AGH include marker tools for skill use telemetry, or should skill view/install/load actions be real tools only?\n75\t\n76\t## Evidence\n77\t\n78\t- `.resources/goclaw/internal/tools/types.go:14-129`: executable Go tool contract.\n79\t- `.resources/goclaw/internal/tools/registry.go:18-459`: registry ownership of tools, aliases, groups, disabled state, and deferred activation.\n80\t- `.resources/goclaw/internal/tools/policy.go:13-520`: multi-layer policy.\n81\t- `.resources/goclaw/internal/mcp/bridge_tool.go:42-155`: MCP adapter as local tool.\n82\t- `.resources/goclaw/internal/mcp/manager.go:318-515`: MCP lifecycle.\n83\t- `.resources/goclaw/internal/mcp/grant_checker.go:46-129`: runtime grant checking.\n84\t- `.resources/goclaw/internal/agent/loop_tool_filter.go:22-96`: agent-specific filtering.\n85\t- `.resources/goclaw/internal/pipeline/tool_stage.go:51-152`: tool execution pipeline.\n86\t- `.resources/goclaw/internal/hooks/dispatcher.go:153-318`: hook-driven blocking/mutation.\n87\t- `.resources/goclaw/migrations/000001...:478-499`, `000027...:230-245`: persisted custom-tool metadata history.\n88\t\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:06:55.356867Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:03.87887Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:03.879185Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"```json\n{\n \"blockers\": [\n {\n \"id\": \"B-001\",\n \"section\": \"Integ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:04.12773Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ration Points → Hosted MCP / Implementation Steps step 8 "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:04.510692Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"/ Safety Invariants\",\n \"issue\": \"The hosted MCP exposure"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:04.907704Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" path is described as `agent session -\u003e ACP m"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:05.271577Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"cpServers -\u003e agh tool mcp --session \u003cid\u003e -\u003e UDS -\u003e daemon"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:05.653942Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Tool Registry`, but the TechSpec never specifies how the daemon 
authen"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:06.036991Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ticates that the spawned `agh tool mcp --session \u003cid\u003e` process is actually authorized to"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:06.427122Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" act on behalf of that session. There is no proof-of-session token"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:06.798277Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":", no per-session UDS handshake, no spawn-time secret b"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:07.176379Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ound to the ACP `mcpServers` config, and no rejection rule for a fo"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:07.556317Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"reign process attaching to another session's projection. 
"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:07.945336Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ADR-002 explicitly requires the TechSpec to 'define how the hosted MCP server is authorized, scoped to the session/workspace, and prevented from"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:08.336697Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" bypassing registry dispatch'; this is unfulfilled.\",\n \"rationale\": \""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:08.717172Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"Without a specified auth model, any local process (a"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:09.088577Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" malicious extension sidecar, a co-tenant agent, an unrelated C"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:09.49183Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"LI invocation) that can reach the UDS socket can call `agh tool mcp --session \u003cother_session\u003e` and"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:09.844212Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" obtain the full session-callable tool projection — including any approve-all-"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:10.404862Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"permitted mutating tools, `agh__network_send`, and"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:10.602725Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" bounded `agh__task_*` operations. This is a session-impersonation security reg"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:10.986199Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ression and a direct violation of the security-invariant posture in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:11.36843Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" `internal/CLAUDE.md` (claim_token redaction is pa"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:11.764583Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ired with session/workspace scoping precisely because UDS reachability is not an authorization)."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:12.12891Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" It is also exactly what ADR-002 stipulated must be in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:12.50799Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the TechSpec.\",\n 
\"suggested_fix\": \"Add a 'Hosted MCP Authentication' subsection that"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:12.893072Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" specifies: (a) the daemon mints a single-use, h"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:13.403211Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ashed session-bound proxy token at session"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:13.676206Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" creation, never logs the raw value, and pers"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:14.03157Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ists only its hash; (b) the ACP `mcpServers` env / ar"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:14.419814Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"g passes the raw token to the spawned `agh tool mcp` process via inher"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:14.794585Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ited stdio handshake, never via argv; (c) the proxy pres"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:15.177108Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ents the token over UDS in an initial bind RPC and"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:15.61728Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the daemon ties the resulting projection to that single session_"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:15.963032Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"id; (d) the token is invalidated on session end / disconnect / TTL; (e) ra"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:16.32245Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"w tokens are subject to the same redaction rules as `claim_token`. 
List"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:16.699886Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" this in Safety Invariants as an additional numbered item.\"\n },\n {\n \"id\": \"B-002\","}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:17.08986Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n \"section\": \"Implementation Design → Implementation Steps step 6 / Integration Points → Network And"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:17.498927Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Tasks / ADR-004 alignment\",\n \"issue"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:17.846909Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\": \"The MVP scope still leaves `agh__task_*` as an open"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:18.226392Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" wildcard. 
The TechSpec lists only `agh__task_list` and `agh__task_read` in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:18.605488Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the canonical examples and the build order says 'bounded `agh__task_*`' without"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:19.110164Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" enumerating the set. ADR-004 explicitly requires the TechSpec to enumerate the bounded oper"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:19.368126Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ations (candidates: list/get/create/update/claim/release/complete) and to map each"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:19.746651Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" one to an existing or planned task primitive. 
That enumeration is missing.\",\n \"rationale\": \"L-003"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:20.156258Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" / L-004 / L-005 are load-bearing: `task_runs` is the"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:20.508423Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" single durable queue, manual==pe"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:20.89506Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"er must converge on the same primitives, and `task."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:21.272839Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"Service.ClaimNextRun` is the only authoritative claimer. 
If the TechSpec "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:21.652706Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ships without naming the exact tools and their auth"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:22.030376Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ority routing, an implementer can plausibly intro"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:22.412523Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"duce `agh__task_claim` or `agh__task_release` that wraps task state m"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:22.843678Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"utations outside `ClaimNextRun`, or expose `agh__task_create` as auto-publ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:23.298639Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ishing — both of which collapse the autonomy"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:23.817621Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" invariants Pedro shipped on "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:24.380055Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"2026-04-26. 
This also violates `cy"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:24.46777Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-tasks-tail-qa-pair"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:24.874521Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"` / Web-Docs-Impact discipline because the Q"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:25.253172Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"A pair has no concrete tool list to test.\",\n \"suggested_fix\": \"Add a "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:25.60414Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"'Bootstrap Task Tools' subsection that names every `agh__task_*` tool in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:26.071892Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the MVP, with `read_only`/`destructive`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:26.481202Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"/`open_world` classification and the exact `intern"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:26.753931Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"al/task` method each one calls. 
Explicitly state which"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:27.20041Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" task tools are excluded from MVP (e.g., `agh__task_claim`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:27.590454Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" and `agh__task_complete` are excluded because claim/lease state chang"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:27.964946Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"es belong to `ClaimNextRun` and to the task lifecycle authority, not to a gener"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:28.427933Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ic agent-callable tool). 
Bind this subsection to a"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:28.744674Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Safety Invariant entry: 'No `agh__task_*` tool may"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:29.176991Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" bypass `task.Service.ClaimNextRun`, `Spawn`, or session"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:29.612568Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" manager terminal-state authority.'\"\n },\n {\n \"id\": \"B"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:30.194707Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-003\",\n \"section\": \"Integration Points → Hosted MCP / API Endpoints (`appro"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:30.335948Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"val_token`) / ADR-005 integration\",\n \"issue\": \"The TechSpec exposes session"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:30.766679Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-callable tools through the hosted MCP server, but never specifies how a tool whose `E"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:31.078896Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ffectiveToolDecision.approval_required == true` is actually invoked over MCP. M"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:31.46143Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"CP has no per-call approval prompt, the model has no path to surface `approval_token` over"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:31.844278Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" `tools/call`, and the spec does not say whether such"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:32.318467Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" tools are (a) filtered out of `SessionToolView"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:32.617985Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"` entirely, (b) auto-routed through ACP `session/request_permission`, or (c) silently fail"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:33.116408Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":". 
The CLI/HTTP `invoke` request has an `approval_token` field"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:33.414371Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"; the hosted MCP path has no equivalent.\",\n \"rationale\": \"ADR-005 is explicit that"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:33.784685Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" approval-required tools must integrate with the ACP `permissions.mode` ceiling and the `session/request_permission"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:34.364135Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"` callback path. Without a specified mechanism, the hosted MCP path bec"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:34.544726Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"omes a silent approval-bypass surface (if approval is auto-sk"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:35.082151Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ipped) or a dead surface (if tools are listed but uncallable). 
Either outcome violates "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:35.274648Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"'truthful UI \u003e plausible UI"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:35.670569Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"' (CLAUDE.md Design System) at the ag"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:36.032422Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ent surface and creates exactly the contradictory"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:36.514096Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" state ADR-005 was written to prevent. 
The 'partial-surface completion' rule"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:36.794405Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" in `internal/CLAUDE.md` is tripped here too — C"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:37.173292Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"LI/HTTP gets `approval_token`, MCP doesn't.\",\n \"suggested_fix\": \"Add a 'Hos"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:37.743067Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ted MCP Approval Bridge' subsection that specifies: (a) `SessionToolView` for h"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:37.935758Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"osted MCP filters out tools that require approval AND have no live approval chann"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:38.316055Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"el for this session (default fail-closed); (b) when ACP `session"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:38.744427Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"/request_permission` is available, the daemon issues that request from inside `Regist"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:39.125953Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ry.Call` and blocks the MCP `tools/call` response until approved/"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:39.510027Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"denied/timed-out; (c) when no approval channel is available (deny-all + no"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:39.896863Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" live operator), the MCP call returns `ErrToolAp"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:40.269318Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"provalRequired` with `reason_codes` including `approval_unre"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:40.651123Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"achable`. 
Add a Safety Invariant: 'A hosted MCP `tools/call` c"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:41.03258Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"annot succeed past `approval_required` without a daemon-mediated approval that sat"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:41.411966Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"isfies the same ACP/session ceiling as a CLI/HTTP `approval_token`.'\"\n },\n "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:41.839281Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"{\n \"id\": \"B-004\",\n \"section\": \"MVP Boundary Statement / Impl"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:42.182704Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ementation Steps step 7-8 / Test Strategy → Integration Tests\",\n \"issue\":"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:42.658515Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" \"The MVP boundary is internally contradictory about extension tool exec"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:42.937893Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ution. 
The MVP statement says 'extension descriptor projection' (descriptors only)."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:43.498655Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Step 7 builds 'extension manifest backend metadata and descriptor projection.' Step 8 says 'MCP adapter cont"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:43.78631Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"racts and hosted MCP stdio proxy command' — 'contracts' is ambiguous as to"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:44.139908Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" whether the MCP-backed extension dispatch path actually executes. Yet the integration test plan as"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:44.506841Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"serts 'MCP-backed extension tool remains unavailable until backend health/auth pass"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:44.918128Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"es,' which only makes sense if dispatch is functional. 
The post-MVP list def"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:45.270483Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ers only `bridge`; `m"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:45.650217Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"cp`, `extension_host`, and `subprocess` are silent. This produces the exact partial-surface "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:46.139554Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ambiguity that L-007 and the no-partial-surface rule in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:46.447161Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" `internal/CLAUDE.md` forbid.\",\n \"rationale\": \"Either MVP "}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:46.85861Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ships extension tool dispatch (mcp / extension_host / subprocess) or it doesn't —"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:47.176896Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the spec must commit. 
If it does ship, step 8's de"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:47.573578Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"liverable list is incomplete (no extension_host adapter, no subprocess adapter, no M"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:48.032604Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"CP call-through specified). If it doesn't, the integration test about"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:48.393406Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" MCP backend health is wrong and the descriptor-only state must"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:48.765602Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" be explicit. 
Without resolution, implementers will pick whichever inter"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:49.329496Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"pretation is convenient mid-stream and the surface will ship half-built, ex"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:49.482157Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"actly the partial-surface failure mode that breaks `make verify`-pass"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:49.841523Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ing-but-not-actually-working features (L-007).\",\n \"suggested_"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:50.221409Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"fix\": \"Replace the ambiguous wording with a concrete table in 'MVP Boundary Statement':"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:50.60332Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" for each backend kind (`builtin`, `mcp`, `extension_host`, `subprocess`, `b"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:50.986751Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ridge`), state explicitly whether MVP delivers (1) descriptor +"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:51.41228Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" availability state only or (2) full dispatch through `Registry.Call`."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:51.795145Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Update the test strategy to match. If only some kinds d"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:52.269958Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ispatch in MVP, name the others as 'descriptor-only, returns"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:52.592173Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" `ErrToolUnavailable` with reason `backend_not_implemented` if"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:53.00232Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" invoked' so the surface stays truthful and"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:53.362379Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the post-MVP work list is explicit.\"\n }\n ],\n \"nits\": [\n {\n \"id\": \"N-001\",\n \"section\": \"Data"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:53.746262Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Models → 
Toolset\",\n \"issue\": \"`ToolsetID` grammar is never"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:54.128289Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" specified; only example values like `agh__b"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:54.507151Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ootstrap` and `linear__read`"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:55.002104Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" appear.\",\n \"suggested_fix\": \"State that `ToolsetID` shares"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:55.26999Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the canonical `ToolID` grammar (lowercase ASCII segments separ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:55.662981Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ated by `__`, max 64 chars) and is validated by"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:56.033398Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the same parser.\"\n },\n {\n \"id\": \"N-002\",\n \"section\": \"Config Lifecycle → Ag"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:56.44496Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ent Definitions\",\n \"issue\": \"Wildcard / pattern grammar for `agent."}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:56.892142Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"tools`, `agent.deny_tools`, and registry all"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:57.187547Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ow/deny is referenced but never formally defined.\",\n \"suggested_fix\": \"Add"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:57.556204Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" a 'Tool Pattern Grammar' subsection: allowed forms (`agh__sk"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:57.936656Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ill_*`, `mcp__github__*`, exact IDs), disallowed forms (reg"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:58.427971Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ex, `*__suffix`, mid-segment wildcards), and mat"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:58.725077Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ching semantics against the canonical `ToolID`.\"\n 
},\n {\n \"id\": \"N-003\",\n \"section\": \"API Endpoints → Inv"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:59.109728Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"oke request\",\n \"issue\": \"`approval_token` is introduced without spec"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:59.462045Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ifying its format, lifetime, source, or relationship to cla"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:09:59.861997Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"im_token redaction rules.\",\n \"suggested_fix\": \"Define `approval_token` as an opaque local"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:00.226667Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"-approval reference issued by the daemon's approval surface,"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:00.668626Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" hashed at rest, redacted in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:01.139926Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" logs/SSE/events, and unrelated to `claim_token"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:01.399191Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"` (separate code path).\"\n },\n {\n \"id\": \"N-004\",\n \"section\": \"Data"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:02.263344Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Models → SourceRef\",\n \"issue\": \"`dynamic` source kind is enumerated but has"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:02.283371Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" no MVP producer (synthesis explicitly not"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:02.509068Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"es the existing enum has no producer).\",\n \"suggested_fix\": \"Either"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:02.890841Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" remove `dynamic` from the MVP enum (greenfield-del"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:03.273731Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ete) or note explicitly that it is reserved with no M"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:03.657502Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"VP producer and has no validation 
surface yet.\"\n },\n {\n \"id\": \"N-005\",\n \"section\": \"Integ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:04.052625Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ration Points → Hosted MCP\",\n \"issue\": \"Lifecycle of the hosted MCP proxy process"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:04.414595Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" (per-session vs long-lived, stdio ownership,"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:04.794116Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" restart on session resume) is undefined.\",\n \"suggested_fix\": \"Add a one-pa"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:05.176991Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ragraph lifecycle: stdio proxy is spawned by the ACP run"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:05.557129Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"time per `mcpServers` entry, scoped to one session, ex"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:05.938747Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"its when stdio closes, reconnects to the same session on `session/load`.\""}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:06.317734Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n },\n {\n \"id\": \"N-006\",\n \"section\": \"Integration Points → Skills\",\n \"issue\": \"`agh__skill_view"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:06.808854Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"` returns full skill content but interaction with `default_max"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:07.109868Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"_result_bytes` (262144) is not specified — large skill"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:07.524285Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" bodies will silently truncate.\",\n \"suggested_fix\": \"State that `agh__skill_view` truncates per"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:07.844064Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" registry result budget and returns `truncated=true` plus a"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:08.22534Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" typed `next_offset`/artifact reference, or raises its"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:08.620178Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" descriptor's `max_result_bytes` to the documented skill body"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:08.988749Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" cap.\"\n },\n {\n \"id\": \"N-007\",\n \"section\": \"Integration Points → Hooks / Implementation Steps step"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:09.36598Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" 11 / Delete Targets\",\n \"issue\": \"Hook payload identity"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:09.795612Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" migration (`tool_name`+`tool_namespace` → `tool_id"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:10.143541Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"`) is a contract change; co-shipping the hook tests /"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:10.508555Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" extension-author docs / fixture matchers in the"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:10.888566Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" same PR is implied but not 
enforced (L-007 risk).\",\n \"sugg"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:11.393678Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ested_fix\": \"Add an explicit bullet under step 11: 'Update typ"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:11.740852Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ed hook payloads, hook matchers, fixture builders, extension-author docs, and codegen in"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:12.059095Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" the same change; no dual identity allowed m"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:12.678177Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"id-PR.'\"\n },\n {\n \"id\": \"N-008\",\n \"section\": \"Config Lifecycle → Agent Definitions\",\n \"issue\": \"Existing `S"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:12.855967Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"essionPermissionPolicy.Tools` rows"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:13.277073Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" from the autonomy MVP (shipped 2026-04-26)"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:13.555781Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" may not match the new canonical `ToolID` grammar; the spec says impl"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:13.935489Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ementation should 'validate' these atoms but"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:14.317969Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" does not state the cleanup behavior on validation failure.\",\n \""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:14.701604Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"suggested_fix\": \"State explicitly: 'Greenfield posture — invalid at"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:15.080628Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"oms reject session spawn / lineage load"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:15.459924Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" with a typed error; no silent normalization. 
Local"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:15.916925Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" databases pre-dating this TechSpec require"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:16.239086Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" a fresh `AGH_HOME` per L-006/"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:16.693869Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"L-008 allowed exception.'\"\n },\n {\n \"id\": \"N-009\",\n \"section\": \"API"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:16.985599Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" Endpoints / Hosted MCP\",\n \"issue\": \"Workspace resol"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:17.49896Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ution path for the hosted MCP `tools/list` is impl"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:17.757123Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"icit. 
The CallRequest carries `work"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:18.14723Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"space_id`, but the MCP proxy only knows `session_id`.\",\n \"suggested_fix\": \"State that the proxy never"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:18.530321Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" accepts client-supplied workspace_id; the daemon derives workspace_id from session_id at"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:18.887802Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" projection time and at dispatch time.\"\n },\n {\n \"id\": \"N-010\",\n \"section\": \""}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:19.275856Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ADR-004 / Implementation Steps step 6\",\n \"issue\": \"Ris"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:19.650308Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"k / read-only / destructive / open-world classification for each M"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:20.031332Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"VP tool (`agh__network_send`, `agh__task_*`, `agh__skill_view`) is left"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:20.415646Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" to implementation.\",\n \"suggested_fix\": \"Add a small classification table per"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:20.798598Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" MVP tool to remove ambiguity at implementation time and to"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:21.263428Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" feed the policy matrix tests.\"\n }\n ],"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:21.55678Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"\n \"readiness\": \"NEEDS_REWOR"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:21.937135Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"K\",\n \"summary\": \"The TechSpec is structurally strong — it carries"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:22.317314Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":" all six L-012 quality markers, names del"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:22.697725Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ete targets, integrates with ADR-005 ce"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:23.230846Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"iling semantics, forbids in-process third-party handlers, and cent"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:23.461335Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ralizes dispatch. It is blocked on four concrete gaps that break the sp"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:24.31175Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ec's own contracts: hosted-MCP proxy authentication is undefined (ADR-002 explicit"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:24.317205Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ly required it), the bounded `agh__task_*` set is not enumerated ("}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:24.651561Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ADR-004 explicitly required it and L-005 requires the authoritative-prim"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:25.031009Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"itive boundary), the approval flow over hosted MCP has no specified b"}],"usage":{},"status":"running"},"usage":{}} 
+{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:25.518891Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ridge to ACP `session/request_permission`, and the MVP boundary for ext"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:25.793508Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"ension tool execution (mcp / extension_host / subprocess) is internally contradictory. Res"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:26.08376Z","update":{"kind":"agent_message_chunk","blocks":[{"type":"text","text":"olve those four and the spec is ready.\"\n}\n```"}],"usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:26.155066Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:26.252752Z","update":{"kind":"user_message_chunk","usage":{},"status":"running"},"usage":{}} +{"type":"session.update","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:26.269302Z","update":{"usage":{},"status":"completed"},"usage":{}} +{"type":"run.succeeded","run_id":"exec-20260428-180557-891196000","turn":1,"time":"2026-04-28T18:10:26.279953Z","status":"succeeded","usage":{},"output":"```json\n{\n \"blockers\": [\n {\n \"id\": \"B-001\",\n \"section\": \"Integration Points → Hosted MCP / Implementation Steps step 8 / Safety Invariants\",\n \"issue\": \"The hosted MCP exposure path is described as `agent session -\u003e ACP mcpServers -\u003e agh tool mcp --session \u003cid\u003e -\u003e UDS -\u003e daemon Tool Registry`, but the TechSpec never specifies 
how the daemon authenticates that the spawned `agh tool mcp --session \u003cid\u003e` process is actually authorized to act on behalf of that session. There is no proof-of-session token, no per-session UDS handshake, no spawn-time secret bound to the ACP `mcpServers` config, and no rejection rule for a foreign process attaching to another session's projection. ADR-002 explicitly requires the TechSpec to 'define how the hosted MCP server is authorized, scoped to the session/workspace, and prevented from bypassing registry dispatch'; this is unfulfilled.\",\n \"rationale\": \"Without a specified auth model, any local process (a malicious extension sidecar, a co-tenant agent, an unrelated CLI invocation) that can reach the UDS socket can call `agh tool mcp --session \u003cother_session\u003e` and obtain the full session-callable tool projection — including any approve-all-permitted mutating tools, `agh__network_send`, and bounded `agh__task_*` operations. This is a session-impersonation security regression and a direct violation of the security-invariant posture in `internal/CLAUDE.md` (claim_token redaction is paired with session/workspace scoping precisely because UDS reachability is not an authorization). It is also exactly what ADR-002 stipulated must be in the TechSpec.\",\n \"suggested_fix\": \"Add a 'Hosted MCP Authentication' subsection that specifies: (a) the daemon mints a single-use, hashed session-bound proxy token at session creation, never logs the raw value, and persists only its hash; (b) the ACP `mcpServers` env / arg passes the raw token to the spawned `agh tool mcp` process via inherited stdio handshake, never via argv; (c) the proxy presents the token over UDS in an initial bind RPC and the daemon ties the resulting projection to that single session_id; (d) the token is invalidated on session end / disconnect / TTL; (e) raw tokens are subject to the same redaction rules as `claim_token`. 
List this in Safety Invariants as an additional numbered item.\"\n },\n {\n \"id\": \"B-002\",\n \"section\": \"Implementation Design → Implementation Steps step 6 / Integration Points → Network And Tasks / ADR-004 alignment\",\n \"issue\": \"The MVP scope still leaves `agh__task_*` as an open wildcard. The TechSpec lists only `agh__task_list` and `agh__task_read` in the canonical examples and the build order says 'bounded `agh__task_*`' without enumerating the set. ADR-004 explicitly requires the TechSpec to enumerate the bounded operations (candidates: list/get/create/update/claim/release/complete) and to map each one to an existing or planned task primitive. That enumeration is missing.\",\n \"rationale\": \"L-003 / L-004 / L-005 are load-bearing: `task_runs` is the single durable queue, manual==per must converge on the same primitives, and `task.Service.ClaimNextRun` is the only authoritative claimer. If the TechSpec ships without naming the exact tools and their authority routing, an implementer can plausibly introduce `agh__task_claim` or `agh__task_release` that wraps task state mutations outside `ClaimNextRun`, or expose `agh__task_create` as auto-publishing — both of which collapse the autonomy invariants Pedro shipped on 2026-04-26. This also violates `cy-tasks-tail-qa-pair` / Web-Docs-Impact discipline because the QA pair has no concrete tool list to test.\",\n \"suggested_fix\": \"Add a 'Bootstrap Task Tools' subsection that names every `agh__task_*` tool in the MVP, with `read_only`/`destructive`/`open_world` classification and the exact `internal/task` method each one calls. Explicitly state which task tools are excluded from MVP (e.g., `agh__task_claim` and `agh__task_complete` are excluded because claim/lease state changes belong to `ClaimNextRun` and to the task lifecycle authority, not to a generic agent-callable tool). 
Bind this subsection to a Safety Invariant entry: 'No `agh__task_*` tool may bypass `task.Service.ClaimNextRun`, `Spawn`, or session manager terminal-state authority.'\"\n },\n {\n \"id\": \"B-003\",\n \"section\": \"Integration Points → Hosted MCP / API Endpoints (`approval_token`) / ADR-005 integration\",\n \"issue\": \"The TechSpec exposes session-callable tools through the hosted MCP server, but never specifies how a tool whose `EffectiveToolDecision.approval_required == true` is actually invoked over MCP. MCP has no per-call approval prompt, the model has no path to surface `approval_token` over `tools/call`, and the spec does not say whether such tools are (a) filtered out of `SessionToolView` entirely, (b) auto-routed through ACP `session/request_permission`, or (c) silently fail. The CLI/HTTP `invoke` request has an `approval_token` field; the hosted MCP path has no equivalent.\",\n \"rationale\": \"ADR-005 is explicit that approval-required tools must integrate with the ACP `permissions.mode` ceiling and the `session/request_permission` callback path. Without a specified mechanism, the hosted MCP path becomes a silent approval-bypass surface (if approval is auto-skipped) or a dead surface (if tools are listed but uncallable). Either outcome violates 'truthful UI \u003e plausible UI' (CLAUDE.md Design System) at the agent surface and creates exactly the contradictory state ADR-005 was written to prevent. 
The 'partial-surface completion' rule in `internal/CLAUDE.md` is tripped here too — CLI/HTTP gets `approval_token`, MCP doesn't.\",\n \"suggested_fix\": \"Add a 'Hosted MCP Approval Bridge' subsection that specifies: (a) `SessionToolView` for hosted MCP filters out tools that require approval AND have no live approval channel for this session (default fail-closed); (b) when ACP `session/request_permission` is available, the daemon issues that request from inside `Registry.Call` and blocks the MCP `tools/call` response until approved/denied/timed-out; (c) when no approval channel is available (deny-all + no live operator), the MCP call returns `ErrToolApprovalRequired` with `reason_codes` including `approval_unreachable`. Add a Safety Invariant: 'A hosted MCP `tools/call` cannot succeed past `approval_required` without a daemon-mediated approval that satisfies the same ACP/session ceiling as a CLI/HTTP `approval_token`.'\"\n },\n {\n \"id\": \"B-004\",\n \"section\": \"MVP Boundary Statement / Implementation Steps step 7-8 / Test Strategy → Integration Tests\",\n \"issue\": \"The MVP boundary is internally contradictory about extension tool execution. The MVP statement says 'extension descriptor projection' (descriptors only). Step 7 builds 'extension manifest backend metadata and descriptor projection.' Step 8 says 'MCP adapter contracts and hosted MCP stdio proxy command' — 'contracts' is ambiguous as to whether the MCP-backed extension dispatch path actually executes. Yet the integration test plan asserts 'MCP-backed extension tool remains unavailable until backend health/auth passes,' which only makes sense if dispatch is functional. The post-MVP list defers only `bridge`; `mcp`, `extension_host`, and `subprocess` are silent. 
This produces the exact partial-surface ambiguity that L-007 and the no-partial-surface rule in `internal/CLAUDE.md` forbid.\",\n \"rationale\": \"Either MVP ships extension tool dispatch (mcp / extension_host / subprocess) or it doesn't — the spec must commit. If it does ship, step 8's deliverable list is incomplete (no extension_host adapter, no subprocess adapter, no MCP call-through specified). If it doesn't, the integration test about MCP backend health is wrong and the descriptor-only state must be explicit. Without resolution, implementers will pick whichever interpretation is convenient mid-stream and the surface will ship half-built, exactly the partial-surface failure mode that breaks `make verify`-passing-but-not-actually-working features (L-007).\",\n \"suggested_fix\": \"Replace the ambiguous wording with a concrete table in 'MVP Boundary Statement': for each backend kind (`builtin`, `mcp`, `extension_host`, `subprocess`, `bridge`), state explicitly whether MVP delivers (1) descriptor + availability state only or (2) full dispatch through `Registry.Call`. Update the test strategy to match. 
If only some kinds dispatch in MVP, name the others as 'descriptor-only, returns `ErrToolUnavailable` with reason `backend_not_implemented` if invoked' so the surface stays truthful and the post-MVP work list is explicit.\"\n }\n ],\n \"nits\": [\n {\n \"id\": \"N-001\",\n \"section\": \"Data Models → Toolset\",\n \"issue\": \"`ToolsetID` grammar is never specified; only example values like `agh__bootstrap` and `linear__read` appear.\",\n \"suggested_fix\": \"State that `ToolsetID` shares the canonical `ToolID` grammar (lowercase ASCII segments separated by `__`, max 64 chars) and is validated by the same parser.\"\n },\n {\n \"id\": \"N-002\",\n \"section\": \"Config Lifecycle → Agent Definitions\",\n \"issue\": \"Wildcard / pattern grammar for `agent.tools`, `agent.deny_tools`, and registry allow/deny is referenced but never formally defined.\",\n \"suggested_fix\": \"Add a 'Tool Pattern Grammar' subsection: allowed forms (`agh__skill_*`, `mcp__github__*`, exact IDs), disallowed forms (regex, `*__suffix`, mid-segment wildcards), and matching semantics against the canonical `ToolID`.\"\n },\n {\n \"id\": \"N-003\",\n \"section\": \"API Endpoints → Invoke request\",\n \"issue\": \"`approval_token` is introduced without specifying its format, lifetime, source, or relationship to claim_token redaction rules.\",\n \"suggested_fix\": \"Define `approval_token` as an opaque local-approval reference issued by the daemon's approval surface, hashed at rest, redacted in logs/SSE/events, and unrelated to `claim_token` (separate code path).\"\n },\n {\n \"id\": \"N-004\",\n \"section\": \"Data Models → SourceRef\",\n \"issue\": \"`dynamic` source kind is enumerated but has no MVP producer (synthesis explicitly notes the existing enum has no producer).\",\n \"suggested_fix\": \"Either remove `dynamic` from the MVP enum (greenfield-delete) or note explicitly that it is reserved with no MVP producer and has no validation surface yet.\"\n },\n {\n \"id\": \"N-005\",\n \"section\": 
\"Integration Points → Hosted MCP\",\n \"issue\": \"Lifecycle of the hosted MCP proxy process (per-session vs long-lived, stdio ownership, restart on session resume) is undefined.\",\n \"suggested_fix\": \"Add a one-paragraph lifecycle: stdio proxy is spawned by the ACP runtime per `mcpServers` entry, scoped to one session, exits when stdio closes, reconnects to the same session on `session/load`.\"\n },\n {\n \"id\": \"N-006\",\n \"section\": \"Integration Points → Skills\",\n \"issue\": \"`agh__skill_view` returns full skill content but interaction with `default_max_result_bytes` (262144) is not specified — large skill bodies will silently truncate.\",\n \"suggested_fix\": \"State that `agh__skill_view` truncates per registry result budget and returns `truncated=true` plus a typed `next_offset`/artifact reference, or raises its descriptor's `max_result_bytes` to the documented skill body cap.\"\n },\n {\n \"id\": \"N-007\",\n \"section\": \"Integration Points → Hooks / Implementation Steps step 11 / Delete Targets\",\n \"issue\": \"Hook payload identity migration (`tool_name`+`tool_namespace` → `tool_id`) is a contract change; co-shipping the hook tests / extension-author docs / fixture matchers in the same PR is implied but not enforced (L-007 risk).\",\n \"suggested_fix\": \"Add an explicit bullet under step 11: 'Update typed hook payloads, hook matchers, fixture builders, extension-author docs, and codegen in the same change; no dual identity allowed mid-PR.'\"\n },\n {\n \"id\": \"N-008\",\n \"section\": \"Config Lifecycle → Agent Definitions\",\n \"issue\": \"Existing `SessionPermissionPolicy.Tools` rows from the autonomy MVP (shipped 2026-04-26) may not match the new canonical `ToolID` grammar; the spec says implementation should 'validate' these atoms but does not state the cleanup behavior on validation failure.\",\n \"suggested_fix\": \"State explicitly: 'Greenfield posture — invalid atoms reject session spawn / lineage load with a typed error; no silent 
normalization. Local databases pre-dating this TechSpec require a fresh `AGH_HOME` per L-006/L-008 allowed exception.'\"\n },\n {\n \"id\": \"N-009\",\n \"section\": \"API Endpoints / Hosted MCP\",\n \"issue\": \"Workspace resolution path for the hosted MCP `tools/list` is implicit. The CallRequest carries `workspace_id`, but the MCP proxy only knows `session_id`.\",\n \"suggested_fix\": \"State that the proxy never accepts client-supplied workspace_id; the daemon derives workspace_id from session_id at projection time and at dispatch time.\"\n },\n {\n \"id\": \"N-010\",\n \"section\": \"ADR-004 / Implementation Steps step 6\",\n \"issue\": \"Risk / read-only / destructive / open-world classification for each MVP tool (`agh__network_send`, `agh__task_*`, `agh__skill_view`) is left to implementation.\",\n \"suggested_fix\": \"Add a small classification table per MVP tool to remove ambiguity at implementation time and to feed the policy matrix tests.\"\n }\n ],\n \"readiness\": \"NEEDS_REWORK\",\n \"summary\": \"The TechSpec is structurally strong — it carries all six L-012 quality markers, names delete targets, integrates with ADR-005 ceiling semantics, forbids in-process third-party handlers, and centralizes dispatch. It is blocked on four concrete gaps that break the spec's own contracts: hosted-MCP proxy authentication is undefined (ADR-002 explicitly required it), the bounded `agh__task_*` set is not enumerated (ADR-004 explicitly required it and L-005 requires the authoritative-primitive boundary), the approval flow over hosted MCP has no specified bridge to ACP `session/request_permission`, and the MVP boundary for extension tool execution (mcp / extension_host / subprocess) is internally contradictory. 
Resolve those four and the spec is ready.\"\n}\n```"} diff --git a/.compozy/tasks/tools-registry/qa/peer-review-summary-round2.md b/.compozy/tasks/tools-registry/qa/peer-review-summary-round2.md new file mode 100644 index 000000000..e47f1af10 --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-summary-round2.md @@ -0,0 +1,34 @@ +# Peer Review Round 2 Summary + +## Command + +```bash +compozy exec --ide claude --model opus --reasoning-effort xhigh --format json --prompt-file .compozy/tasks/tools-registry/qa/peer-review-prompt-round2.md +``` + +## Raw Output + +- `.compozy/tasks/tools-registry/qa/peer-review-result-round2.json` +- `.compozy/tasks/tools-registry/qa/peer-review-result-round2.err` + +## Verdict + +`NEEDS_REWORK` + +The reviewer agreed the revised direction is sound, but identified five contract-level blockers before task generation: extension wire payload structs, schema digest canonicalization, MCP bearer injection boundary, hosted MCP bind-token threat model, and approval bridge timeout/cancellation. + +## Disposition + +- `B-001` Extension wire contracts: resolved in `_techspec.md` Core Interfaces with protocol constants, capability-method mapping, and `provide_tools` / `tools/call` request-response structs. +- `B-002` Schema digest canonicalization: resolved in `_techspec.md` Data Models and ADR-008 with RFC 8785 JCS canonicalization, lowercase SHA-256 digests, and shared SDK/daemon fixtures. +- `B-003` Remote MCP bearer injection: resolved in `_techspec.md` Core Interfaces / Existing MCP Config And Auth and ADR-010 with `MCPCallExecutor` owned by `internal/mcp`. +- `B-004` Hosted MCP bind-token contradiction: resolved in `_techspec.md` Hosted MCP authentication by replacing bearer bind tokens with non-secret bind nonces plus UDS peer credential and AGH binary validation. 
+- `B-005` Approval bridge wait: resolved in `_techspec.md` Hosted MCP approval bridge, Config Lifecycle, Test Strategy, and ADR-005 with `approval_timeout_seconds`, `approval_timed_out`, `approval_canceled`, and proxy-disconnect cancellation. +- `N-001` Approval timeout / bind nonce TTL defaults: resolved in Config Lifecycle and Safety Invariants. +- `N-002` Go SDK path: resolved in ADR-009 by committing to `sdk/go`. +- `N-003` acpmock and Playwright fixture updates: resolved in Test Strategy. +- `N-004` coverage/race discipline: resolved in Test Strategy. +- `N-005` long sanitized external IDs: resolved in ToolID and reason codes with `id_too_long`. +- `N-006` hook payload delete targets: resolved in Delete Targets. +- `N-007` external read-only trust: resolved in Config Lifecycle with `trusted_sources`. +- `N-008` child task lineage authority: resolved in Network And Tasks and Integration Tests. diff --git a/.compozy/tasks/tools-registry/qa/peer-review-summary.md b/.compozy/tasks/tools-registry/qa/peer-review-summary.md new file mode 100644 index 000000000..f46a3bd2d --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-summary.md @@ -0,0 +1,26 @@ +# Tool Registry TechSpec Peer Review Summary + +## Round 1 + +- Reviewer: Claude Opus via `compozy exec --ide claude --model opus --reasoning-effort xhigh --format json` +- Prompt: `qa/peer-review-prompt.md` +- Raw stream: `qa/peer-review-result.json` +- Extracted verdict: `qa/peer-review-verdict.json` +- Readiness: `NEEDS_REWORK` + +Summary: The TechSpec carries the required quality markers and the core architecture is strong, but Opus found four blockers that needed resolution before approval. + +## Blockers + +- `B-001` Hosted MCP authentication was undefined, so a local process could impersonate another session projection. Resolved by adding session-bound proxy token binding, UDS bind handshake, redaction, invalidation, and safety invariants. 
+- `B-002` The bounded `agh__task_*` set was still an open wildcard, risking bypass of task authoritative primitives. Resolved by enumerating MVP task tools, excluding claim/release/complete/fail/run-start tools, and mapping included tools to `task.Service` methods. +- `B-003` Hosted MCP approval flow had no bridge to ACP `session/request_permission` and could become either a bypass or dead surface. Resolved by adding a Hosted MCP Approval Bridge with fail-closed behavior. +- `B-004` Extension backend execution scope was internally contradictory. Resolved by making external `mcp`, `extension_host`, and `subprocess` backend tools descriptor-only in MVP and post-MVP for call-through. + +## Nits + +All ten round-1 nits were addressed inline in `_techspec.md` and recorded in the TechSpec `## Nits` section. + +## Follow-Up + +The blockers were resolved after the round-1 verdict. A second-round confirmation was not run because the skill requires an explicit user request for additional rounds. diff --git a/.compozy/tasks/tools-registry/qa/peer-review-verdict.json b/.compozy/tasks/tools-registry/qa/peer-review-verdict.json new file mode 100644 index 000000000..d6618e536 --- /dev/null +++ b/.compozy/tasks/tools-registry/qa/peer-review-verdict.json @@ -0,0 +1,98 @@ + +{ + "blockers": [ + { + "id": "B-001", + "section": "Integration Points → Hosted MCP / Implementation Steps step 8 / Safety Invariants", + "issue": "The hosted MCP exposure path is described as `agent session -> ACP mcpServers -> agh tool mcp --session -> UDS -> daemon Tool Registry`, but the TechSpec never specifies how the daemon authenticates that the spawned `agh tool mcp --session ` process is actually authorized to act on behalf of that session. There is no proof-of-session token, no per-session UDS handshake, no spawn-time secret bound to the ACP `mcpServers` config, and no rejection rule for a foreign process attaching to another session's projection. 
ADR-002 explicitly requires the TechSpec to 'define how the hosted MCP server is authorized, scoped to the session/workspace, and prevented from bypassing registry dispatch'; this is unfulfilled.", + "rationale": "Without a specified auth model, any local process (a malicious extension sidecar, a co-tenant agent, an unrelated CLI invocation) that can reach the UDS socket can call `agh tool mcp --session ` and obtain the full session-callable tool projection — including any approve-all-permitted mutating tools, `agh__network_send`, and bounded `agh__task_*` operations. This is a session-impersonation security regression and a direct violation of the security-invariant posture in `internal/CLAUDE.md` (claim_token redaction is paired with session/workspace scoping precisely because UDS reachability is not an authorization). It is also exactly what ADR-002 stipulated must be in the TechSpec.", + "suggested_fix": "Add a 'Hosted MCP Authentication' subsection that specifies: (a) the daemon mints a single-use, hashed session-bound proxy token at session creation, never logs the raw value, and persists only its hash; (b) the ACP `mcpServers` env / arg passes the raw token to the spawned `agh tool mcp` process via inherited stdio handshake, never via argv; (c) the proxy presents the token over UDS in an initial bind RPC and the daemon ties the resulting projection to that single session_id; (d) the token is invalidated on session end / disconnect / TTL; (e) raw tokens are subject to the same redaction rules as `claim_token`. List this in Safety Invariants as an additional numbered item." + }, + { + "id": "B-002", + "section": "Implementation Design → Implementation Steps step 6 / Integration Points → Network And Tasks / ADR-004 alignment", + "issue": "The MVP scope still leaves `agh__task_*` as an open wildcard. 
The TechSpec lists only `agh__task_list` and `agh__task_read` in the canonical examples and the build order says 'bounded `agh__task_*`' without enumerating the set. ADR-004 explicitly requires the TechSpec to enumerate the bounded operations (candidates: list/get/create/update/claim/release/complete) and to map each one to an existing or planned task primitive. That enumeration is missing.", + "rationale": "L-003 / L-004 / L-005 are load-bearing: `task_runs` is the single durable queue, manual==per must converge on the same primitives, and `task.Service.ClaimNextRun` is the only authoritative claimer. If the TechSpec ships without naming the exact tools and their authority routing, an implementer can plausibly introduce `agh__task_claim` or `agh__task_release` that wraps task state mutations outside `ClaimNextRun`, or expose `agh__task_create` as auto-publishing — both of which collapse the autonomy invariants Pedro shipped on 2026-04-26. This also violates `cy-tasks-tail-qa-pair` / Web-Docs-Impact discipline because the QA pair has no concrete tool list to test.", + "suggested_fix": "Add a 'Bootstrap Task Tools' subsection that names every `agh__task_*` tool in the MVP, with `read_only`/`destructive`/`open_world` classification and the exact `internal/task` method each one calls. Explicitly state which task tools are excluded from MVP (e.g., `agh__task_claim` and `agh__task_complete` are excluded because claim/lease state changes belong to `ClaimNextRun` and to the task lifecycle authority, not to a generic agent-callable tool). 
Bind this subsection to a Safety Invariant entry: 'No `agh__task_*` tool may bypass `task.Service.ClaimNextRun`, `Spawn`, or session manager terminal-state authority.'" + }, + { + "id": "B-003", + "section": "Integration Points → Hosted MCP / API Endpoints (`approval_token`) / ADR-005 integration", + "issue": "The TechSpec exposes session-callable tools through the hosted MCP server, but never specifies how a tool whose `EffectiveToolDecision.approval_required == true` is actually invoked over MCP. MCP has no per-call approval prompt, the model has no path to surface `approval_token` over `tools/call`, and the spec does not say whether such tools are (a) filtered out of `SessionToolView` entirely, (b) auto-routed through ACP `session/request_permission`, or (c) silently fail. The CLI/HTTP `invoke` request has an `approval_token` field; the hosted MCP path has no equivalent.", + "rationale": "ADR-005 is explicit that approval-required tools must integrate with the ACP `permissions.mode` ceiling and the `session/request_permission` callback path. Without a specified mechanism, the hosted MCP path becomes a silent approval-bypass surface (if approval is auto-skipped) or a dead surface (if tools are listed but uncallable). Either outcome violates 'truthful UI > plausible UI' (CLAUDE.md Design System) at the agent surface and creates exactly the contradictory state ADR-005 was written to prevent. 
The 'partial-surface completion' rule in `internal/CLAUDE.md` is tripped here too — CLI/HTTP gets `approval_token`, MCP doesn't.", + "suggested_fix": "Add a 'Hosted MCP Approval Bridge' subsection that specifies: (a) `SessionToolView` for hosted MCP filters out tools that require approval AND have no live approval channel for this session (default fail-closed); (b) when ACP `session/request_permission` is available, the daemon issues that request from inside `Registry.Call` and blocks the MCP `tools/call` response until approved/denied/timed-out; (c) when no approval channel is available (deny-all + no live operator), the MCP call returns `ErrToolApprovalRequired` with `reason_codes` including `approval_unreachable`. Add a Safety Invariant: 'A hosted MCP `tools/call` cannot succeed past `approval_required` without a daemon-mediated approval that satisfies the same ACP/session ceiling as a CLI/HTTP `approval_token`.'" + }, + { + "id": "B-004", + "section": "MVP Boundary Statement / Implementation Steps step 7-8 / Test Strategy → Integration Tests", + "issue": "The MVP boundary is internally contradictory about extension tool execution. The MVP statement says 'extension descriptor projection' (descriptors only). Step 7 builds 'extension manifest backend metadata and descriptor projection.' Step 8 says 'MCP adapter contracts and hosted MCP stdio proxy command' — 'contracts' is ambiguous as to whether the MCP-backed extension dispatch path actually executes. Yet the integration test plan asserts 'MCP-backed extension tool remains unavailable until backend health/auth passes,' which only makes sense if dispatch is functional. The post-MVP list defers only `bridge`; `mcp`, `extension_host`, and `subprocess` are silent. This produces the exact partial-surface ambiguity that L-007 and the no-partial-surface rule in `internal/CLAUDE.md` forbid.", + "rationale": "Either MVP ships extension tool dispatch (mcp / extension_host / subprocess) or it doesn't — the spec must commit. 
If it does ship, step 8's deliverable list is incomplete (no extension_host adapter, no subprocess adapter, no MCP call-through specified). If it doesn't, the integration test about MCP backend health is wrong and the descriptor-only state must be explicit. Without resolution, implementers will pick whichever interpretation is convenient mid-stream and the surface will ship half-built, exactly the partial-surface failure mode that breaks `make verify`-passing-but-not-actually-working features (L-007).", + "suggested_fix": "Replace the ambiguous wording with a concrete table in 'MVP Boundary Statement': for each backend kind (`builtin`, `mcp`, `extension_host`, `subprocess`, `bridge`), state explicitly whether MVP delivers (1) descriptor + availability state only or (2) full dispatch through `Registry.Call`. Update the test strategy to match. If only some kinds dispatch in MVP, name the others as 'descriptor-only, returns `ErrToolUnavailable` with reason `backend_not_implemented` if invoked' so the surface stays truthful and the post-MVP work list is explicit." + } + ], + "nits": [ + { + "id": "N-001", + "section": "Data Models → Toolset", + "issue": "`ToolsetID` grammar is never specified; only example values like `agh__bootstrap` and `linear__read` appear.", + "suggested_fix": "State that `ToolsetID` shares the canonical `ToolID` grammar (lowercase ASCII segments separated by `__`, max 64 chars) and is validated by the same parser." + }, + { + "id": "N-002", + "section": "Config Lifecycle → Agent Definitions", + "issue": "Wildcard / pattern grammar for `agent.tools`, `agent.deny_tools`, and registry allow/deny is referenced but never formally defined.", + "suggested_fix": "Add a 'Tool Pattern Grammar' subsection: allowed forms (`agh__skill_*`, `mcp__github__*`, exact IDs), disallowed forms (regex, `*__suffix`, mid-segment wildcards), and matching semantics against the canonical `ToolID`." 
+ }, + { + "id": "N-003", + "section": "API Endpoints → Invoke request", + "issue": "`approval_token` is introduced without specifying its format, lifetime, source, or relationship to claim_token redaction rules.", + "suggested_fix": "Define `approval_token` as an opaque local-approval reference issued by the daemon's approval surface, hashed at rest, redacted in logs/SSE/events, and unrelated to `claim_token` (separate code path)." + }, + { + "id": "N-004", + "section": "Data Models → SourceRef", + "issue": "`dynamic` source kind is enumerated but has no MVP producer (synthesis explicitly notes the existing enum has no producer).", + "suggested_fix": "Either remove `dynamic` from the MVP enum (greenfield-delete) or note explicitly that it is reserved with no MVP producer and has no validation surface yet." + }, + { + "id": "N-005", + "section": "Integration Points → Hosted MCP", + "issue": "Lifecycle of the hosted MCP proxy process (per-session vs long-lived, stdio ownership, restart on session resume) is undefined.", + "suggested_fix": "Add a one-paragraph lifecycle: stdio proxy is spawned by the ACP runtime per `mcpServers` entry, scoped to one session, exits when stdio closes, reconnects to the same session on `session/load`." + }, + { + "id": "N-006", + "section": "Integration Points → Skills", + "issue": "`agh__skill_view` returns full skill content but interaction with `default_max_result_bytes` (262144) is not specified — large skill bodies will silently truncate.", + "suggested_fix": "State that `agh__skill_view` truncates per registry result budget and returns `truncated=true` plus a typed `next_offset`/artifact reference, or raises its descriptor's `max_result_bytes` to the documented skill body cap." 
+ }, + { + "id": "N-007", + "section": "Integration Points → Hooks / Implementation Steps step 11 / Delete Targets", + "issue": "Hook payload identity migration (`tool_name`+`tool_namespace` → `tool_id`) is a contract change; co-shipping the hook tests / extension-author docs / fixture matchers in the same PR is implied but not enforced (L-007 risk).", + "suggested_fix": "Add an explicit bullet under step 11: 'Update typed hook payloads, hook matchers, fixture builders, extension-author docs, and codegen in the same change; no dual identity allowed mid-PR.'" + }, + { + "id": "N-008", + "section": "Config Lifecycle → Agent Definitions", + "issue": "Existing `SessionPermissionPolicy.Tools` rows from the autonomy MVP (shipped 2026-04-26) may not match the new canonical `ToolID` grammar; the spec says implementation should 'validate' these atoms but does not state the cleanup behavior on validation failure.", + "suggested_fix": "State explicitly: 'Greenfield posture — invalid atoms reject session spawn / lineage load with a typed error; no silent normalization. Local databases pre-dating this TechSpec require a fresh `AGH_HOME` per L-006/L-008 allowed exception.'" + }, + { + "id": "N-009", + "section": "API Endpoints / Hosted MCP", + "issue": "Workspace resolution path for the hosted MCP `tools/list` is implicit. The CallRequest carries `workspace_id`, but the MCP proxy only knows `session_id`.", + "suggested_fix": "State that the proxy never accepts client-supplied workspace_id; the daemon derives workspace_id from session_id at projection time and at dispatch time." + }, + { + "id": "N-010", + "section": "ADR-004 / Implementation Steps step 6", + "issue": "Risk / read-only / destructive / open-world classification for each MVP tool (`agh__network_send`, `agh__task_*`, `agh__skill_view`) is left to implementation.", + "suggested_fix": "Add a small classification table per MVP tool to remove ambiguity at implementation time and to feed the policy matrix tests." 
+ } + ], + "readiness": "NEEDS_REWORK", + "summary": "The TechSpec is structurally strong — it carries all six L-012 quality markers, names delete targets, integrates with ADR-005 ceiling semantics, forbids in-process third-party handlers, and centralizes dispatch. It is blocked on four concrete gaps that break the spec's own contracts: hosted-MCP proxy authentication is undefined (ADR-002 explicitly required it), the bounded `agh__task_*` set is not enumerated (ADR-004 explicitly required it and L-005 requires the authoritative-primitive boundary), the approval flow over hosted MCP has no specified bridge to ACP `session/request_permission`, and the MVP boundary for extension tool execution (mcp / extension_host / subprocess) is internally contradictory. Resolve those four and the spec is ready." +} + diff --git a/.compozy/tasks/tools-registry/task_01.md b/.compozy/tasks/tools-registry/task_01.md new file mode 100644 index 000000000..6d1848dad --- /dev/null +++ b/.compozy/tasks/tools-registry/task_01.md @@ -0,0 +1,92 @@ +--- +status: pending +title: Core Tool Contracts and Canonical ToolID +type: backend +complexity: high +dependencies: [] +--- + +# Task 01: Core Tool Contracts and Canonical ToolID + +## Overview + +Establish the canonical runtime contracts for the Tool Registry before any executable backend is added. This task replaces the current cold metadata model with final-shape identifiers, descriptors, backend references, source references, result envelopes, availability states, and deterministic error reasons. + + +- ALWAYS READ `_techspec.md` and every ADR in `adrs/` before starting; this feature has no `_prd.md` +- REFERENCE the TechSpec for interfaces and invariants instead of copying them into task-local designs +- PRESERVE the greenfield hard cut: no dotted IDs, no aliases, no legacy descriptor-only execution model +- TESTS REQUIRED: every validator and error reason introduced here must have focused coverage + + + +1. 
MUST define canonical `ToolID` validation using lower-snake segments separated by reserved `__`. +2. MUST enforce the TechSpec maximum length and fail with `id_too_long` instead of truncating or hash-suffixing. +3. MUST model descriptors, backend refs, source refs, availability, results, errors, risk flags, and provider/handle interfaces in `internal/tools`. +4. MUST preserve JSON-object input schema validation and result-size metadata without leaking backend secrets. +5. MUST keep `internal/tools` independent from daemon, API, CLI, extension, MCP, session, task, skill, and network packages. +6. MUST include compile-time interface assertions for exported providers/handles that cross package boundaries. + + +## Subtasks +- [ ] 1.1 Replace the current `Tool{Name,...}` shape with final registry contracts in `internal/tools` +- [ ] 1.2 Add `ToolID`, namespace, descriptor, backend, source, availability, result, and error validators +- [ ] 1.3 Add provider and handle interfaces for `native_go`, `extension_host`, and `mcp` backends +- [ ] 1.4 Preserve tool-resource schema validation while separating cold resources from executable descriptors +- [ ] 1.5 Add deterministic reason constants used by policy, availability, dispatch, and public surfaces +- [ ] 1.6 Add package-boundary-safe tests and compile-time assertions + +## Implementation Details + +Use the TechSpec sections "Core Interfaces", "Data Models", "Architectural Boundaries", and "Safety Invariants" as the source of truth. The existing `internal/tools` package is intentionally small and should become the stable dependency that other packages adapt to, not a package that imports those domains. 
+ +### Relevant Files +- `internal/tools/tool.go` - current cold metadata model to replace or reshape +- `internal/tools/resource.go` - existing tool resource schema validation to preserve +- `internal/tools/*_test.go` - new unit tests for IDs, descriptors, schemas, and errors +- `magefile.go` - package boundary checks if new internal packages or allowed imports change + +### Dependent Files +- `internal/extension/resource_publication.go` - later tasks consume the cold-resource/executable-descriptor split +- `internal/mcp/auth/types.go` - later tasks map auth status into availability reasons without exposing tokens +- `internal/hooks/payloads.go` - later tasks use canonical `tool_id` +- `internal/api/contract/` - later tasks expose these contracts through HTTP/UDS DTOs + +### Related ADRs +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - defines executable backend classes +- [ADR-003: Runtime Registry Package Boundary](adrs/adr-003-runtime-registry-package-boundary.md) - constrains `internal/tools` dependencies +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - defines public ToolID grammar + +### Web/Docs Impact +- `web/`: none - checked surfaces: `web/src/generated/agh-openapi.d.ts`, `web/src/systems/**`; reason: this task creates backend-internal contracts only and does not expose HTTP/UDS DTOs yet. +- `packages/site`: future docs in task_14 must describe canonical ToolID and backend kinds; no site file changes in this task unless implementation adds public docs early. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: establishes the registry contracts consumed by extensions, hooks, MCP adapters, tool resources, and SDKs. +- Agent manageability: none directly - checked CLI/HTTP/UDS surfaces; public operations are introduced in task_11 and task_12. +- Config lifecycle: none directly - checked `internal/config`; config keys are introduced in task_02. 
+ +## Deliverables +- Final `internal/tools` contract types and validators +- Deterministic error and availability reason constants +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration-style package-boundary tests or `make boundaries` evidence **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Valid IDs such as `agh__skill_view` and `mcp__github__create_issue` are accepted + - [ ] Dotted, hyphenated, uppercase, empty-segment, reserved-conflict, and over-length IDs are rejected with stable reasons + - [ ] Descriptors require object input schemas and valid backend/source refs + - [ ] Result envelopes preserve content metadata while enforcing redaction/truncation fields + - [ ] Provider and handle interfaces reject nil or incomplete implementations where applicable +- Integration tests: + - [ ] `go test ./internal/tools -race` passes + - [ ] `make boundaries` proves `internal/tools` has no forbidden domain imports +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `internal/tools` can represent native, extension-host, and MCP executable tools without importing their implementation packages +- Invalid public ToolIDs fail closed with deterministic reason codes diff --git a/.compozy/tasks/tools-registry/task_02.md b/.compozy/tasks/tools-registry/task_02.md new file mode 100644 index 000000000..3de37b66c --- /dev/null +++ b/.compozy/tasks/tools-registry/task_02.md @@ -0,0 +1,94 @@ +--- +status: pending +title: Tools Config Lifecycle and Agent Grammar +type: backend +complexity: high +dependencies: + - task_01 +--- + +# Task 02: Tools Config Lifecycle and Agent Grammar + +## Overview + +Add the configuration and agent-definition grammar required by the registry before policy and dispatch consume it. This task introduces final `config.toml` keys, defaults, validation, merge behavior, and agent `tools`/`toolsets`/`deny_tools` semantics without compatibility bridges. 
+ + +- ALWAYS READ `_techspec.md`, ADR-005, ADR-006, and ADR-007 before editing config behavior +- DO NOT add deprecated aliases, fallback key names, or old-state migrations +- DO NOT hand-edit `go.mod`; use `go get` only if a dependency is truly required +- TESTS REQUIRED: config defaults, overlays, validation, and examples must move together + + + +1. MUST add `[tools]`, `[tools.policy]`, and `[tools.hosted_mcp]` config sections with TechSpec defaults. +2. MUST add agent `toolsets` and `deny_tools` while validating existing `tools` as canonical ToolID atoms or explicit patterns allowed by the policy grammar. +3. MUST model approval timeout, hosted MCP bind nonce TTL, result byte limits, source defaults, and `trusted_sources`. +4. MUST validate unknown, contradictory, or unsafe config values at load time with deterministic errors. +5. MUST update merge/overlay behavior, examples, config docs references, and config tests in the same change. +6. MUST avoid compatibility shims for any rejected or renamed key. + + +## Subtasks +- [ ] 2.1 Add tools config structs, defaults, and validation +- [ ] 2.2 Extend agent config with `toolsets` and `deny_tools` +- [ ] 2.3 Validate ToolID atoms, source grants, trusted sources, and hosted MCP values +- [ ] 2.4 Update merge, workspace overlay, and example config behavior +- [ ] 2.5 Add config tests for defaults, invalid values, overlays, and agent grammar +- [ ] 2.6 Document downstream docs and generated examples required by task_14 + +## Implementation Details + +Use TechSpec "Config Lifecycle" and "Agent Manageability" sections. Keep this task focused on loading and validating configuration; do not implement registry policy decisions here beyond parse-time validation needed by task_03. 
+ +### Relevant Files +- `internal/config/config.go` - root config struct and defaults +- `internal/config/agent.go` - agent tool grammar +- `internal/config/provider.go` - MCP server config values consumed by later MCP tasks +- `internal/config/*_test.go` - config validation and merge tests +- `packages/site/content/runtime/core/configuration/config-toml.mdx` - docs target for task_14 + +### Dependent Files +- `internal/tools/policy*.go` - task_03 consumes parsed config +- `internal/acp/permission.go` - task_03 maps ACP ceiling into policy decisions +- `internal/session/manager_start.go` - task_10 consumes hosted MCP config +- `packages/site/content/runtime/core/sessions/permissions.mdx` - task_14 documents permission behavior + +### Related ADRs +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - constrains policy and approval config +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - distinguishes operator and session visibility +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - constrains config atoms + +### Web/Docs Impact +- `web/`: generated settings or diagnostics types may change later through task_11/task_13; no direct web code in this task. +- `packages/site`: task_14 must update `packages/site/content/runtime/core/configuration/config-toml.mdx`, `packages/site/content/runtime/core/sessions/permissions.mdx`, and any config examples covering agents or MCP. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: config now gates extension-host and MCP source trust, toolsets, and risk classes used by later executable providers. +- Agent manageability: establishes agent-readable config semantics but no CLI/HTTP/UDS management endpoints yet. +- Config lifecycle: adds `tools.*`, `tools.policy.*`, `tools.hosted_mcp.*`, agent `toolsets`, and agent `deny_tools`; defaults, validation, examples, docs, and tests must ship together. 
+ +## Deliverables +- Config structs/defaults/validation for tool registry policy and hosted MCP +- Agent definition grammar for `tools`, `toolsets`, and `deny_tools` +- Updated config examples and validation tests +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for config load/merge behavior **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Empty config loads safe defaults for `[tools]`, policy, hosted MCP, and result limits + - [ ] Invalid ToolID atoms, invalid toolsets, negative timeouts, and unsafe trusted-source entries fail validation + - [ ] `deny_tools` overrides parsed allow atoms without requiring policy evaluation in config code + - [ ] Workspace overlays preserve deterministic precedence for tools config +- Integration tests: + - [ ] A realistic `config.toml` with tools policy and agent toolsets loads through the same path used by the daemon + - [ ] Existing config fixtures still reject unknown keys and malformed MCP server config +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Tool registry config is validated before runtime policy consumes it +- No compatibility aliases or deprecated config paths are introduced diff --git a/.compozy/tasks/tools-registry/task_03.md b/.compozy/tasks/tools-registry/task_03.md new file mode 100644 index 000000000..c47872542 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_03.md @@ -0,0 +1,97 @@ +--- +status: pending +title: Registry Indexing, Toolsets, Policy, and Projections +type: backend +complexity: critical +dependencies: + - task_01 + - task_02 +--- + +# Task 03: Registry Indexing, Toolsets, Policy, and Projections + +## Overview + +Build the executable registry index and effective policy engine used by every backend and surface. 
This task turns providers into operator-visible and session-callable projections while enforcing canonical IDs, collision rules, toolsets, ACP ceilings, source policy, and child-session lineage. + + +- ALWAYS READ `_techspec.md`, ADR-003, ADR-005, ADR-006, and ADR-007 before changing registry policy +- DO NOT let `approve-all` bypass explicit denies, unavailable backends, source grants, lineage, conflicts, or hooks +- DO NOT hide unavailable or unauthorized tools from operator projections +- TESTS REQUIRED: policy tests must prove deny, source, lineage, and projection behavior independently + + + +1. MUST aggregate providers into a registry index with deterministic ordering and collision detection. +2. MUST fail closed on canonical ID collisions, sanitized external-name collisions, and `id_too_long`. +3. MUST expand toolsets into concrete ToolID atoms with cycle detection and deterministic errors. +4. MUST evaluate ACP permission mode as a ceiling above registry grants. +5. MUST produce separate operator-visible and session-callable projections with reason codes. +6. MUST enforce child-session subset validation using resolved concrete ToolID atoms. + + +## Subtasks +- [ ] 3.1 Implement provider registration, indexing, sorting, and collision detection +- [ ] 3.2 Implement toolset expansion, pattern validation, and cycle detection +- [ ] 3.3 Implement effective policy evaluation from config, agent, session, source, and ACP ceiling +- [ ] 3.4 Implement operator and session projections with availability/authorization reason codes +- [ ] 3.5 Enforce session lineage and child-session subset constraints with concrete ToolID atoms +- [ ] 3.6 Add focused tests for collisions, source grants, deny precedence, ACP ceiling, and projection differences + +## Implementation Details + +Use TechSpec "Integration Points", "Agent Manageability", "Safety Invariants", and ADR-006 for projection rules. 
Keep this task focused on decisions and projections; actual invocation happens in task_04 and backend providers arrive in later tasks. + +### Relevant Files +- `internal/tools/registry*.go` - new registry index and provider aggregation +- `internal/tools/policy*.go` - effective policy evaluator +- `internal/tools/projection*.go` - operator/session projections +- `internal/store/session_lineage.go` - child-session tool subset validation +- `internal/acp/permission.go` - ACP permission mode ceiling inputs + +### Dependent Files +- `internal/daemon/*` - later composition root wires providers into the registry +- `internal/api/core/handlers.go` - task_11 injects registry interfaces into handlers +- `internal/api/contract/` - task_11 exposes projection DTOs +- `web/src/systems/tools/**` - task_13 consumes projection reason codes + +### Related ADRs +- [ADR-003: Runtime Registry Package Boundary](adrs/adr-003-runtime-registry-package-boundary.md) - defines ownership boundaries +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - defines approval ceiling and policy layering +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - defines operator vs session visibility +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - defines collision-safe identity + +### Web/Docs Impact +- `web/`: task_13 must render projection states and reason codes through `web/src/systems/tools/**` and generated API types. +- `packages/site`: task_14 must document projection semantics, policy precedence, toolsets, `deny_tools`, and child-session lineage. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: centralizes policy for native, extension-host, MCP, hooks, skills, network, and task tools. +- Agent manageability: establishes structured state that later CLI/HTTP/UDS surfaces expose to agents. 
+- Config lifecycle: consumes `tools.policy`, agent `tools`, agent `toolsets`, agent `deny_tools`, and `trusted_sources` from task_02. + +## Deliverables +- Registry indexing and projection package behavior +- Effective policy evaluator with ACP ceiling and source grants +- Toolset expansion and child-session lineage enforcement +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for realistic provider aggregation and session projection **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Duplicate canonical IDs and sanitized MCP-name collisions fail closed + - [ ] Toolset cycles, unknown members, and invalid patterns return deterministic errors + - [ ] Explicit denies override allows, toolsets, trusted sources, and `approve-all` + - [ ] `approve-reads` does not approve untrusted external read-only tools without source or tool grants + - [ ] Child-session permissions cannot exceed parent concrete ToolID atoms after toolset expansion +- Integration tests: + - [ ] Operator projection includes unavailable, unauthorized, and conflicted tools with reason codes + - [ ] Session projection exposes only callable tools for the effective session +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Every backend can rely on one registry policy engine instead of implementing local gates +- Operator and session projections intentionally differ and are both deterministic diff --git a/.compozy/tasks/tools-registry/task_04.md b/.compozy/tasks/tools-registry/task_04.md new file mode 100644 index 000000000..f7a7c34d0 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_04.md @@ -0,0 +1,98 @@ +--- +status: pending +title: Dispatch Pipeline, Hooks, Budgets, and Observability +type: backend +complexity: critical +dependencies: + - task_03 +--- + +# Task 04: Dispatch Pipeline, Hooks, Budgets, and Observability + +## Overview + +Implement the central call path for all registry-backed tools. 
This task adds dispatch-time validation, policy rechecks, canonical hook payloads, cancellation, result limiting, redaction metadata, telemetry seams, and normalized errors before native, extension, or MCP providers are exposed publicly. + + +- ALWAYS READ `_techspec.md`, ADR-003, ADR-005, ADR-006, and ADR-007 before editing dispatch +- DO NOT let provider handles bypass schema validation, policy recheck, hooks, budgets, or redaction +- HARD CUT registry-owned hook payloads to canonical `tool_id`; do not create dual `tool_name` aliases +- TESTS REQUIRED: hook and error behavior must be deterministic across success, denial, cancellation, and provider failure + + + +1. MUST implement one registry dispatch pipeline for `native_go`, `extension_host`, and `mcp` handles. +2. MUST validate input schema and re-evaluate availability/policy immediately before invocation. +3. MUST execute pre-call, post-call, and error hooks with canonical `tool_id` payloads. +4. MUST enforce result byte budgets, redaction metadata, and stable truncation semantics. +5. MUST propagate `context.Context` cancellation through provider handles and hook execution. +6. MUST emit structured observability events without leaking tokens, approval secrets, bind nonces, or raw tool input marked sensitive. + + +## Subtasks +- [ ] 4.1 Implement `Registry.Call` and call-context inputs +- [ ] 4.2 Add schema validation, availability recheck, policy recheck, and normalized errors +- [ ] 4.3 Hard-cut registry hook payloads and matchers to canonical `tool_id` +- [ ] 4.4 Add result limiting, redaction metadata, and sensitive-field filtering +- [ ] 4.5 Add telemetry/event seams for success, denial, timeout, cancellation, and provider errors +- [ ] 4.6 Add unit and integration tests for call ordering, hooks, budgets, cancellation, and redaction + +## Implementation Details + +Use TechSpec "Core Interfaces", "Integration Points: Hooks", "Test Strategy", and "Safety Invariants". 
Keep dispatch provider-agnostic; backend-specific adapters are implemented in tasks 05, 07, and 09. + +### Relevant Files +- `internal/tools/dispatch*.go` - central call path and call context +- `internal/tools/result*.go` - result budgets, truncation, and redaction metadata +- `internal/hooks/payloads.go` - registry-owned hook payload changes +- `internal/hooks/types.go` - hook type contracts affected by canonical tool IDs +- `internal/hooks/matcher.go` - matcher behavior for `tool_id` +- `internal/observe/**` - event/log integration seams if required by existing observability patterns + +### Dependent Files +- `internal/tools/builtin_*.go` - task_05 native handles enter through dispatch +- `internal/extension/manager.go` - task_07 extension calls enter through dispatch +- `internal/mcp/**` - task_09 MCP calls enter through dispatch +- `internal/api/contract/` - task_11 exposes normalized call errors and result envelopes + +### Related ADRs +- [ADR-003: Runtime Registry Package Boundary](adrs/adr-003-runtime-registry-package-boundary.md) - dispatch remains owned by `internal/tools` +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - dispatch revalidates approval and policy gates +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - callable session projection is rechecked at invocation +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - hook payloads use canonical IDs + +### Web/Docs Impact +- `web/`: task_13 must update any tool-call displays or generated event consumers that currently assume `tool_name`/`tool_namespace`. +- `packages/site`: task_14 must document hook payload hard cut, result budgets, redaction behavior, and deterministic error classes. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: establishes the only safe executable path for extension, MCP, native, hook, and future bridge tool providers. 
+- Agent manageability: later CLI/HTTP/UDS invoke surfaces must use this pipeline and return its structured errors. +- Config lifecycle: consumes result budgets and approval timeout policy from task_02; does not add new config keys. + +## Deliverables +- Central provider-agnostic dispatch pipeline +- Canonical `tool_id` hook payloads and matcher support for registry calls +- Result budget and redaction enforcement +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for hook order, policy recheck, and cancellation **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Invalid inputs fail before provider invocation + - [ ] Policy and availability are rechecked after projection and before handle call + - [ ] Pre-call hook denial prevents provider invocation and returns the expected reason + - [ ] Post-call and post-error hooks receive canonical `tool_id` and redacted metadata + - [ ] Context cancellation stops hook/provider execution and returns deterministic cancellation errors + - [ ] Oversized results are truncated with metadata and never leak configured sensitive fields +- Integration tests: + - [ ] A fake provider called through the registry observes schema, policy, hook, budget, and telemetry ordering + - [ ] Existing hook tests are migrated off registry-owned `tool_name`/`tool_namespace` assumptions +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Every executable backend must pass through the same dispatch pipeline +- Registry hook identity is canonical `tool_id` with no dual identity path diff --git a/.compozy/tasks/tools-registry/task_05.md b/.compozy/tasks/tools-registry/task_05.md new file mode 100644 index 000000000..fd1ce7e39 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_05.md @@ -0,0 +1,96 @@ +--- +status: pending +title: Native Go Built-In Providers +type: backend +complexity: high +dependencies: + - task_04 +--- + +# Task 05: Native Go Built-In Providers + 
+## Overview + +Add the first executable `native_go` providers backed by existing AGH services. This task exposes the bounded MVP tool set for registry, skill, network, and task operations while preserving service ownership and avoiding duplicate business logic. + + +- ALWAYS READ `_techspec.md`, ADR-004, ADR-005, and ADR-006 before adding built-in tools +- DO NOT expose task claim/release/complete/fail/run-start or skill install/remove/update in MVP +- DO NOT bypass existing skill, task, network, or registry services with parallel implementations +- TESTS REQUIRED: every built-in must prove policy flags, input validation, and real service wiring + + + +1. MUST add executable `native_go` providers for `agh__tool_list`, `agh__tool_search`, and `agh__tool_info`. +2. MUST add executable `agh__skill_list`, `agh__skill_search`, and `agh__skill_view` using existing skill registry behavior. +3. MUST add executable `agh__network_peers` and `agh__network_send` using existing network manager boundaries. +4. MUST add only the bounded task tools from ADR-004: list, read, create, child create, update, cancel, and run list. +5. MUST mark read-only, mutating, open-world, and destructive risk metadata accurately for each native tool. +6. MUST wire providers through daemon composition and central dispatch only. + + +## Subtasks +- [ ] 5.1 Add registry bootstrap tools for list, search, and info +- [ ] 5.2 Add skill catalog native tools using existing skill registry APIs +- [ ] 5.3 Add network native tools using existing network manager APIs +- [ ] 5.4 Add bounded task native tools using existing task services and child-lineage rules +- [ ] 5.5 Wire native providers in the daemon composition root +- [ ] 5.6 Add tests proving descriptors, risk flags, dispatch wiring, and excluded tools + +## Implementation Details + +Use TechSpec "MVP Boundary Statement", "Integration Points", and "Implementation Steps" plus ADR-004. 
Keep adapter code thin: native tools should translate registry inputs/outputs to existing services, not become new domain services. + +### Relevant Files +- `internal/tools/builtin_*.go` - native provider adapters and descriptors +- `internal/daemon/**` - composition root wiring for providers +- `internal/skills/registry.go` - skill list/search/view behavior to reuse +- `internal/task/interfaces.go` - task service boundaries to reuse +- `internal/network/manager.go` - network operations to wrap +- `internal/store/session_lineage.go` - child-task lineage constraints + +### Dependent Files +- `internal/api/contract/tools.go` - task_11 exposes native descriptors/results +- `internal/cli/tool*.go` - task_12 invokes native tools +- `web/src/systems/tools/**` - task_13 displays native tool state +- `packages/site/content/runtime/core/tools.mdx` - task_14 documents native MVP tools + +### Related ADRs +- [ADR-004: MVP Native Tool Scope](adrs/adr-004-mvp-native-tool-scope.md) - defines exact native built-in tool set +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - constrains mutating/destructive calls +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - constrains operator/session projections + +### Web/Docs Impact +- `web/`: task_13 must render native tool descriptors, callable state, and unavailable/denied reasons through the tool diagnostics surface. +- `packages/site`: task_14 must document each native MVP tool, explicitly listing excluded skill/task lifecycle operations. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: creates first-party `native_go` provider precedent for future runtime-owned tools. +- Agent manageability: native tools become manageable through registry API/CLI/UDS surfaces in tasks 11-12. +- Config lifecycle: consumes policy/toolset config from task_02; no new config keys in this task. 
+ +## Deliverables +- Executable native providers for registry, skill, network, and bounded task tools +- Daemon wiring that registers native providers through the registry +- Tests proving included and excluded tool scope +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests using real services where practical **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Native descriptors use canonical IDs, accurate backend refs, accurate source refs, and correct risk flags + - [ ] Excluded task and skill lifecycle operations are not registered + - [ ] Invalid input schemas for each native tool fail before service calls + - [ ] Mutating native tools require the expected policy and approval conditions +- Integration tests: + - [ ] Skill list/search/view tools return real skill registry results through `Registry.Call` + - [ ] Task child creation enforces parent/child lineage constraints + - [ ] Network send goes through the existing network manager and preserves deterministic errors +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Built-in MVP tools are executable through the registry, not descriptor-only +- Existing domain services remain the source of truth for skills, tasks, and network behavior diff --git a/.compozy/tasks/tools-registry/task_06.md b/.compozy/tasks/tools-registry/task_06.md new file mode 100644 index 000000000..ad036ddae --- /dev/null +++ b/.compozy/tasks/tools-registry/task_06.md @@ -0,0 +1,94 @@ +--- +status: pending +title: Extension Manifest Tool Metadata and Reconciliation +type: backend +complexity: high +dependencies: + - task_03 +--- + +# Task 06: Extension Manifest Tool Metadata and Reconciliation + +## Overview + +Make extension tool descriptors manifest-authoritative while preparing them for executable runtime reconciliation. 
This task extends extension metadata, cold resource publication, schema digest fixtures, and availability reasons without invoking extension handlers yet. + + +- ALWAYS READ `_techspec.md`, ADR-001, ADR-008, and ADR-009 before changing extension tool metadata +- DO NOT revive descriptor-only executable behavior; descriptors become callable only after runtime reconciliation +- DO NOT let extensions claim reserved `agh__*` IDs or omit handler binding metadata +- TESTS REQUIRED: manifest, digest, mismatch, disabled, and lifecycle behavior must be covered + + + +1. MUST extend `extension.toml` `resources.tools` with canonical ID, backend metadata, handler binding, risk flags, result budgets, schemas, and toolset metadata. +2. MUST keep manifest descriptors authoritative while allowing runtime descriptors to prove they match. +3. MUST add RFC 8785/JCS-compatible schema digest fixtures shared by daemon, TypeScript SDK, and Go SDK tasks. +4. MUST publish cold tool resources without marking them executable until runtime reconciliation succeeds. +5. MUST surface mismatch, missing handler, disabled extension, inactive extension, and reserved namespace reason codes. +6. MUST update extension manifest validation and resource publication tests. + + +## Subtasks +- [ ] 6.1 Extend extension manifest tool metadata and validation +- [ ] 6.2 Split cold resource publication from executable descriptor readiness +- [ ] 6.3 Add schema digest fixture files and daemon digest tests +- [ ] 6.4 Add reconciliation reason codes for manifest/runtime mismatch states +- [ ] 6.5 Reject reserved namespaces and invalid handler bindings +- [ ] 6.6 Add extension lifecycle tests for enabled, disabled, removed, and unhealthy extensions + +## Implementation Details + +Use TechSpec "Extension Runtime Contract", "Data Models", and ADR-008. Keep this task manifest-side only; the subprocess protocol and SDK handlers are task_07 and task_08. 
+ +### Relevant Files +- `internal/extension/manifest.go` - extension tool metadata fields and validation +- `internal/extension/resource_publication.go` - cold resource publication behavior +- `internal/extension/capability*.go` - extension capability metadata if required +- `internal/tools/descriptor*.go` - descriptor/digest contracts consumed by extensions +- `internal/extension/*_test.go` - manifest and resource publication coverage + +### Dependent Files +- `internal/extension/protocol/host_api.go` - task_07 adds runtime tool provider protocol +- `sdk/typescript/src/types.ts` - task_07 mirrors manifest/runtime descriptors +- `sdk/go/**` - task_08 mirrors descriptor and digest rules +- `sdk/create-extension/src/index.ts` - task_07/task_08 add templates + +### Related ADRs +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - separates first-party native tools from out-of-process extension tools +- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - defines manifest/runtime reconciliation +- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - constrains Go SDK compatibility with manifest contracts + +### Web/Docs Impact +- `web/`: task_13 must display extension tool availability, disabled/unhealthy states, and mismatch reasons through generated tool diagnostics. +- `packages/site`: task_14 must update extension authoring docs to explain manifest-authoritative descriptors and executable reconciliation. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: changes extension manifests, tool resources, schema digest contracts, and extension registry behavior. +- Agent manageability: exposes readiness/reason data later through CLI/HTTP/UDS in tasks 11-12. +- Config lifecycle: consumes extension enable/disable lifecycle; no new top-level config keys beyond task_02. 
+ +## Deliverables +- Extended extension tool manifest schema and validation +- Cold-resource to executable-descriptor reconciliation metadata +- Shared schema digest fixtures for daemon/SDK parity +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for extension resource publication lifecycle **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Valid manifest tool entries produce canonical descriptors and digest metadata + - [ ] Reserved `agh__*`, invalid ToolID, missing handler, invalid risk, and non-object schema entries fail validation + - [ ] Runtime mismatch placeholders surface deterministic availability reasons + - [ ] Disabled and unhealthy extensions remain operator-visible but session-hidden +- Integration tests: + - [ ] Existing extension resource publication still works for non-tool resources + - [ ] Tool resources publish cold metadata without creating callable handles before task_07 runtime reconciliation +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Extension tools are manifest-authoritative but not descriptor-only callable +- Digest fixtures are ready for TypeScript and Go SDK parity tests diff --git a/.compozy/tasks/tools-registry/task_07.md b/.compozy/tasks/tools-registry/task_07.md new file mode 100644 index 000000000..207779137 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_07.md @@ -0,0 +1,100 @@ +--- +status: pending +title: Extension Runtime Protocol and TypeScript SDK Tools +type: backend +complexity: critical +dependencies: + - task_04 + - task_06 +--- + +# Task 07: Extension Runtime Protocol and TypeScript SDK Tools + +## Overview + +Add executable extension-host tool support through the existing subprocess extension model and TypeScript SDK. 
This task introduces `tool.provider`, runtime descriptor reconciliation, `provide_tools`, `tools/call`, and `extension.tool(...)` so TypeScript extensions define real handlers rather than descriptors only. + + +- ALWAYS READ `_techspec.md`, ADR-001, ADR-008, and the existing extension subprocess code before editing protocol behavior +- DO NOT run third-party extension handlers in-process inside the daemon +- DO NOT add a parallel TypeScript runtime; extend the existing `Extension.handle(...)` and transport patterns +- TESTS REQUIRED: descriptor reconciliation and `tools/call` must be proven through real subprocess integration + + + +1. MUST add `tool.provider`, `provide_tools`, and `tools/call` protocol constants and request/response structs. +2. MUST require initialized extensions that declare `tool.provider` to implement all required service methods. +3. MUST reconcile runtime-provided descriptors with manifest-authoritative descriptors before marking tools executable. +4. MUST route extension tool invocation through the existing subprocess manager and central registry dispatch. +5. MUST add TypeScript SDK `extension.tool(...)` with typed handler registration, descriptor export, digest generation, and typed errors. +6. MUST add TypeScript tool-provider create-extension template and scaffold tests. 
+ + +## Subtasks +- [ ] 7.1 Add extension protocol constants, structs, and initialization method coverage +- [ ] 7.2 Add manager-side `provide_tools` reconciliation and `tools/call` invocation +- [ ] 7.3 Add TypeScript SDK `extension.tool(...)` handler registration and descriptor export +- [ ] 7.4 Add TypeScript digest parity tests using task_06 fixtures +- [ ] 7.5 Add TypeScript create-extension tool-provider template and scaffold tests +- [ ] 7.6 Add subprocess integration tests for missing methods, schema mismatch, handler mismatch, handler errors, and cancellation + +## Implementation Details + +Use TechSpec "Extension Runtime Contract", "Core Interfaces", and "Implementation Steps" 7-9. Existing extension subprocess handshake and `Process.Call` behavior should be reused; do not create a separate protocol loop. + +### Relevant Files +- `internal/extension/protocol/host_api.go` - protocol constants and method contracts +- `internal/subprocess/handshake.go` - required method validation during initialize +- `internal/subprocess/process.go` - existing JSON-RPC call path to reuse +- `internal/extension/manager.go` - manager-side runtime reconciliation and invocation +- `sdk/typescript/src/extension.ts` - SDK registration API +- `sdk/typescript/src/types.ts` - SDK type contracts +- `sdk/create-extension/src/index.ts` - template selection and generation + +### Dependent Files +- `sdk/typescript/src/generated/contracts.ts` - generated/shared contracts if protocol types are code-generated +- `sdk/typescript/src/integration.test.ts` - subprocess integration coverage +- `sdk/create-extension/src/index.test.ts` - scaffold coverage +- `internal/api/contract/tools.go` - task_11 exposes extension availability and invocation results + +### Related ADRs +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - executable extension tools remain out-of-process +- [ADR-008: Manifest-Authoritative Extension Tool 
Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - defines runtime reconciliation +- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - Go SDK must align with protocol contracts introduced here + +### Web/Docs Impact +- `web/`: task_13 must display executable/unavailable extension-host tools and mismatch reasons from generated API types. +- `packages/site`: task_14 must document TypeScript `extension.tool(...)`, manifest/runtime reconciliation, error handling, and template usage. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: adds executable TypeScript extension tools, extension protocol methods, SDK APIs, and create-extension template. +- Agent manageability: extension-host tools become callable through CLI/HTTP/UDS in tasks 11-12 with deterministic errors. +- Config lifecycle: consumes extension enablement and tool policy from task_02; no additional config keys in this task. + +## Deliverables +- Extension protocol support for `tool.provider`, `provide_tools`, and `tools/call` +- Runtime descriptor reconciliation and executable extension handles +- TypeScript SDK `extension.tool(...)` API and tests +- TypeScript tool-provider create-extension template +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests with real subprocess extension fixtures **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Initialize rejects `tool.provider` extensions missing `provide_tools` or `tools/call` + - [ ] Runtime descriptor digest, risk, handler, and schema mismatches surface deterministic availability reasons + - [ ] TypeScript `extension.tool(...)` registers descriptors and handlers with digest parity + - [ ] SDK errors redact tool input fields marked sensitive +- Integration tests: + - [ ] A TypeScript extension publishes a read-only executable tool and succeeds through `Registry.Call` + - [ ] A TypeScript extension publishes a mutating tool and is gated by 
policy/approval + - [ ] Cancellation propagates from registry dispatch through subprocess `tools/call` + - [ ] `bun test` covers SDK and create-extension template behavior +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- TypeScript extensions can define executable tools with functions through the existing extension runtime +- Descriptor-only extension tools are not treated as callable diff --git a/.compozy/tasks/tools-registry/task_08.md b/.compozy/tasks/tools-registry/task_08.md new file mode 100644 index 000000000..0f98d3025 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_08.md @@ -0,0 +1,96 @@ +--- +status: pending +title: Public Go Extension SDK +type: backend +complexity: critical +dependencies: + - task_07 +--- + +# Task 08: Public Go Extension SDK + +## Overview + +Create a public Go SDK for out-of-process extension-host tools so Go authors can define executable tools with Go functions. This task mirrors the TypeScript extension SDK protocol without importing daemon internals or running third-party handlers in-process. + + +- ALWAYS READ `_techspec.md`, ADR-001, ADR-008, and ADR-009 before creating SDK APIs +- DO NOT import `internal/*` from `sdk/go`; public SDK code must build as an external consumer +- DO NOT confuse first-party `native_go` providers with third-party Go extension tools +- TESTS REQUIRED: SDK conformance must prove protocol, digest, and subprocess behavior from an external package + + + +1. MUST create `sdk/go` with public APIs for defining tools using Go functions. +2. MUST implement the extension subprocess JSON-RPC runtime compatible with task_07 protocol constants. +3. MUST expose Host API client primitives needed by Go extension tools without depending on daemon internals. +4. MUST match daemon and TypeScript SDK descriptor/digest fixtures from task_06 and task_07. +5. MUST add Go tool-provider create-extension template support. +6. 
MUST test the SDK from an external-package perspective. + + +## Subtasks +- [ ] 8.1 Create `sdk/go` package layout, public APIs, and module/test setup +- [ ] 8.2 Implement Go subprocess runtime for initialize, `provide_tools`, and `tools/call` +- [ ] 8.3 Add Go function-based `Tool(...)` registration and typed result/error helpers +- [ ] 8.4 Add Host API client primitives required by tool handlers +- [ ] 8.5 Add digest parity and conformance fixtures shared with daemon and TypeScript SDK +- [ ] 8.6 Add Go create-extension tool-provider template and scaffold tests +- [ ] 8.7 Add external-package tests proving no `internal/*` imports or daemon-only dependencies + +## Implementation Details + +Use TechSpec "Go Extension SDK Contract" and ADR-009. The SDK should behave like a public authoring surface equivalent to `@agh/extension-sdk`, while built-in daemon tools continue to use `native_go` providers from task_05. + +### Relevant Files +- `sdk/go/**` - new public Go extension SDK +- `internal/extension/protocol/host_api.go` - protocol constants and wire contracts to mirror or generate from +- `sdk/typescript/src/extension.ts` - TypeScript authoring precedent +- `sdk/create-extension/src/index.ts` - template generation support +- `sdk/create-extension/src/index.test.ts` - scaffold tests + +### Dependent Files +- `sdk/go/**/fixtures` - conformance and digest fixtures +- `sdk/typescript/test-fixtures/**` - shared digest vector parity if created by task_07 +- `packages/site/content/runtime/core/extensions/develop.mdx` - task_14 documents Go SDK authoring +- `openapi/agh.json` - no direct dependency unless SDK docs link generated API surfaces + +### Related ADRs +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - Go extension tools execute out-of-process +- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - Go descriptors must reconcile with 
manifests +- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - defines public SDK boundary + +### Web/Docs Impact +- `web/`: no direct code impact - checked generated API and systems; Go SDK state appears through generic extension tool descriptors already handled by task_13. +- `packages/site`: task_14 must add Go extension tool authoring docs, examples, and template instructions. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: adds public Go extension SDK, Go authoring API, subprocess runtime, conformance fixtures, and create-extension template. +- Agent manageability: Go extension tools become callable through registry CLI/HTTP/UDS surfaces once installed and enabled. +- Config lifecycle: consumes existing extension config and tool policy; no new config keys in this task. + +## Deliverables +- Public `sdk/go` extension SDK with function-based tool registration +- Go subprocess runtime compatible with TypeScript and daemon protocol contracts +- Go tool-provider create-extension template +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration/conformance tests using external-package SDK consumers **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `Tool(...)` rejects invalid IDs, schemas, duplicate handlers, and missing metadata + - [ ] Go digest output matches daemon and TypeScript RFC 8785/JCS fixtures + - [ ] Host API client redacts or rejects sensitive values according to protocol rules + - [ ] SDK package tests do not import daemon `internal/*` +- Integration tests: + - [ ] A compiled Go extension publishes and executes a read-only tool through the registry + - [ ] A compiled Go extension publishes a mutating tool gated by policy/approval + - [ ] Go create-extension template scaffolds a buildable tool-provider extension +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Go authors can define extension-host tools 
with Go functions through a public SDK +- Public SDK conformance is proven without daemon internal imports diff --git a/.compozy/tasks/tools-registry/task_09.md b/.compozy/tasks/tools-registry/task_09.md new file mode 100644 index 000000000..88d75dde8 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_09.md @@ -0,0 +1,98 @@ +--- +status: pending +title: Daemon-Owned MCP Call-Through and Auth Diagnostics +type: backend +complexity: critical +dependencies: + - task_03 + - task_04 +--- + +# Task 09: Daemon-Owned MCP Call-Through and Auth Diagnostics + +## Overview + +Add executable `mcp` backend tools by letting the daemon discover and call configured MCP servers through `internal/mcp`. This task reuses existing MCP config and auth storage, preserves remote server metadata, maps redacted auth status to registry availability reasons, and prevents token material from crossing package boundaries. + + +- ALWAYS READ `_techspec.md`, ADR-002, ADR-005, and ADR-010 before editing MCP behavior +- DO NOT create a second MCP auth store or leak `TokenRecord`, bearer headers, OAuth codes, PKCE verifiers, refresh tokens, or client secrets +- DO NOT convert remote HTTP/SSE MCP servers into blank ACP stdio entries +- TESTS REQUIRED: auth redaction, config preservation, and real/fake MCP call-through must be covered + + + +1. MUST implement `MCPCallExecutor` in `internal/mcp`, not in `internal/tools`. +2. MUST preserve configured MCP `Transport`, `URL`, `Auth`, command, args, and env through daemon resource/config projections. +3. MUST map `internal/mcp/auth` status into registry reason codes such as auth unconfigured, required, expired, invalid, and refresh failed. +4. MUST normalize external MCP tools into canonical `mcp__<server>__<tool>` IDs while preserving raw names in `SourceRef`. +5. MUST inject bearer/header material only inside `internal/mcp` and return only redacted diagnostics/results. +6. MUST cover stdio, HTTP, SSE, timeout, cancellation, collision, and auth-required behavior. 
+ + +## Subtasks +- [ ] 9.1 Fix MCP config/resource cloning so remote metadata and auth config are preserved +- [ ] 9.2 Add redacted registry-facing MCP auth status adapter +- [ ] 9.3 Implement `MCPCallExecutor` list/call behavior inside `internal/mcp` +- [ ] 9.4 Normalize MCP descriptors and collision handling into registry providers +- [ ] 9.5 Add token/redaction guards across errors, logs, events, CLI/API payloads, and test fixtures +- [ ] 9.6 Add MCP fake-server integration tests for discovery, call-through, auth, timeout, and cancellation + +## Implementation Details + +Use TechSpec "MCP Backend Contract", "MCP Auth/Hosted MCP Existing Surface Alignment", and ADR-010. External MCP backend call-through is distinct from hosted AGH MCP exposure in task_10. + +### Relevant Files +- `internal/mcp/auth/types.go` - redacted auth status values +- `internal/mcp/auth/service.go` - existing token lifecycle to reuse without exposing secrets +- `internal/daemon/tool_mcp_resources.go` - existing clone path that must preserve transport, URL, and auth +- `internal/config/provider.go` - MCP server config model +- `internal/config/mcp_resource.go` - MCP resource validation +- `internal/settings/service.go` - existing settings/status surface to keep aligned +- `internal/tools/registry*.go` - MCP provider adapter registration + +### Dependent Files +- `internal/api/contract/settings.go` - status parity with existing settings MCP surfaces +- `internal/api/contract/tools.go` - task_11 exposes MCP tool descriptors and auth reasons +- `internal/cli/mcp_auth.go` - existing auth CLI must remain the management path +- `web/src/hooks/routes/use-settings-mcp-servers-page.ts` - task_13 keeps settings diagnostics truthful + +### Related ADRs +- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - distinguishes remote MCP backend from hosted AGH MCP exposure +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - 
constrains external tool policy +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - defines daemon-owned MCP backend execution + +### Web/Docs Impact +- `web/`: task_13 must align tool diagnostics with existing MCP settings auth status and avoid invented remote login controls. +- `packages/site`: task_14 must update MCP config/auth docs and explain remote MCP call-through versus hosted AGH MCP. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: adds executable MCP backend provider support and source metadata for external tools. +- Agent manageability: existing `agh mcp auth` remains auth management; task_11/task_12 expose registry info/invoke paths. +- Config lifecycle: consumes existing MCP server config and `tools.policy` source grants; fixes preservation of `Transport`, `URL`, and `Auth`. + +## Deliverables +- `MCPCallExecutor` inside `internal/mcp` +- MCP descriptor discovery and call-through provider adapter +- Redacted MCP auth status integration with registry availability +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests against fake stdio, HTTP, and SSE MCP servers **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] MCP config clone preserves `Transport`, `URL`, `Auth`, command, args, and env with deep-copy semantics + - [ ] Auth statuses map exactly to registry availability reason codes without exposing token records + - [ ] Canonical MCP IDs fail closed on sanitized-name collisions and over-length IDs + - [ ] Authorization headers are never visible to `internal/tools`, API DTOs, logs, or events +- Integration tests: + - [ ] Fake stdio MCP server supports `tools/list` and `tools/call` through `Registry.Call` + - [ ] Fake HTTP/SSE MCP servers preserve transport-specific config and auth behavior + - [ ] Timeout, cancellation, auth-required, expired, invalid, and refresh-failed paths return deterministic redacted errors +- Test coverage target: >=80% +- All tests must pass + +## Success 
Criteria +- All tests passing +- Test coverage >=80% +- MCP backend tools are executable through daemon-owned call-through +- Existing MCP auth/config surfaces remain the source of truth and do not leak secrets diff --git a/.compozy/tasks/tools-registry/task_10.md b/.compozy/tasks/tools-registry/task_10.md new file mode 100644 index 000000000..ed41d1e56 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_10.md @@ -0,0 +1,99 @@ +--- +status: pending +title: Hosted AGH MCP Session Exposure and Approval Bridge +type: backend +complexity: critical +dependencies: + - task_05 + - task_09 +--- + +# Task 10: Hosted AGH MCP Session Exposure and Approval Bridge + +## Overview + +Expose AGH registry tools to ACP sessions through a session-bound hosted MCP proxy. This task adds the local MCP bind lifecycle, ACP stdio injection, UDS peer and AGH binary validation, and approval bridge behavior while ensuring every hosted MCP call re-enters the registry dispatch pipeline. + + +- ALWAYS READ `_techspec.md`, ADR-002, ADR-005, and ADR-010 before implementing hosted MCP +- DO NOT treat the bind nonce as bearer auth; it is a correlation value plus UDS peer/binary validation +- DO NOT accept client-supplied approval tokens over hosted MCP +- TESTS REQUIRED: bind failures, approval timeout/cancel/unreachable, and ACP `mcpServers` injection must be deterministic + + + +1. MUST add `agh tool mcp --session <session-id> --bind-nonce <nonce>` as the hosted MCP stdio entrypoint used by ACP. +2. MUST mint session-bound bind nonces with TTL, single-use behavior, and redacted diagnostics. +3. MUST validate UDS peer credentials and expected AGH binary before accepting hosted MCP binds; unsupported validation must fail closed. +4. MUST ensure hosted MCP `tools/list` equals the effective session-callable projection. +5. MUST route hosted MCP `tools/call` through `Registry.Call` and existing ACP permission/approval paths. +6. 
MUST return deterministic `approval_unreachable`, `approval_timed_out`, and `approval_canceled` errors. + + +## Subtasks +- [ ] 10.1 Add hosted MCP launch record, bind nonce lifecycle, and redacted diagnostics +- [ ] 10.2 Implement `agh tool mcp --session --bind-nonce` proxy entrypoint +- [ ] 10.3 Inject only the AGH-hosted stdio MCP entry into ACP session start/load payloads +- [ ] 10.4 Validate UDS peer credentials and expected AGH binary, failing closed when unavailable +- [ ] 10.5 Bridge hosted MCP approval-required calls to existing ACP/session permission flow +- [ ] 10.6 Add acpmock/runtime tests for hosted MCP list/call, bind failure, approval timeout, cancellation, and disconnect + +## Implementation Details + +Use TechSpec "Session Tool Exposure", "Hosted MCP Bind Contract", "Approval Bridge", and ADR-002. Hosted MCP is an exposure transport for AGH registry tools, not the external MCP backend implemented in task_09. + +### Relevant Files +- `internal/acp/client.go` - ACP `mcpServers` conversion/injection boundary +- `internal/acp/permission.go` - ACP permission mode and approval ceiling +- `internal/session/manager_start.go` - session start wiring +- `internal/session/manager_prompt.go` - existing approval route +- `internal/mcp/**` - hosted MCP proxy implementation +- `internal/cli/**` - `agh tool mcp` command entrypoint +- `internal/testutil/acpmock/**` - fixture support for ACP `mcpServers` and approval assertions + +### Dependent Files +- `internal/api/contract/tools.go` - task_11 exposes session projection and invoke semantics +- `internal/cli/tool*.go` - task_12 exposes user-facing command shape +- `web/src/systems/session/**` - task_13 may display hosted tool call/projection state +- `packages/site/content/runtime/core/sessions/permissions.mdx` - task_14 documents approval bridge behavior + +### Related ADRs +- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - defines AGH-hosted MCP as session exposure +- [ADR-005: 
ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - defines approval bridge and timeout behavior +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - separates remote MCP backend from hosted AGH MCP + +### Web/Docs Impact +- `web/`: task_13 must reflect session-callable tools and approval-required/unavailable states only when backed by API contracts. +- `packages/site`: task_14 must document hosted MCP threat model, bind lifecycle, approval bridge, ACP injection, and failure modes. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: exposes all allowed registry tools to ACP-compatible agents through a hosted MCP transport. +- Agent manageability: adds `agh tool mcp` internal command path and session projection behavior consumed by agents. +- Config lifecycle: consumes `[tools.hosted_mcp]` and `[tools.policy].approval_timeout_seconds` from task_02. + +## Deliverables +- Hosted AGH MCP proxy and bind lifecycle +- ACP session MCP injection that does not misrepresent remote MCP servers +- Approval bridge and deterministic timeout/cancel/unreachable errors +- Unit tests with 80%+ coverage **(REQUIRED)** +- Runtime/acpmock integration tests **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Bind nonce is session-bound, single-use, expires deterministically, and is redacted from logs/errors + - [ ] UDS peer credential or expected binary validation failure rejects the bind + - [ ] `toSDKMCPServers` or replacement injection never converts remote HTTP/SSE MCP servers into blank stdio entries + - [ ] Approval timeout, cancellation, and unreachable approval channel return stable reason codes +- Integration tests: + - [ ] acpmock observes the AGH-hosted MCP entry during session start/load + - [ ] Hosted MCP `tools/list` equals `GET /api/sessions/{id}/tools` once task_11 lands, or the equivalent internal projection before routes exist + - [ ] Hosted MCP safe built-in call succeeds and mutating 
call routes to ACP permission request + - [ ] Proxy disconnect cancels in-flight tool calls without leaving stale bind records +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- ACP sessions receive AGH registry tools through hosted MCP only +- Hosted MCP calls cannot bypass registry dispatch, policy, approval, hooks, or redaction diff --git a/.compozy/tasks/tools-registry/task_11.md b/.compozy/tasks/tools-registry/task_11.md new file mode 100644 index 000000000..1f517e150 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_11.md @@ -0,0 +1,100 @@ +--- +status: pending +title: API Contracts, HTTP/UDS Routes, and Codegen +type: backend +complexity: critical +dependencies: + - task_05 + - task_07 + - task_09 + - task_10 +--- + +# Task 11: API Contracts, HTTP/UDS Routes, and Codegen + +## Overview + +Expose the Tool Registry through stable public daemon contracts after all executable backends exist. This task adds contract DTOs, core handlers, HTTP and UDS routes, OpenAPI generation, generated TypeScript types, structured errors, and session-specific projection/invoke endpoints. + + +- ALWAYS READ `_techspec.md`, ADR-006, and the contract/codegen rules before editing API contracts +- DO NOT expose an API that can only list descriptors; invoke routes must call executable native, extension, and MCP backends +- DO NOT update OpenAPI without regenerating downstream TypeScript and web contract artifacts +- TESTS REQUIRED: HTTP and UDS must stay behaviorally aligned for the same daemon state + + + +1. MUST add public DTOs for tool descriptors, projections, availability, policy decisions, toolsets, call inputs, call results, and structured errors. +2. MUST add core handler interfaces that depend on registry abstractions rather than concrete backend packages. +3. MUST add HTTP routes for operator registry list/search/info/toolsets and invoke operations where allowed by TechSpec. +4. 
MUST add UDS route parity for agent-operable local management and invocation paths. +5. MUST add session projection endpoints so hosted MCP/web/agents can compare visible callable tools. +6. MUST run codegen and co-ship OpenAPI plus generated web TypeScript contracts. + + +## Subtasks +- [ ] 11.1 Add tool registry DTOs and contract tests +- [ ] 11.2 Inject registry interfaces into core handlers without package-boundary violations +- [ ] 11.3 Add HTTP list/search/info/invoke/toolset/session routes and status-code/body tests +- [ ] 11.4 Add UDS parity routes and UDS client-compatible error payloads +- [ ] 11.5 Regenerate OpenAPI and web generated TypeScript contracts +- [ ] 11.6 Add contract/codegen drift tests and handler integration tests + +## Implementation Details + +Use TechSpec "API Endpoints", "Agent Manageability", "Data Models", and "Implementation Steps" 13-14. Codegen co-ship is mandatory because web and docs consume these contracts. + +### Relevant Files +- `internal/api/contract/` - DTOs for tool registry surfaces +- `internal/api/core/handlers.go` - handler dependency injection +- `internal/api/core/tools.go` - new core tool handlers +- `internal/api/httpapi/routes.go` - HTTP route registration +- `internal/api/udsapi/routes.go` - UDS route registration +- `openapi/agh.json` - regenerated OpenAPI artifact +- `web/src/generated/agh-openapi.d.ts` - regenerated TypeScript contract artifact + +### Dependent Files +- `internal/cli/client.go` - task_12 consumes UDS/HTTP client behavior +- `web/src/systems/tools/**` - task_13 consumes generated tool types +- `packages/site/content/runtime/api-reference/index.mdx` - task_14 references generated API docs +- `internal/api/*_test.go` - HTTP/UDS parity coverage + +### Related ADRs +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - defines operator vs session response behavior +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - public DTOs use 
canonical ToolID only +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - API must not leak MCP auth material + +### Web/Docs Impact +- `web/`: regenerate `web/src/generated/agh-openapi.d.ts`; task_13 must build tools adapters, query options, MSW fixtures, and UI from these generated types. +- `packages/site`: task_14 must update API reference and registry endpoint docs after OpenAPI changes. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: exposes native, extension-host, and MCP tool metadata through stable contracts without leaking backend implementation details. +- Agent manageability: adds HTTP and UDS paths for list, search, info, invoke, toolsets, and session projections with structured output/errors. +- Config lifecycle: no new keys; responses reflect config/policy from task_02 and task_03. + +## Deliverables +- Tool Registry contract DTOs and handler interfaces +- HTTP and UDS route parity for registry operations +- Regenerated `openapi/agh.json` and `web/src/generated/agh-openapi.d.ts` +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests for HTTP/UDS parity and codegen drift **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Contract DTOs serialize canonical `tool_id`, backend kind, source ref, availability, and structured errors without secrets + - [ ] Core handlers map registry errors to deterministic HTTP/UDS status and body payloads + - [ ] Session endpoints return callable projections only while operator endpoints include unavailable/denied tools +- Integration tests: + - [ ] `GET /api/tools`, search, info, invoke, and toolsets routes return status-code plus body assertions + - [ ] Matching UDS routes return behaviorally equivalent payloads for the same state + - [ ] `make codegen` and `make codegen-check` pass with no generated drift + - [ ] `make bun-typecheck` and `make bun-test` pass against regenerated web types +- Test coverage target: >=80% +- All tests must pass + +## 
Success Criteria +- All tests passing +- Test coverage >=80% +- HTTP, UDS, OpenAPI, and generated TypeScript contracts describe the same registry behavior +- Public contracts expose executable backends without leaking tokens, nonces, or approval secrets diff --git a/.compozy/tasks/tools-registry/task_12.md b/.compozy/tasks/tools-registry/task_12.md new file mode 100644 index 000000000..231d0ff8e --- /dev/null +++ b/.compozy/tasks/tools-registry/task_12.md @@ -0,0 +1,95 @@ +--- +status: pending +title: CLI Operator Commands +type: backend +complexity: high +dependencies: + - task_11 +--- + +# Task 12: CLI Operator Commands + +## Overview + +Add agent-operable CLI surfaces for registry inspection and invocation. This task implements structured `agh tool` and `agh toolsets` commands backed by the same UDS/HTTP contracts as task_11 and preserves existing `agh mcp auth` as the MCP authentication management path. + + +- ALWAYS READ `_techspec.md`, ADR-005, ADR-006, ADR-007, and CLI docs rules before adding commands +- DO NOT duplicate MCP auth login/logout/status under `agh tool`; existing `agh mcp auth` remains authoritative +- DO NOT print secrets, raw bind nonces, approval tokens, OAuth material, or unredacted sensitive inputs +- TESTS REQUIRED: every command needs JSON output tests and deterministic error-body assertions + + + +1. MUST add `agh tool list`, `agh tool search`, `agh tool info`, and `agh tool invoke`. +2. MUST add `agh toolsets list` and `agh toolsets info`. +3. MUST support structured output modes needed by agents, including JSON where applicable. +4. MUST validate input JSON files/stdin for invoke commands before sending requests. +5. MUST return deterministic non-zero exits and structured errors for denied, unavailable, conflicted, auth-required, approval, and schema-validation failures. +6. MUST regenerate CLI docs in task_14 and include tests that detect missing generated docs. 
+ + +## Subtasks +- [ ] 12.1 Add CLI command registration for `tool` and `toolsets` +- [ ] 12.2 Add UDS/HTTP client methods for list/search/info/invoke/toolsets +- [ ] 12.3 Add JSON/text output rendering with redaction and deterministic errors +- [ ] 12.4 Add input JSON validation and stdin/file support for invocation +- [ ] 12.5 Add command tests, snapshots, and error-path coverage +- [ ] 12.6 Mark CLI docs regeneration requirements for task_14 + +## Implementation Details + +Use TechSpec "Agent Manageability" and "Implementation Steps" 14 and 16. CLI commands should be thin clients over the contracts from task_11, not a second registry implementation. + +### Relevant Files +- `internal/cli/root.go` - command registration +- `internal/cli/client.go` - CLI client methods +- `internal/cli/tool*.go` - new tool and toolset commands +- `internal/cli/mcp_auth.go` - existing MCP auth command to preserve +- `internal/api/contract/tools.go` - DTOs consumed by CLI rendering +- `packages/site/content/runtime/cli-reference/**` - generated docs target for task_14 + +### Dependent Files +- `internal/cli/*_test.go` - command and output tests +- `packages/site/content/runtime/core/tools.mdx` - task_14 references CLI examples +- `web/src/systems/tools/**` - task_13 may mirror CLI-visible states in UI + +### Related ADRs +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - CLI invoke cannot bypass approval/policy +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - CLI operator surfaces can show diagnostic states +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - CLI accepts canonical ToolID only + +### Web/Docs Impact +- `web/`: no direct code impact - checked systems; web consumes API contracts from task_11 in task_13. +- `packages/site`: task_14 must run `make cli-docs` and document `agh tool`, `agh toolsets`, and the relationship to `agh mcp auth`. 
+ +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: CLI exposes extension-host and MCP tool diagnostics using the same descriptor/reason model as native tools. +- Agent manageability: adds primary local agent-operable commands for list, search, info, invoke, and toolset inspection with structured output. +- Config lifecycle: no new keys; CLI reflects config and policy from earlier tasks. + +## Deliverables +- `agh tool` and `agh toolsets` command families +- Structured JSON/text output and deterministic error handling +- CLI tests and snapshots +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration tests against daemon UDS/HTTP contracts **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] `agh tool list -o json` renders canonical IDs, backend kind, availability, and redacted diagnostics + - [ ] `agh tool info -o json` rejects invalid IDs and unavailable tools with structured errors + - [ ] `agh tool invoke` validates JSON input and redacts sensitive result/error fields + - [ ] `agh toolsets list/info` renders expanded and unavailable members deterministically +- Integration tests: + - [ ] CLI output matches HTTP/UDS payloads for the same daemon state + - [ ] Existing `agh mcp auth status --refresh -o json` remains the auth-management path and agrees with registry diagnostics + - [ ] Generated CLI docs include new command pages after task_14 runs `make cli-docs` +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Agents can manage and invoke registry tools through structured CLI commands +- CLI does not duplicate auth stores or bypass registry policy/approval diff --git a/.compozy/tasks/tools-registry/task_13.md b/.compozy/tasks/tools-registry/task_13.md new file mode 100644 index 000000000..1ac5ac2cb --- /dev/null +++ b/.compozy/tasks/tools-registry/task_13.md @@ -0,0 +1,96 @@ +--- +status: pending +title: Web Operator Tool Diagnostics Surface +type: frontend 
+complexity: high +dependencies: + - task_11 + - task_12 +--- + +# Task 13: Web Operator Tool Diagnostics Surface + +## Overview + +Add a truthful web diagnostics surface for the Tool Registry using generated contracts from the daemon. This task lets operators inspect tool descriptors, backend/source state, policy/availability reasons, MCP auth diagnostics, and session-callable projections without inventing controls that the backend does not support. + + +- ALWAYS READ `_techspec.md`, `web/CLAUDE.md`, `DESIGN.md`, and task_11 contracts before changing web code +- DO NOT create login, approval, invoke, or remote-management controls unless backed by real HTTP/UDS contracts +- DO NOT hand-write DTOs that should come from `web/src/generated/agh-openapi.d.ts` +- TESTS REQUIRED: adapters, query hooks, MSW fixtures, stories/routes, and generated types must compile together + + + +1. MUST build web UI from generated Tool Registry contracts rather than mirrored manual DTOs. +2. MUST add or update `web/src/systems/tools/**` for adapters, query keys/options, hooks, MSW fixtures, and components where appropriate. +3. MUST integrate MCP auth diagnostics with existing settings patterns without duplicating `agh mcp auth` behavior. +4. MUST display operator-visible unavailable, unauthorized, conflicted, and auth-required reason codes truthfully. +5. MUST show session-callable projections only where backed by task_11 endpoints. +6. MUST preserve `DESIGN.md` visual grammar and web-specific skill requirements. 
+ + +## Subtasks +- [ ] 13.1 Add tools system adapters, query keys/options, hooks, and MSW fixtures from generated contracts +- [ ] 13.2 Add operator diagnostics components for descriptors, backend/source, availability, policy, and auth reasons +- [ ] 13.3 Integrate settings/session views only where daemon-backed state exists +- [ ] 13.4 Add route or settings placement consistent with existing web architecture +- [ ] 13.5 Add Storybook or route stories/fixtures for native, extension, MCP, conflicted, unavailable, and auth-required states +- [ ] 13.6 Add web tests, typecheck, lint, and build coverage + +## Implementation Details + +Use TechSpec "Impact Analysis", "Agent Manageability", and task_11 generated contracts. Follow `web/CLAUDE.md`, `DESIGN.md`, and existing `web/src/systems/settings`, `web/src/systems/skill`, `web/src/systems/network`, and `web/src/systems/session` patterns. + +### Relevant Files +- `web/src/generated/agh-openapi.d.ts` - generated Tool Registry contract types +- `web/src/systems/tools/**` - new tool diagnostics system +- `web/src/systems/settings/**` - MCP auth diagnostics integration if reused +- `web/src/systems/session/**` - session-callable projection display if backed by endpoints +- `web/src/test/**` - MSW/test utilities if existing patterns require updates +- `DESIGN.md` - design-system tokens and visual grammar + +### Dependent Files +- `internal/api/contract/tools.go` - generated source for web types +- `packages/site/content/runtime/core/tools.mdx` - task_14 may include screenshots or UI descriptions only after this surface exists +- `web/src/systems/tools/**/*.stories.*` - story fixtures for visual and state coverage + +### Related ADRs +- [ADR-006: Tool Visibility by Surface](adrs/adr-006-tool-visibility-by-surface.md) - web is operator-visible and must show diagnostic states truthfully +- [ADR-007: Canonical Tool ID Format](adrs/adr-007-canonical-tool-id-format.md) - UI must display canonical ToolID +- [ADR-010: Remote MCP 
Call-Through](adrs/adr-010-remote-mcp-call-through.md) - MCP auth diagnostics remain redacted + +### Web/Docs Impact +- `web/`: creates or updates `web/src/systems/tools/**`, generated type consumers, MSW fixtures, route/settings integration, and story/test coverage. +- `packages/site`: task_14 may document the operator diagnostics surface after it exists; no site docs are authored in this task. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: displays native, extension-host, and MCP tool source/availability states without changing extension APIs. +- Agent manageability: mirrors daemon-backed CLI/API state for operators; does not create new agent-operable backend verbs. +- Config lifecycle: displays config-derived policy/availability only if exposed by task_11; does not add config keys. + +## Deliverables +- Tool diagnostics web system using generated contracts +- MSW fixtures and state coverage for native, extension, MCP, denied, conflicted, unavailable, and auth-required tools +- Route/settings/session integration where backed by daemon contracts +- Unit tests with 80%+ coverage **(REQUIRED)** +- Integration/component tests for web diagnostics **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Adapters parse generated tool DTOs without manual duplicated DTO definitions + - [ ] Query hooks handle loading, error, empty, unavailable, conflicted, and auth-required states + - [ ] Components display canonical `tool_id`, backend kind, source, and reason codes without secrets + - [ ] Settings integration does not render unsupported OAuth or approval controls +- Integration tests: + - [ ] MSW-backed route or component test renders native, extension-host, and MCP tool diagnostics + - [ ] Session projection display matches task_11 endpoint semantics + - [ ] `make bun-lint`, `make bun-typecheck`, `make bun-test`, and `make web-build` pass +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test 
coverage >=80% +- Web shows truthful Tool Registry diagnostics backed by daemon contracts +- No plausible-but-unsupported UI controls are introduced diff --git a/.compozy/tasks/tools-registry/task_14.md b/.compozy/tasks/tools-registry/task_14.md new file mode 100644 index 000000000..7a7609eb0 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_14.md @@ -0,0 +1,101 @@ +--- +status: pending +title: Site Documentation and Generated References +type: docs +complexity: high +dependencies: + - task_13 +--- + +# Task 14: Site Documentation and Generated References + +## Overview + +Ship the documentation and generated references required for operators, agents, and extension authors to use the Tool Registry correctly. This task updates Fumadocs content, CLI reference output, API reference links, config documentation, extension authoring docs, and MCP/hosted MCP threat-model guidance. + + +- ALWAYS READ `_techspec.md`, every ADR, `packages/site/CLAUDE.md`, and completed tasks 01-13 before writing docs +- DO NOT document speculative controls, hidden flags, or behavior not backed by merged daemon/web contracts +- DO NOT hand-author generated CLI reference content; regenerate it from Cobra output +- TESTS REQUIRED: site source generation, typecheck, build, and link/reference checks must pass + + + +1. MUST document the Tool Registry model, canonical ToolID, backend kinds, operator/session visibility, policy gates, and result redaction. +2. MUST document `config.toml` keys for `[tools]`, `[tools.policy]`, `[tools.hosted_mcp]`, agent `tools`, agent `toolsets`, and agent `deny_tools`. +3. MUST document TypeScript `extension.tool(...)`, public Go extension SDK authoring, manifest-authoritative descriptors, and runtime reconciliation. +4. MUST document external MCP call-through, existing `agh mcp auth` management, redacted auth diagnostics, and hosted AGH MCP session exposure. +5. MUST regenerate CLI docs for `agh tool`, `agh toolsets`, and related command changes. +6. 
MUST update API/reference docs or generated references for new HTTP/UDS surfaces. + + +## Subtasks +- [ ] 14.1 Update registry, policy, ToolID, and visibility documentation +- [ ] 14.2 Update config docs and examples for tool policy and hosted MCP +- [ ] 14.3 Update extension authoring docs for TypeScript and Go executable tools +- [ ] 14.4 Update MCP docs for call-through, auth diagnostics, hosted MCP, and approval bridge behavior +- [ ] 14.5 Regenerate CLI reference and API references +- [ ] 14.6 Add docs tests/build verification and remove any obsolete descriptor-only wording + +## Implementation Details + +Use TechSpec "Impact Analysis", "Config Lifecycle", "Extensibility Plan", and ADRs 001-010. Docs must distinguish cold manifest resources from executable extension-host tools to prevent repeating the rejected descriptor-only design. + +### Relevant Files +- `packages/site/content/runtime/core/extensions/develop.mdx` - extension authoring docs +- `packages/site/content/runtime/core/configuration/config-toml.mdx` - tools config keys and examples +- `packages/site/content/runtime/core/configuration/mcp-json.mdx` - MCP config/auth behavior +- `packages/site/content/runtime/core/sessions/permissions.mdx` - approval bridge and hosted MCP behavior +- `packages/site/content/runtime/api-reference/index.mdx` - API reference entry point +- `packages/site/content/runtime/cli-reference/**` - generated CLI docs + +### Dependent Files +- `internal/cli/**` - source for generated CLI docs +- `openapi/agh.json` - source for API reference docs +- `web/src/systems/tools/**` - source of truth for any UI screenshots or operator-surface descriptions +- `.compozy/tasks/tools-registry/adrs/**` - decision evidence to cite + +### Related ADRs +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - docs must explain native vs extension-host execution +- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - docs 
must explain hosted MCP session exposure +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - docs must explain approval and policy layering +- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - docs must explain reconciliation +- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - docs must explain Go SDK authoring +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - docs must explain external MCP call-through + +### Web/Docs Impact +- `web/`: no code changes unless docs need screenshots or examples from the actual task_13 UI; checked `web/src/systems/tools/**`. +- `packages/site`: updates runtime core docs, config docs, MCP docs, sessions/permissions docs, API reference, generated CLI reference, and navigation metadata as needed. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: documents extension manifests, TypeScript SDK, Go SDK, MCP backend tools, hosted MCP, hooks, and tool resources. +- Agent manageability: documents CLI, HTTP, UDS, structured output, session projections, deterministic errors, and approval behavior. +- Config lifecycle: documents all new/changed tools config keys, defaults, examples, validation semantics, and removed descriptor-only assumptions. 
+ +## Deliverables +- Updated Fumadocs pages for Tool Registry runtime, policy, extensions, MCP, sessions, and configuration +- Regenerated CLI reference pages for new commands +- Updated API reference entry points for tool endpoints +- Documentation tests and build verification +- Unit/documentation checks with 80%+ relevant coverage where applicable **(REQUIRED)** +- Integration/site build tests **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Docs mention canonical `ToolID` and do not mention dotted aliases or descriptor-only callable extensions + - [ ] Config docs list defaults and validation behavior for every new tools key + - [ ] Extension docs include both TypeScript and Go function-based tool authoring + - [ ] MCP docs distinguish external MCP backend call-through from hosted AGH MCP exposure +- Integration tests: + - [ ] `make cli-docs` regenerates command pages with `agh tool` and `agh toolsets` + - [ ] `cd packages/site && bun run source:generate` passes + - [ ] `cd packages/site && bun run typecheck` passes + - [ ] `cd packages/site && bun run build` passes +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- Operators, agents, and extension authors can follow docs without relying on internal code knowledge +- Generated CLI/API references match the implemented daemon surfaces diff --git a/.compozy/tasks/tools-registry/task_15.md b/.compozy/tasks/tools-registry/task_15.md new file mode 100644 index 000000000..40b99b670 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_15.md @@ -0,0 +1,101 @@ +--- +status: pending +title: QA Plan and Test Coverage +type: test +complexity: high +dependencies: + - task_14 +--- + +# Task 15: QA Plan and Test Coverage + +## Overview + +Generate the release-grade QA planning artifacts for the Tool Registry after implementation and docs land. 
This task uses `qa-report` to create traceable plans, manual cases, regression suites, and artifact paths covering native tools, TypeScript and Go extension-host tools, MCP call-through, hosted MCP, CLI/HTTP/UDS, web diagnostics, docs, config, policy, approval, and redaction. + + +- ALWAYS READ `_techspec.md`, every ADR, tasks 01-14, and every per-task memory file before drafting test cases +- ACTIVATE `qa-report` with `qa-output-path=.compozy/tasks/tools-registry` +- KEEP the same `qa-output-path` for `qa-execution`; all planning and execution artifacts must live under `.compozy/tasks/tools-registry/qa/` +- DO NOT execute the flows in this task; this is planning, prioritization, traceability, and artifact generation only +- EVERY public surface touched by tasks 01-14 must have explicit coverage + + + +1. MUST use the `qa-report` skill with `qa-output-path=.compozy/tasks/tools-registry`. +2. MUST create a feature-level QA plan under `.compozy/tasks/tools-registry/qa/test-plans/`. +3. MUST create manual test cases under `.compozy/tasks/tools-registry/qa/test-cases/`. +4. MUST create regression-suite documents covering smoke, targeted, full, and security/redaction priorities. +5. MUST trace every P0/P1 case to tasks 01-14, `_techspec.md`, or ADR-001 through ADR-010. +6. MUST reserve stable artifact directories for issues, logs, screenshots, traces, fixtures, and verification reports. 
+ + +## Subtasks +- [ ] 15.1 Activate `qa-report` with `qa-output-path=.compozy/tasks/tools-registry` +- [ ] 15.2 Write feature-level QA plan with environment matrix, risks, entry/exit criteria, and artifact layout +- [ ] 15.3 Generate manual test cases for native, extension-host, MCP, hosted MCP, policy, approval, CLI/API/UDS, web, docs, and config +- [ ] 15.4 Build regression suites with P0/P1 ordering for task_16 execution +- [ ] 15.5 Map each P0/P1 case to tasks, TechSpec invariants, ADR decisions, or safety invariants +- [ ] 15.6 Include negative tests, edge cases, concurrency stress, and redaction leak assertions + +## Implementation Details + +Use the TechSpec "Test Strategy", "Safety Invariants", and all task files 01-14. The QA plan must prove real executable tools through all backend classes and must not rely on mocks alone for final confidence. + +### Relevant Files +- `.agents/skills/qa-report/SKILL.md` - required QA planning workflow +- `.compozy/tasks/tools-registry/_techspec.md` - authoritative requirements and invariants +- `.compozy/tasks/tools-registry/adrs/` - accepted architecture decisions +- `.compozy/tasks/tools-registry/task_01.md` through `.compozy/tasks/tools-registry/task_14.md` - implementation and verification scope +- `web/CLAUDE.md` - web verification rules for UI-bearing coverage +- `packages/site/CLAUDE.md` - site verification rules for docs coverage + +### Dependent Files +- `.compozy/tasks/tools-registry/qa/test-plans/tool-registry-test-plan.md` - feature-level plan +- `.compozy/tasks/tools-registry/qa/test-plans/*-regression.md` - regression suites consumed by task_16 +- `.compozy/tasks/tools-registry/qa/test-cases/TC-*.md` - manual test cases +- `.compozy/tasks/tools-registry/qa/issues/BUG-*.md` - defects found during QA planning or execution +- `.compozy/tasks/tools-registry/qa/verification-report.md` - task_16 final report + +### Related ADRs +- [ADR-001: Extension Tool Execution 
Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - QA must prove executable native and extension-host tools +- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - QA must prove hosted MCP exposure +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - QA must prove policy and approval behavior +- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - QA must prove reconciliation +- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - QA must prove Go SDK authoring +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - QA must prove MCP call-through and auth redaction + +### Web/Docs Impact +- `web/`: QA plan must include web diagnostics, generated type parity, MSW fixtures, route/component behavior, and Playwright/browser coverage from task_13. +- `packages/site`: QA plan must include site docs, generated CLI reference, config docs, extension docs, MCP docs, and API reference coverage from task_14. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: QA scope includes extension manifests, TypeScript SDK, Go SDK, MCP backend tools, hosted MCP, hooks, tool resources, and registry backends. +- Agent manageability: QA scope includes CLI structured output, HTTP endpoints, UDS routes, session projections, deterministic errors, and approval paths. +- Config lifecycle: QA scope includes `[tools]`, `[tools.policy]`, `[tools.hosted_mcp]`, agent `tools`, `toolsets`, `deny_tools`, defaults, overlays, examples, and docs. 
+ +## Deliverables +- `.compozy/tasks/tools-registry/qa/test-plans/tool-registry-test-plan.md` +- One or more `.compozy/tasks/tools-registry/qa/test-plans/*-regression.md` documents reusable by task_16 +- Manual test cases under `.compozy/tasks/tools-registry/qa/test-cases/` **(REQUIRED)** +- Traceability matrix from P0/P1 cases to tasks, TechSpec sections, ADRs, and safety invariants **(REQUIRED)** +- Stable artifact layout for task_16 logs, screenshots, traces, issues, and verification reporting **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] QA plan includes objectives, scope, risks, environment matrix, entry criteria, and exit criteria across backend, CLI, API, UDS, web, docs, SDKs, MCP, and config + - [ ] Manual test cases exist for native tools, TypeScript extension tools, Go extension tools, MCP call-through, hosted MCP, policy, approval, redaction, and web/docs + - [ ] Regression suites define smoke, targeted, full, and security/redaction lanes with explicit P0/P1 ordering + - [ ] Every P0/P1 case names the exact task, TechSpec invariant, or ADR it proves +- Integration tests: + - [ ] All generated QA artifacts live under `.compozy/tasks/tools-registry/qa/` + - [ ] task_16 can consume the plan without redefining scope, priorities, output paths, or environment setup + - [ ] Any bug report created during planning references the originating test case or documented discrepancy +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- `qa-report` artifacts are complete, traceable, and stored under `.compozy/tasks/tools-registry/qa/` +- task_16 can begin execution without changing QA scope or artifact paths diff --git a/.compozy/tasks/tools-registry/task_16.md b/.compozy/tasks/tools-registry/task_16.md new file mode 100644 index 000000000..af383d564 --- /dev/null +++ b/.compozy/tasks/tools-registry/task_16.md @@ -0,0 +1,111 @@ +--- +status: pending +title: Real-Scenario QA Execution +type: test 
+complexity: critical +dependencies: + - task_15 +--- + +# Task 16: Real-Scenario QA Execution + +## Overview + +Execute the Tool Registry QA plan against a fresh, isolated, production-like AGH lab. This task uses `qa-execution`, `real-scenario-qa`, and browser automation for the UI-bearing scope, fixes root causes for reproduced defects, reruns gates, and records machine-readable evidence. + + +- ALWAYS READ `.compozy/tasks/tools-registry/qa/test-plans/*` before executing +- ACTIVATE `real-scenario-qa`, `qa-execution`, `agh-qa-bootstrap`, and `agh-worktree-isolation` +- USE unique `AGH_HOME`, daemon ports, provider homes, and tmux bridge/socket paths for this QA pass +- For UI coverage, drive Playwright via `browser-use:browser`; fall back to `agent-browser` only if `browser-use:browser` is unavailable +- DO NOT rely on mocks only; final validation must exercise real daemon, CLI, HTTP/UDS, SDK, MCP, browser, and docs/build flows + + + +1. MUST bootstrap a fresh isolated QA lab and persist the bootstrap manifest path in the verification report. +2. MUST execute task_15 P0/P1 cases for native tools, TypeScript extension tools, Go extension tools, external MCP call-through, hosted MCP, policy, approval, redaction, CLI/HTTP/UDS, web, docs, and config. +3. MUST run `make test-e2e-runtime` and `make test-e2e-web` for the UI-bearing feature scope. +4. MUST drive the highest-risk web diagnostics flow through `browser-use:browser` with `agent-browser` fallback only if needed. +5. MUST create `.compozy/tasks/tools-registry/qa/issues/BUG-NNN.md` for every reproduced defect and fix root causes before claiming completion. +6. MUST run final `make verify` and record evidence, lab root, runtime home, base URL, provider homes, and command outputs in `qa/verification-report.md`. 
+ + +## Subtasks +- [ ] 16.1 Bootstrap a fresh isolated QA lab with `agh-qa-bootstrap` and record manifest/runtime paths +- [ ] 16.2 Execute smoke and P0/P1 backend/CLI/API/UDS/config cases from task_15 +- [ ] 16.3 Execute real TypeScript and Go extension-host tool fixtures through the registry +- [ ] 16.4 Execute real external MCP/OAuth call-through and hosted MCP session exposure flows +- [ ] 16.5 Execute web diagnostics through Playwright and `browser-use:browser`, then validate site docs/build +- [ ] 16.6 File and fix every reproduced defect, rerun targeted checks, then run final `make verify` +- [ ] 16.7 Write `qa/verification-report.md` with machine-readable QA bootstrap evidence + +## Implementation Details + +Use task_15 artifacts as the execution contract. This task validates real behavior across the daemon, extension subprocesses, MCP servers, CLI, HTTP, UDS, web, generated docs, config lifecycle, and redaction boundaries. + +Run `make test-e2e-runtime` (daemon harness) and `make test-e2e-web` (Playwright). Drive the highest-risk UI workflow through `browser-use:browser`; fall back to `agent-browser` only if `browser-use:browser` is unavailable. Do not silently substitute shell-only checks. + +For CLI/API/agent-manageability coverage, exercise structured CLI output, HTTP/UDS routes, status/config discovery, and deterministic errors, and compare the CLI, HTTP, and UDS results against the persisted daemon state. For extensibility/config coverage, validate TypeScript and Go extension tool authoring, MCP auth/config lifecycle, hosted MCP, and config overlays end-to-end. 
+ +### Relevant Files +- `.compozy/tasks/tools-registry/qa/test-plans/*` - execution plan and regression suites +- `.compozy/tasks/tools-registry/qa/test-cases/TC-*.md` - manual and scenario test cases +- `.agents/skills/agh-qa-bootstrap/SKILL.md` - deterministic QA lab bootstrap workflow +- `.agents/skills/real-scenario-qa/SKILL.md` - release-grade scenario QA workflow +- `.agents/skills/qa-execution/SKILL.md` - execution and reporting workflow +- `web/e2e/**` - browser-side E2E coverage if tests are added or extended +- `internal/testutil/acpmock/**` - ACP hosted MCP and approval bridge fixtures + +### Dependent Files +- `.compozy/tasks/tools-registry/qa/verification-report.md` - final evidence report +- `.compozy/tasks/tools-registry/qa/issues/BUG-*.md` - reproduced defects and fixes +- `.compozy/tasks/tools-registry/qa/screenshots/**` - browser evidence +- `.compozy/tasks/tools-registry/qa/logs/**` - daemon, CLI, MCP, extension, and web logs +- `.compozy/tasks/tools-registry/qa/bootstrap-manifest.json` - copied or referenced QA bootstrap manifest + +### Related ADRs +- [ADR-001: Extension Tool Execution Boundary](adrs/adr-001-extension-tool-execution-boundary.md) - execution must prove executable native and extension-host tools +- [ADR-002: Session Tool Exposure Path](adrs/adr-002-session-tool-exposure-path.md) - execution must prove hosted MCP exposure +- [ADR-005: ACP Approval Policy Integration](adrs/adr-005-acp-approval-policy-integration.md) - execution must prove approval behavior +- [ADR-008: Manifest-Authoritative Extension Tool Descriptors](adrs/adr-008-manifest-authoritative-extension-tool-descriptors.md) - execution must prove reconciliation +- [ADR-009: Public Go Extension Tool SDK](adrs/adr-009-public-go-extension-tool-sdk.md) - execution must prove Go SDK authoring +- [ADR-010: Remote MCP Call-Through](adrs/adr-010-remote-mcp-call-through.md) - execution must prove remote MCP call-through and auth redaction + +### Web/Docs Impact +- `web/`: 
execute `make test-e2e-web`, browser-use diagnostics flow, generated type checks, MSW-backed tests, and web build. +- `packages/site`: execute docs source generation, typecheck/build, generated CLI reference verification, and docs scenario checks from task_15. + +## Extensibility / Agent Manageability / Config Lifecycle +- Extensibility: validates real TypeScript SDK, public Go SDK, extension manifests, MCP backend tools, hosted MCP, hooks, and registry dispatch. +- Agent manageability: validates CLI structured output, HTTP/UDS parity, session projections, invoke flows, deterministic errors, and approval paths. +- Config lifecycle: validates fresh config defaults, overlays, invalid values, MCP auth config, hosted MCP keys, policy keys, docs examples, and no legacy aliases. + +## Deliverables +- Fresh QA bootstrap manifest and isolated lab evidence +- Executed QA cases with logs, screenshots, traces, and command evidence +- `BUG-NNN.md` reports for every reproduced defect **(REQUIRED when defects are found)** +- Root-cause fixes and rerun evidence for every resolved defect **(REQUIRED when defects are found)** +- Final `.compozy/tasks/tools-registry/qa/verification-report.md` **(REQUIRED)** +- Final `make verify` evidence **(REQUIRED)** + +## Tests +- Unit tests: + - [ ] Targeted package tests rerun for every defect fix + - [ ] Redaction leak checks assert sentinel tokens are absent from logs, events, CLI JSON, HTTP JSON, UDS JSON, MCP responses, web payloads, and QA artifacts + - [ ] Config validation tests cover invalid aliases, unsafe values, and overlay precedence +- Integration tests: + - [ ] `make test-e2e-runtime` passes against an isolated lab + - [ ] `make test-e2e-web` passes and the highest-risk UI workflow is driven through `browser-use:browser` + - [ ] Real TypeScript and Go extension fixtures publish and execute tools through the registry + - [ ] Real external MCP/OAuth fixture proves call-through and redacted auth diagnostics + - [ ] Hosted MCP 
`tools/list` and `tools/call` match session projections and approval behavior + - [ ] CLI, HTTP, and UDS outputs agree for the same persisted state + - [ ] `make verify` passes after all fixes +- Test coverage target: >=80% +- All tests must pass + +## Success Criteria +- All tests passing +- Test coverage >=80% +- QA verification report includes manifest path, lab root, runtime home, provider homes, base URL, command evidence, and final gate results +- No known P0/P1 QA defects remain open diff --git a/internal/api/contract/contract.go b/internal/api/contract/contract.go index 73963f161..6645a1aca 100644 --- a/internal/api/contract/contract.go +++ b/internal/api/contract/contract.go @@ -118,6 +118,33 @@ type TurnHistoryPayload struct { Events []SessionEventPayload `json:"events"` } +// SessionRepairPayload reports one dry-run or persisted session repair pass. +type SessionRepairPayload struct { + SessionID string `json:"session_id"` + Issues []SessionRepairIssuePayload `json:"issues"` + Actions []SessionRepairActionPayload `json:"actions"` + Persisted bool `json:"persisted"` +} + +// SessionRepairIssuePayload is one inconsistency found during session repair. +type SessionRepairIssuePayload struct { + Code string `json:"code"` + Severity string `json:"severity"` + TurnID string `json:"turn_id,omitempty"` + EventID string `json:"event_id,omitempty"` + Detail string `json:"detail,omitempty"` +} + +// SessionRepairActionPayload is one append-only repair action. +type SessionRepairActionPayload struct { + Code string `json:"code"` + TurnID string `json:"turn_id"` + EventID string `json:"event_id,omitempty"` + ToolCallID string `json:"tool_call_id,omitempty"` + ToolName string `json:"tool_name,omitempty"` + Persisted bool `json:"persisted"` +} + // AgentPayload is the shared agent definition response payload. 
type AgentPayload struct { Name string `json:"name"` diff --git a/internal/api/contract/responses.go b/internal/api/contract/responses.go index 07eaee58e..d200886ff 100644 --- a/internal/api/contract/responses.go +++ b/internal/api/contract/responses.go @@ -29,6 +29,11 @@ type SessionTranscriptResponse struct { Messages []transcript.UIMessage `json:"messages"` } +// SessionRepairResponse wraps the repair report for one session. +type SessionRepairResponse struct { + Repair SessionRepairPayload `json:"repair"` +} + // SessionApprovalResponse wraps the approve-session success payload. type SessionApprovalResponse struct { Status string `json:"status"` diff --git a/internal/api/core/conversions.go b/internal/api/core/conversions.go index 33135b68f..a9e50cde2 100644 --- a/internal/api/core/conversions.go +++ b/internal/api/core/conversions.go @@ -233,6 +233,43 @@ func SessionEventPayloadFromEvent(event store.SessionEvent, info *session.Info) return payload } +// SessionRepairPayloadFromResult converts a session repair report into the shared payload. 
+func SessionRepairPayloadFromResult(result *session.RepairResult) contract.SessionRepairPayload { + if result == nil { + return contract.SessionRepairPayload{} + } + + issues := make([]contract.SessionRepairIssuePayload, 0, len(result.Issues)) + for _, issue := range result.Issues { + issues = append(issues, contract.SessionRepairIssuePayload{ + Code: issue.Code, + Severity: issue.Severity, + TurnID: issue.TurnID, + EventID: issue.EventID, + Detail: issue.Detail, + }) + } + + actions := make([]contract.SessionRepairActionPayload, 0, len(result.Actions)) + for _, action := range result.Actions { + actions = append(actions, contract.SessionRepairActionPayload{ + Code: action.Code, + TurnID: action.TurnID, + EventID: action.EventID, + ToolCallID: action.ToolCallID, + ToolName: action.ToolName, + Persisted: action.Persisted, + }) + } + + return contract.SessionRepairPayload{ + SessionID: result.SessionID, + Issues: issues, + Actions: actions, + Persisted: result.Persisted, + } +} + // AgentPayloadFromDef converts an agent definition into the shared payload. func AgentPayloadFromDef(agent aghconfig.AgentDef) contract.AgentPayload { mcpServers := make([]contract.AgentMCPServerJSON, 0, len(agent.MCPServers)) diff --git a/internal/api/core/handlers.go b/internal/api/core/handlers.go index 814f2c823..0e15e174f 100644 --- a/internal/api/core/handlers.go +++ b/internal/api/core/handlers.go @@ -307,6 +307,32 @@ func (h *BaseHandlers) ResumeSession(c *gin.Context) { c.JSON(http.StatusOK, contract.SessionResponse{Session: SessionPayloadFromInfo(sess.Info())}) } +// RepairSession inspects and optionally repairs an interrupted persisted session transcript. 
+func (h *BaseHandlers) RepairSession(c *gin.Context) { + dryRun, err := repairBoolQuery(c, "dry_run", "dry-run") + if err != nil { + h.respondError(c, http.StatusBadRequest, err) + return + } + force, err := repairBoolQuery(c, "force") + if err != nil { + h.respondError(c, http.StatusBadRequest, err) + return + } + + result, err := h.Sessions.RepairSession(c.Request.Context(), session.RepairOpts{ + SessionID: c.Param("id"), + DryRun: dryRun, + Force: force, + }) + if err != nil { + h.respondError(c, StatusForSessionError(err), err) + return + } + + c.JSON(http.StatusOK, contract.SessionRepairResponse{Repair: SessionRepairPayloadFromResult(result)}) +} + // ClearSessionConversation clears persisted conversation history and restarts the // session with a fresh ACP conversation context while preserving the same id. func (h *BaseHandlers) ClearSessionConversation(c *gin.Context) { @@ -393,6 +419,32 @@ func (h *BaseHandlers) SessionTranscript(c *gin.Context) { c.JSON(http.StatusOK, contract.SessionTranscriptResponse{Messages: messages}) } +func repairBoolQuery(c *gin.Context, names ...string) (bool, error) { + var ( + value bool + seen bool + ) + for _, name := range names { + raw, ok := c.GetQuery(name) + if !ok { + continue + } + parsed, err := ParseOptionalBool(raw) + if err != nil { + return false, fmt.Errorf("invalid %s query: %w", name, err) + } + if seen && parsed != value { + return false, fmt.Errorf( + "conflicting boolean query values for %s", + strings.Join(names, ", "), + ) + } + value = parsed + seen = true + } + return value, nil +} + // StreamSession streams session events over SSE. 
func (h *BaseHandlers) StreamSession(c *gin.Context) { info, err := h.streamSessionInfo(c.Request.Context(), c.Param("id")) diff --git a/internal/api/core/handlers_internal_test.go b/internal/api/core/handlers_internal_test.go index 5dd37811a..3465502c5 100644 --- a/internal/api/core/handlers_internal_test.go +++ b/internal/api/core/handlers_internal_test.go @@ -52,6 +52,13 @@ func (s sessionManagerStub) Transcript(context.Context, string) ([]transcript.UI return nil, nil } +func (s sessionManagerStub) RepairSession( + context.Context, + session.RepairOpts, +) (*session.RepairResult, error) { + return &session.RepairResult{}, nil +} + func (s sessionManagerStub) Delete(context.Context, string) error { return nil } func (s sessionManagerStub) Stop(context.Context, string) error { return nil } diff --git a/internal/api/core/handlers_test.go b/internal/api/core/handlers_test.go index 355b58a8a..f47f8a483 100644 --- a/internal/api/core/handlers_test.go +++ b/internal/api/core/handlers_test.go @@ -28,6 +28,7 @@ func TestBaseHandlersSessionEndpoints(t *testing.T) { now := time.Date(2026, 4, 3, 12, 0, 0, 0, time.UTC) var createCalled atomic.Bool + var repairSeen session.RepairOpts manager := testutil.StubSessionManager{ ListAllFn: func(context.Context) ([]*session.Info, error) { return []*session.Info{testutil.NewSessionInfo("sess-a")}, nil @@ -71,6 +72,18 @@ func TestBaseHandlersSessionEndpoints(t *testing.T) { resumed.State = session.StateActive return resumed, nil }, + RepairFn: func(_ context.Context, opts session.RepairOpts) (*session.RepairResult, error) { + repairSeen = opts + return &session.RepairResult{ + SessionID: opts.SessionID, + Actions: []session.RepairAction{{ + Code: session.RepairActionAppendTerminalError, + TurnID: "turn-1", + Persisted: !opts.DryRun, + }}, + Persisted: !opts.DryRun, + }, nil + }, EventsFn: func(_ context.Context, id string, query store.EventQuery) ([]store.SessionEvent, error) { if id != "sess-a" || query.Limit != 10 || 
query.AfterSequence != 5 { t.Fatalf("Events call = %q %#v", id, query) @@ -184,6 +197,61 @@ func TestBaseHandlersSessionEndpoints(t *testing.T) { } }) + t.Run("ShouldRepairSession", func(t *testing.T) { + repairResp := performRequest( + t, + fixture.Engine, + http.MethodPost, + "/sessions/sess-a/repair?dry_run=true&force=true", + nil, + ) + if repairResp.Code != http.StatusOK { + t.Fatalf("repair status = %d, want %d", repairResp.Code, http.StatusOK) + } + if repairSeen.SessionID != "sess-a" || !repairSeen.DryRun || !repairSeen.Force { + t.Fatalf("repair opts = %#v, want sess-a dry-run force", repairSeen) + } + var payload contract.SessionRepairResponse + if err := json.Unmarshal(repairResp.Body.Bytes(), &payload); err != nil { + t.Fatalf("json.Unmarshal(repair response) error = %v", err) + } + if payload.Repair.SessionID != "sess-a" { + t.Fatalf("repair session id = %q, want sess-a", payload.Repair.SessionID) + } + if payload.Repair.Persisted { + t.Fatalf("repair persisted = %v, want false for dry-run", payload.Repair.Persisted) + } + if got, want := len(payload.Repair.Actions), 1; got != want { + t.Fatalf("repair actions len = %d, want %d", got, want) + } + action := payload.Repair.Actions[0] + if got, want := action.Code, session.RepairActionAppendTerminalError; got != want { + t.Fatalf("repair action code = %q, want %q", got, want) + } + if got, want := action.TurnID, "turn-1"; got != want { + t.Fatalf("repair action turn id = %q, want %q", got, want) + } + if action.Persisted { + t.Fatalf("repair action persisted = %v, want false for dry-run", action.Persisted) + } + }) + + t.Run("ShouldRejectConflictingRepairQueryAliases", func(t *testing.T) { + repairResp := performRequest( + t, + fixture.Engine, + http.MethodPost, + "/sessions/sess-a/repair?dry_run=true&dry-run=false", + nil, + ) + if repairResp.Code != http.StatusBadRequest { + t.Fatalf("repair conflicting alias status = %d, want %d", repairResp.Code, http.StatusBadRequest) + } + if 
!strings.Contains(repairResp.Body.String(), "conflicting boolean query values for dry_run, dry-run") { + t.Fatalf("repair conflicting alias body = %q, want conflict message", repairResp.Body.String()) + } + }) + t.Run("ShouldReturnSessionEvents", func(t *testing.T) { eventsResp := performRequest( t, diff --git a/internal/api/core/interfaces.go b/internal/api/core/interfaces.go index 60f8b9d40..b29052e1a 100644 --- a/internal/api/core/interfaces.go +++ b/internal/api/core/interfaces.go @@ -44,6 +44,7 @@ type SessionManager interface { Events(ctx context.Context, id string, query store.EventQuery) ([]store.SessionEvent, error) History(ctx context.Context, id string, query store.EventQuery) ([]store.TurnHistory, error) Transcript(ctx context.Context, id string) ([]transcript.UIMessage, error) + RepairSession(ctx context.Context, opts session.RepairOpts) (*session.RepairResult, error) Delete(ctx context.Context, id string) error Stop(ctx context.Context, id string) error StopWithCause(ctx context.Context, id string, cause session.StopCause, detail string) error diff --git a/internal/api/core/test_helpers_test.go b/internal/api/core/test_helpers_test.go index c196acc42..4ced48b81 100644 --- a/internal/api/core/test_helpers_test.go +++ b/internal/api/core/test_helpers_test.go @@ -160,6 +160,7 @@ func newHandlerFixtureWithAutomationAndTasks( engine.DELETE("/sessions/:id", handlers.DeleteSession) engine.POST("/sessions/:id/stop", handlers.StopSession) engine.POST("/sessions/:id/resume", handlers.ResumeSession) + engine.POST("/sessions/:id/repair", handlers.RepairSession) engine.GET("/sessions/:id/events", handlers.SessionEvents) engine.GET("/sessions/:id/history", handlers.SessionHistory) engine.GET("/sessions/:id/transcript", handlers.SessionTranscript) diff --git a/internal/api/httpapi/handlers_test.go b/internal/api/httpapi/handlers_test.go index 82099fc0b..66f7d842d 100644 --- a/internal/api/httpapi/handlers_test.go +++ b/internal/api/httpapi/handlers_test.go @@ -165,6 
+165,7 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "POST /api/sessions/:id/clear", "POST /api/sessions/:id/prompt", "POST /api/sessions/:id/prompt/cancel", + "POST /api/sessions/:id/repair", "POST /api/sessions/:id/resume", "POST /api/sessions/:id/stop", "POST /api/settings/actions/restart", diff --git a/internal/api/httpapi/routes.go b/internal/api/httpapi/routes.go index 775be7d61..d0b4724de 100644 --- a/internal/api/httpapi/routes.go +++ b/internal/api/httpapi/routes.go @@ -69,6 +69,7 @@ func registerSessionRoutes(api gin.IRouter, handlers *Handlers) { sessions.DELETE("/:id", handlers.DeleteSession) sessions.POST("/:id/stop", handlers.StopSession) sessions.POST("/:id/resume", handlers.ResumeSession) + sessions.POST("/:id/repair", handlers.RepairSession) sessions.POST("/:id/clear", handlers.ClearSessionConversation) sessions.POST("/:id/prompt", handlers.promptSession) sessions.POST("/:id/prompt/cancel", handlers.cancelSessionPrompt) diff --git a/internal/api/spec/spec.go b/internal/api/spec/spec.go index 236226658..1c376bd0f 100644 --- a/internal/api/spec/spec.go +++ b/internal/api/spec/spec.go @@ -1771,6 +1771,25 @@ var operationRegistry = []OperationSpec{ {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, }, }, + { + Method: "POST", + Path: "/api/sessions/{id}/repair", + OperationID: "repairSession", + Summary: "Inspect and repair an interrupted session transcript", + Tags: []string{"sessions"}, + Transports: []Transport{TransportHTTP, TransportUDS}, + Parameters: []ParameterSpec{ + pathParam("id", "Session id"), + boolQueryParam("dry_run", "Report planned repairs without persisting new events"), + boolQueryParam("force", "Allow repair for stopped sessions whose stop reason is not crash or error"), + }, + Responses: []ResponseSpec{ + {Status: 200, Description: "OK", Body: contract.SessionRepairResponse{}}, + {Status: 400, Description: "Invalid repair options", Body: contract.ErrorPayload{}}, + {Status: 404, 
Description: "Session not found", Body: contract.ErrorPayload{}}, + {Status: 500, Description: "Internal server error", Body: contract.ErrorPayload{}}, + }, + }, { Method: "GET", Path: "/api/sessions/{id}/events", diff --git a/internal/api/testutil/apitest.go b/internal/api/testutil/apitest.go index e11853680..2625dc285 100644 --- a/internal/api/testutil/apitest.go +++ b/internal/api/testutil/apitest.go @@ -51,6 +51,7 @@ type StubSessionManager struct { EventsFn func(context.Context, string, store.EventQuery) ([]store.SessionEvent, error) HistoryFn func(context.Context, string, store.EventQuery) ([]store.TurnHistory, error) TranscriptFn func(context.Context, string) ([]transcript.UIMessage, error) + RepairFn func(context.Context, session.RepairOpts) (*session.RepairResult, error) DeleteFn func(context.Context, string) error StopFn func(context.Context, string) error StopWithCauseFn func(context.Context, string, session.StopCause, string) error @@ -125,6 +126,16 @@ func (s StubSessionManager) Transcript(ctx context.Context, id string) ([]transc return nil, nil } +func (s StubSessionManager) RepairSession( + ctx context.Context, + opts session.RepairOpts, +) (*session.RepairResult, error) { + if s.RepairFn != nil { + return s.RepairFn(ctx, opts) + } + return &session.RepairResult{SessionID: opts.SessionID}, nil +} + func (s StubSessionManager) Delete(ctx context.Context, id string) error { if s.DeleteFn != nil { return s.DeleteFn(ctx, id) diff --git a/internal/api/udsapi/handlers_test.go b/internal/api/udsapi/handlers_test.go index 964442481..9da845a08 100644 --- a/internal/api/udsapi/handlers_test.go +++ b/internal/api/udsapi/handlers_test.go @@ -228,6 +228,7 @@ func TestRegisterRoutesCoversTechSpecEndpoints(t *testing.T) { "POST /api/sessions/:id/clear", "POST /api/sessions/:id/prompt", "POST /api/sessions/:id/prompt/cancel", + "POST /api/sessions/:id/repair", "POST /api/sessions/:id/resume", "POST /api/sessions/:id/stop", "POST /api/settings/actions/restart", diff 
--git a/internal/api/udsapi/routes.go b/internal/api/udsapi/routes.go index b03cc7da6..29e709532 100644 --- a/internal/api/udsapi/routes.go +++ b/internal/api/udsapi/routes.go @@ -67,6 +67,7 @@ func registerSessionRoutes(api gin.IRouter, handlers *Handlers) { sessions.DELETE("/:id", handlers.DeleteSession) sessions.POST("/:id/stop", handlers.StopSession) sessions.POST("/:id/resume", handlers.ResumeSession) + sessions.POST("/:id/repair", handlers.RepairSession) sessions.POST("/:id/clear", handlers.ClearSessionConversation) sessions.POST("/:id/prompt", handlers.promptSession) sessions.POST("/:id/prompt/cancel", handlers.cancelSessionPrompt) diff --git a/internal/cli/client.go b/internal/cli/client.go index f8280b6a9..778d9ea7e 100644 --- a/internal/cli/client.go +++ b/internal/cli/client.go @@ -61,6 +61,7 @@ type DaemonClient interface { GetSession(ctx context.Context, id string) (SessionRecord, error) StopSession(ctx context.Context, id string) error ResumeSession(ctx context.Context, id string) (SessionRecord, error) + RepairSession(ctx context.Context, id string, query SessionRepairQuery) (SessionRepairRecord, error) PromptSession(ctx context.Context, id string, message string) ([]AgentEventRecord, error) SessionEvents(ctx context.Context, id string, query SessionEventQuery) ([]SessionEventRecord, error) StreamSessionEvents( @@ -209,6 +210,21 @@ type SessionRecord = contract.SessionPayload // ACPCapsRecord captures optional runtime capabilities exposed by the daemon API. type ACPCapsRecord = contract.ACPCapsPayload +// SessionRepairRecord reports one session repair pass returned by the daemon API. +type SessionRepairRecord = contract.SessionRepairPayload + +// SessionRepairIssueRecord is one inconsistency reported by session repair. +type SessionRepairIssueRecord = contract.SessionRepairIssuePayload + +// SessionRepairActionRecord is one planned or persisted repair action. 
+type SessionRepairActionRecord = contract.SessionRepairActionPayload + +// SessionRepairQuery captures CLI repair modifiers. +type SessionRepairQuery struct { + DryRun bool + Force bool +} + // SessionEventRecord is one persisted session event row returned by the daemon API. type SessionEventRecord = contract.SessionEventPayload @@ -578,6 +594,8 @@ type unixSocketClient struct { httpClient *http.Client } +var _ DaemonClient = (*unixSocketClient)(nil) + var errStopSSE = sse.ErrStop // NewClient constructs a daemon client that talks HTTP over a Unix domain socket. @@ -869,6 +887,27 @@ func (c *unixSocketClient) ResumeSession(ctx context.Context, id string) (Sessio return response.Session, nil } +func (c *unixSocketClient) RepairSession( + ctx context.Context, + id string, + query SessionRepairQuery, +) (SessionRepairRecord, error) { + var response struct { + Repair SessionRepairRecord `json:"repair"` + } + if err := c.doJSON( + ctx, + http.MethodPost, + "/api/sessions/"+url.PathEscape(strings.TrimSpace(id))+"/repair", + sessionRepairValues(query), + nil, + &response, + ); err != nil { + return SessionRepairRecord{}, err + } + return response.Repair, nil +} + func (c *unixSocketClient) PromptSession(ctx context.Context, id string, message string) ([]AgentEventRecord, error) { path := "/api/sessions/" + url.PathEscape(strings.TrimSpace(id)) + "/prompt" var events []AgentEventRecord @@ -2187,6 +2226,17 @@ func sessionListValues(query SessionListQuery) url.Values { return values } +func sessionRepairValues(query SessionRepairQuery) url.Values { + values := url.Values{} + if query.DryRun { + values.Set("dry_run", "true") + } + if query.Force { + values.Set("force", "true") + } + return values +} + func networkPeersValues(query NetworkPeersQuery) url.Values { values := url.Values{} if trimmed := strings.TrimSpace(query.Channel); trimmed != "" { diff --git a/internal/cli/client_test.go b/internal/cli/client_test.go index 4d50066e8..d3c057286 100644 --- 
a/internal/cli/client_test.go +++ b/internal/cli/client_test.go @@ -498,6 +498,17 @@ func TestUnixSocketClientMethods(t *testing.T) { http.StatusOK, `{"session":{"id":"sess-1","agent_name":"coder","workspace_id":"ws-1","workspace_path":"/tmp","state":"active","created_at":"2026-04-03T12:00:00Z","updated_at":"2026-04-03T12:00:00Z"}}`, ), nil + case req.Method == http.MethodPost && req.URL.Path == "/api/sessions/sess-1/repair": + if got := req.URL.Query().Get("dry_run"); got != "true" { + t.Fatalf("repair dry_run query = %q, want true", got) + } + if got := req.URL.Query().Get("force"); got != "true" { + t.Fatalf("repair force query = %q, want true", got) + } + return newHTTPResponse( + http.StatusOK, + `{"repair":{"session_id":"sess-1","persisted":false,"issues":[],"actions":[{"code":"append_terminal_error","turn_id":"turn-1","persisted":false}]}}`, + ), nil case req.Method == http.MethodPost && req.URL.Path == "/api/sessions/sess-1/prompt": body, err := io.ReadAll(req.Body) if err != nil { @@ -969,6 +980,44 @@ func TestUnixSocketClientMethods(t *testing.T) { } } +func TestUnixSocketClientRepairSession(t *testing.T) { + t.Parallel() + + t.Run("Should repair session with dry run and force", func(t *testing.T) { + t.Parallel() + + client := &unixSocketClient{ + socketPath: "/tmp/agh.sock", + httpClient: &http.Client{ + Transport: roundTripperFunc(func(req *http.Request) (*http.Response, error) { + if req.Method != http.MethodPost || req.URL.Path != "/api/sessions/sess-1/repair" { + return newHTTPResponse(http.StatusNotFound, `{"error":"missing"}`), nil + } + if got := req.URL.Query().Get("dry_run"); got != "true" { + t.Fatalf("repair dry_run query = %q, want true", got) + } + if got := req.URL.Query().Get("force"); got != "true" { + t.Fatalf("repair force query = %q, want true", got) + } + return newHTTPResponse( + http.StatusOK, + 
`{"repair":{"session_id":"sess-1","persisted":false,"issues":[],"actions":[{"code":"append_terminal_error","turn_id":"turn-1","persisted":false}]}}`, + ), nil + }), + }, + } + + repaired, err := client.RepairSession( + context.Background(), + "sess-1", + SessionRepairQuery{DryRun: true, Force: true}, + ) + if err != nil || repaired.SessionID != "sess-1" || len(repaired.Actions) != 1 { + t.Fatalf("RepairSession() = %#v, %v", repaired, err) + } + }) +} + func TestUnixSocketClientExtensionMethods(t *testing.T) { t.Parallel() @@ -2308,6 +2357,21 @@ func TestCLIUsesSharedContractAliases(t *testing.T) { cliType: TurnHistoryRecord{}, want: contract.TurnHistoryPayload{}, }, + { + name: "Should alias SessionRepairRecord to the shared contract", + cliType: SessionRepairRecord{}, + want: contract.SessionRepairPayload{}, + }, + { + name: "Should alias SessionRepairIssueRecord to the shared contract", + cliType: SessionRepairIssueRecord{}, + want: contract.SessionRepairIssuePayload{}, + }, + { + name: "Should alias SessionRepairActionRecord to the shared contract", + cliType: SessionRepairActionRecord{}, + want: contract.SessionRepairActionPayload{}, + }, { name: "Should alias AgentRecord to the shared contract", cliType: AgentRecord{}, diff --git a/internal/cli/helpers_test.go b/internal/cli/helpers_test.go index 46e21fc96..3fc3eb0ed 100644 --- a/internal/cli/helpers_test.go +++ b/internal/cli/helpers_test.go @@ -43,6 +43,7 @@ type stubClient struct { getSessionFn func(context.Context, string) (SessionRecord, error) stopSessionFn func(context.Context, string) error resumeSessionFn func(context.Context, string) (SessionRecord, error) + repairSessionFn func(context.Context, string, SessionRepairQuery) (SessionRepairRecord, error) promptSessionFn func(context.Context, string, string) ([]AgentEventRecord, error) sessionEventsFn func(context.Context, string, SessionEventQuery) ([]SessionEventRecord, error) streamSessionFn func(context.Context, string, SessionEventQuery, string, 
SSEHandler) error @@ -326,6 +327,17 @@ func (s *stubClient) ResumeSession(ctx context.Context, id string) (SessionRecor return SessionRecord{}, errors.New("unexpected ResumeSession call") } +func (s *stubClient) RepairSession( + ctx context.Context, + id string, + query SessionRepairQuery, +) (SessionRepairRecord, error) { + if s.repairSessionFn != nil { + return s.repairSessionFn(ctx, id, query) + } + return SessionRepairRecord{}, errors.New("unexpected RepairSession call") +} + func (s *stubClient) PromptSession( ctx context.Context, id string, diff --git a/internal/cli/session.go b/internal/cli/session.go index b7f64e66e..7b91cf8e0 100644 --- a/internal/cli/session.go +++ b/internal/cli/session.go @@ -24,6 +24,7 @@ func newSessionCommand(deps commandDeps) *cobra.Command { cmd.AddCommand(newSessionStopCommand(deps)) cmd.AddCommand(newSessionStatusCommand(deps)) cmd.AddCommand(newSessionResumeCommand(deps)) + cmd.AddCommand(newSessionRepairCommand(deps)) cmd.AddCommand(newSessionWaitCommand(deps)) cmd.AddCommand(newSessionPromptCommand(deps)) cmd.AddCommand(newSessionEventsCommand(deps)) @@ -198,6 +199,42 @@ func newSessionResumeCommand(deps commandDeps) *cobra.Command { } } +func newSessionRepairCommand(deps commandDeps) *cobra.Command { + var ( + dryRun bool + force bool + ) + + cmd := &cobra.Command{ + Use: "repair ", + Short: "Inspect and repair an interrupted session transcript", + Example: ` # Report the repair actions without writing new events + agh session repair sess_1234 --dry-run + + # Force repair for a stopped session whose stop reason is not crash or error + agh session repair sess_1234 --force`, + Args: cobra.ExactArgs(1), + RunE: func(cmd *cobra.Command, args []string) error { + client, err := clientFromDeps(deps) + if err != nil { + return err + } + + result, err := client.RepairSession(cmd.Context(), args[0], SessionRepairQuery{ + DryRun: dryRun, + Force: force, + }) + if err != nil { + return err + } + return writeCommandOutput(cmd, 
sessionRepairBundle(result)) + }, + } + cmd.Flags().BoolVar(&dryRun, "dry-run", false, "Report planned repairs without persisting events") + cmd.Flags().BoolVar(&force, "force", false, "Allow repair for stopped non-crash sessions") + return cmd +} + func newSessionWaitCommand(deps commandDeps) *cobra.Command { return &cobra.Command{ Use: "wait ", @@ -554,6 +591,70 @@ func sessionSandboxBackend(info SessionRecord) string { return strings.TrimSpace(info.Sandbox.Backend) } +func sessionRepairBundle(record SessionRepairRecord) outputBundle { + return outputBundle{ + jsonValue: record, + human: func() (string, error) { + return renderHumanSection("Session Repair", []keyValue{ + {Label: "Session", Value: stringOrDash(record.SessionID)}, + {Label: "Persisted", Value: strconv.FormatBool(record.Persisted)}, + {Label: "Issues", Value: stringOrDash(sessionRepairIssueSummary(record.Issues))}, + {Label: "Actions", Value: stringOrDash(sessionRepairActionSummary(record.Actions))}, + }), nil + }, + toon: func() (string, error) { + return renderToonObject("repair", []string{ + "session_id", + "persisted", + "issues", + "actions", + }, []string{ + record.SessionID, + strconv.FormatBool(record.Persisted), + sessionRepairIssueSummary(record.Issues), + sessionRepairActionSummary(record.Actions), + }), nil + }, + } +} + +func sessionRepairIssueSummary(items []SessionRepairIssueRecord) string { + if len(items) == 0 { + return "" + } + parts := make([]string, 0, len(items)) + for _, item := range items { + parts = append(parts, repairSummaryPart(item.Code, item.TurnID, item.EventID)) + } + return strings.Join(parts, ", ") +} + +func sessionRepairActionSummary(items []SessionRepairActionRecord) string { + if len(items) == 0 { + return "" + } + parts := make([]string, 0, len(items)) + for _, item := range items { + ref := item.EventID + if ref == "" { + ref = item.ToolCallID + } + parts = append(parts, repairSummaryPart(item.Code, item.TurnID, ref)) + } + return strings.Join(parts, ", ") +} 
+ +func repairSummaryPart(code string, turnID string, ref string) string { + value := strings.TrimSpace(code) + if trimmedTurn := strings.TrimSpace(turnID); trimmedTurn != "" { + value += ":" + trimmedTurn + } + if trimmedRef := strings.TrimSpace(ref); trimmedRef != "" { + value += ":" + trimmedRef + } + return value +} + func sessionFailureKind(info SessionRecord) string { if info.Failure == nil { return "" diff --git a/internal/cli/session_test.go b/internal/cli/session_test.go index 106ff6dcd..ec9337524 100644 --- a/internal/cli/session_test.go +++ b/internal/cli/session_test.go @@ -301,6 +301,60 @@ func TestSessionListPassesWorkspaceFilter(t *testing.T) { } } +func TestSessionRepairPassesFlagsAndRendersJSON(t *testing.T) { + t.Run("Should pass flags and render JSON", func(t *testing.T) { + t.Parallel() + + var seenQuery SessionRepairQuery + var seenID string + deps := newTestDeps(t, &stubClient{ + repairSessionFn: func(_ context.Context, id string, query SessionRepairQuery) (SessionRepairRecord, error) { + seenID = id + seenQuery = query + return SessionRepairRecord{ + SessionID: id, + Issues: []SessionRepairIssueRecord{{ + Code: session.RepairIssueStopReasonRequiresForce, + Severity: session.RepairSeverityError, + TurnID: "turn-1", + }}, + Actions: []SessionRepairActionRecord{{ + Code: session.RepairActionAppendTerminalError, + TurnID: "turn-1", + Persisted: false, + }}, + }, nil + }, + }) + + stdout, _, err := executeRootCommand( + t, + deps, + "session", + "repair", + "sess-1", + "--dry-run", + "--force", + "-o", + "json", + ) + if err != nil { + t.Fatalf("executeRootCommand(session repair) error = %v", err) + } + if seenID != "sess-1" || !seenQuery.DryRun || !seenQuery.Force { + t.Fatalf("repair call = id %q query %#v, want dry-run force for sess-1", seenID, seenQuery) + } + + var decoded SessionRepairRecord + if err := json.Unmarshal([]byte(stdout), &decoded); err != nil { + t.Fatalf("json.Unmarshal(session repair) error = %v", err) + } + if 
decoded.SessionID != "sess-1" || len(decoded.Issues) != 1 || len(decoded.Actions) != 1 { + t.Fatalf("decoded repair = %#v, want one issue and one action for sess-1", decoded) + } + }) +} + func TestSessionEventsFollowUsesSSE(t *testing.T) { t.Parallel() diff --git a/internal/daemon/boot.go b/internal/daemon/boot.go index 1c7946431..7b7560e93 100644 --- a/internal/daemon/boot.go +++ b/internal/daemon/boot.go @@ -31,6 +31,7 @@ import ( "github.com/pedronauck/agh/internal/situation" "github.com/pedronauck/agh/internal/skills" "github.com/pedronauck/agh/internal/skills/bundled" + "github.com/pedronauck/agh/internal/store" taskpkg "github.com/pedronauck/agh/internal/task" "github.com/pedronauck/agh/internal/toolruntime" toolspkg "github.com/pedronauck/agh/internal/tools" @@ -151,52 +152,7 @@ func (d *Daemon) boot(ctx context.Context) (err error) { cleanup := &bootCleanup{} defer cleanup.run(&err) - if err := d.bootConfig(state, cleanup); err != nil { - return err - } - if err := d.bootPromptProviders(ctx, state); err != nil { - return err - } - if err := d.bootRuntime(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootTasks(ctx, state); err != nil { - return err - } - if err := d.bootSpawnReaper(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootScheduler(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootNetwork(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootHooks(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootCoordinator(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootAutomation(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootBundles(ctx, state); err != nil { - return err - } - if err := d.bootResourceReconcile(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootExtensions(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootSettings(ctx, state); err != nil { - return err - } - if err := 
d.bootServers(ctx, state, cleanup); err != nil { - return err - } - if err := d.bootFinalize(ctx, state); err != nil { + if err := d.bootComponents(ctx, state, cleanup); err != nil { return err } if err := d.markRestartReadyIfRequested(state.info); err != nil { @@ -207,6 +163,34 @@ func (d *Daemon) boot(ctx context.Context) (err error) { return nil } +func (d *Daemon) bootComponents(ctx context.Context, state *bootState, cleanup *bootCleanup) error { + steps := []func() error{ + func() error { return d.bootConfig(state, cleanup) }, + func() error { return d.bootPromptProviders(ctx, state) }, + func() error { return d.bootRuntime(ctx, state, cleanup) }, + func() error { return d.bootSessionRepair(ctx, state) }, + func() error { return d.bootTasks(ctx, state) }, + func() error { return d.bootSpawnReaper(ctx, state, cleanup) }, + func() error { return d.bootScheduler(ctx, state, cleanup) }, + func() error { return d.bootNetwork(ctx, state, cleanup) }, + func() error { return d.bootHooks(ctx, state, cleanup) }, + func() error { return d.bootCoordinator(ctx, state, cleanup) }, + func() error { return d.bootAutomation(ctx, state, cleanup) }, + func() error { return d.bootBundles(ctx, state) }, + func() error { return d.bootResourceReconcile(ctx, state, cleanup) }, + func() error { return d.bootExtensions(ctx, state, cleanup) }, + func() error { return d.bootSettings(ctx, state) }, + func() error { return d.bootServers(ctx, state, cleanup) }, + func() error { return d.bootFinalize(ctx, state) }, + } + for _, step := range steps { + if err := step(); err != nil { + return err + } + } + return nil +} + func (d *Daemon) beginBoot() error { d.mu.Lock() defer d.mu.Unlock() @@ -526,6 +510,90 @@ func (d *Daemon) bootRuntimeServices( return nil } +func (d *Daemon) bootSessionRepair(ctx context.Context, state *bootState) error { + if state == nil { + return errors.New("daemon: boot session repair state is required") + } + if state.sessions == nil { + return errors.New("daemon: 
boot session repair requires session manager") + } + + infos, err := state.sessions.ListAll(ctx) + if err != nil { + if ctxErr := ctx.Err(); ctxErr != nil { + return fmt.Errorf("daemon: boot session repair canceled: %w", ctxErr) + } + state.logger.Warn("daemon: boot session repair skipped session list", "error", err) + return nil + } + + for _, info := range infos { + if err := ctx.Err(); err != nil { + return fmt.Errorf("daemon: boot session repair canceled: %w", err) + } + if !bootShouldRepairSession(info) { + continue + } + + result, repairErr := state.sessions.RepairSession(ctx, session.RepairOpts{SessionID: info.ID}) + if repairErr != nil { + if ctxErr := ctx.Err(); ctxErr != nil { + return fmt.Errorf("daemon: boot session repair canceled: %w", ctxErr) + } + state.logger.Warn( + "daemon: boot session repair failed", + "session_id", info.ID, + "error", repairErr, + ) + continue + } + if result == nil { + continue + } + errorIssues := repairIssueCount(result, session.RepairSeverityError) + if len(result.Actions) == 0 && errorIssues == 0 { + continue + } + state.logger.Info( + "daemon: boot session repair complete", + "session_id", result.SessionID, + "persisted", result.Persisted, + "actions", len(result.Actions), + "issues", len(result.Issues), + "error_issues", errorIssues, + ) + } + return nil +} + +func bootShouldRepairSession(info *session.Info) bool { + if info == nil || strings.TrimSpace(info.ID) == "" { + return false + } + if info.State != session.StateStopped { + return false + } + switch info.StopReason { + case store.StopAgentCrashed, store.StopError: + return true + default: + return false + } +} + +func repairIssueCount(result *session.RepairResult, severity string) int { + if result == nil { + return 0 + } + count := 0 + for _, issue := range result.Issues { + if strings.TrimSpace(issue.Severity) == severity { + count++ + } + } + return count +} + func (d *Daemon) sessionManagerDeps(state *bootState) SessionManagerDeps { return SessionManagerDeps{ 
HomePaths: d.homePaths, diff --git a/internal/daemon/daemon_test.go b/internal/daemon/daemon_test.go index 68eedab72..087e8c897 100644 --- a/internal/daemon/daemon_test.go +++ b/internal/daemon/daemon_test.go @@ -4263,6 +4263,7 @@ type fakeSessionManager struct { promptCtxCancelled chan struct{} stopCalls []string deleteCalls []string + repairCalls []session.RepairOpts stopWithCauseCalls []fakeStopWithCauseCall requestStopCalls []fakeStopWithCauseCall waitFinalizationsRelease <-chan struct{} @@ -4379,6 +4380,16 @@ func (f *fakeSessionManager) Transcript(context.Context, string) ([]transcript.U return nil, nil } +func (f *fakeSessionManager) RepairSession( + _ context.Context, + opts session.RepairOpts, +) (*session.RepairResult, error) { + f.mu.Lock() + defer f.mu.Unlock() + f.repairCalls = append(f.repairCalls, opts) + return &session.RepairResult{SessionID: opts.SessionID}, nil +} + func (f *fakeSessionManager) Stop(_ context.Context, id string) error { f.mu.Lock() defer f.mu.Unlock() @@ -4524,6 +4535,41 @@ func TestFakeSessionManagerClearConversationTreatsMissingSessionAsFreshConversat }) } +func TestBootSessionRepair(t *testing.T) { + t.Parallel() + + t.Run("ShouldRepairOnlyStoppedCrashOrErrorSessions", func(t *testing.T) { + t.Parallel() + + manager := &fakeSessionManager{ + infos: []*session.Info{ + {ID: "sess-crash", State: session.StateStopped, StopReason: store.StopAgentCrashed}, + {ID: "sess-error", State: session.StateStopped, StopReason: store.StopError}, + {ID: "sess-complete", State: session.StateStopped, StopReason: store.StopCompleted}, + {ID: "sess-active", State: session.StateActive, StopReason: store.StopAgentCrashed}, + }, + } + state := &bootState{ + logger: discardLogger(), + sessions: manager, + } + daemon := &Daemon{} + + if err := daemon.bootSessionRepair(testutil.Context(t), state); err != nil { + t.Fatalf("bootSessionRepair() error = %v", err) + } + + manager.mu.Lock() + defer manager.mu.Unlock() + if got, want := 
len(manager.repairCalls), 2; got != want { + t.Fatalf("repair calls = %d, want %d", got, want) + } + if manager.repairCalls[0].SessionID != "sess-crash" || manager.repairCalls[1].SessionID != "sess-error" { + t.Fatalf("repair calls = %#v, want crash then error sessions", manager.repairCalls) + } + }) +} + func (f *fakeSessionManager) Prompt(ctx context.Context, id string, msg string) (<-chan acp.AgentEvent, error) { f.mu.Lock() f.promptCalls = append(f.promptCalls, struct { diff --git a/internal/session/repair.go b/internal/session/repair.go new file mode 100644 index 000000000..9f04bee21 --- /dev/null +++ b/internal/session/repair.go @@ -0,0 +1,532 @@ +package session + +import ( + "context" + "encoding/json" + "errors" + "fmt" + "slices" + "strings" + + "github.com/pedronauck/agh/internal/acp" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/transcript" +) + +const ( + RepairSeverityInfo = "info" + RepairSeverityWarning = "warning" + RepairSeverityError = "error" + + RepairIssueSequenceGap = "event_sequence_gap" + RepairIssueSequenceDuplicate = "event_sequence_duplicate" + RepairIssueSequenceRegression = "event_sequence_regression" + RepairIssueInvalidEventJSON = "invalid_event_json" + RepairIssueEventTypeMismatch = "event_type_mismatch" + RepairIssueNoRepairableTurn = "no_repairable_turn" + RepairIssueSessionNotStopped = "session_not_stopped" + RepairIssueStopReasonRequiresForce = "stop_reason_requires_force" + RepairIssueDanglingToolCallMissingID = "dangling_tool_call_missing_id" + RepairIssueTerminalEventAlreadyExists = "terminal_event_already_exists" + + RepairActionAppendInterruptedToolResult = "append_interrupted_tool_result" + RepairActionAppendTerminalError = "append_terminal_error" + + repairInterruptedToolMessage = "Tool call interrupted before a result was persisted." + repairTerminalErrorMessage = "Session interrupted before a terminal prompt event was persisted." 
+) + +// RepairOpts controls one persisted session repair pass. +type RepairOpts struct { + SessionID string + DryRun bool + Force bool +} + +// RepairResult describes the detected inconsistencies and append-only +// repair events planned or persisted for a session. +type RepairResult struct { + SessionID string + Issues []RepairIssue + Actions []RepairAction + Persisted bool +} + +// RepairIssue is one non-mutating diagnostic discovered during repair. +type RepairIssue struct { + Code string + Severity string + TurnID string + EventID string + Detail string +} + +// RepairAction is one append-only mutation planned or persisted by repair. +type RepairAction struct { + Code string + TurnID string + EventID string + ToolCallID string + ToolName string + Persisted bool +} + +// repairEvent keeps the stored row and decoded ACP payload together so repair +// analysis can reconcile metadata and transcript content in one pass. +type repairEvent struct { + stored store.SessionEvent + agent acp.AgentEvent +} + +// repairTurnState tracks the latest prompt turn shape so repair can append only +// the terminal events that are still missing for that turn. +type repairTurnState struct { + turnID string + hasPromptData bool + terminal bool + toolCalls map[string]repairToolCall + toolResults map[string]struct{} +} + +// repairToolCall captures the minimum persisted tool-call metadata needed to +// synthesize a matching interrupted tool_result when none was recorded. +type repairToolCall struct { + toolName string +} + +// repairAnalysis accumulates diagnostics plus the final turn state that drives +// append-only repair planning, including whether analysis must block mutation. +type repairAnalysis struct { + issues []RepairIssue + turn repairTurnState + block bool +} + +// RepairSession inspects one persisted session transcript and, when safe, +// appends terminal repair events for an interrupted final prompt turn. 
+func (m *Manager) RepairSession( + ctx context.Context, + opts RepairOpts, +) (result *RepairResult, err error) { + if ctx == nil { + return nil, errors.New("session: repair context is required") + } + + target, err := normalizeStoredSessionID(opts.SessionID) + if err != nil { + return nil, err + } + + meta, err := m.readMetaWithContext(ctx, target) + if err != nil { + return nil, err + } + + recorder, cleanup, err := m.openQueryRecorder(ctx, target) + if err != nil { + return nil, err + } + defer func() { + if cleanupErr := cleanup(); cleanupErr != nil && err == nil { + err = cleanupErr + } + }() + + events, err := recorder.Query(ctx, store.EventQuery{}) + if err != nil { + return nil, fmt.Errorf("session: query events for repair %q: %w", target, err) + } + + result, actions := planSessionRepair(target, meta, opts, events) + if len(actions) == 0 { + return result, nil + } + + persisted, err := m.persistRepairActions(ctx, recorder, meta, actions) + if err != nil { + return result, err + } + result.Actions = persisted + result.Persisted = len(persisted) > 0 + return result, nil +} + +func planSessionRepair( + target string, + meta store.SessionMeta, + opts RepairOpts, + events []store.SessionEvent, +) (*RepairResult, []RepairAction) { + result := &RepairResult{SessionID: target} + analysis := analyzeRepairEvents(events) + result.Issues = append(result.Issues, analysis.issues...) 
+ if analysis.block { + return result, nil + } + if strings.TrimSpace(analysis.turn.turnID) == "" { + result.Issues = append(result.Issues, RepairIssue{ + Code: RepairIssueNoRepairableTurn, + Severity: RepairSeverityInfo, + Detail: "no prompt turn exists in the session event store", + }) + return result, nil + } + if analysis.turn.terminal { + result.Issues = append(result.Issues, RepairIssue{ + Code: RepairIssueTerminalEventAlreadyExists, + Severity: RepairSeverityInfo, + TurnID: analysis.turn.turnID, + Detail: "the final prompt turn already has a terminal event", + }) + } + + if strings.TrimSpace(meta.State) != string(StateStopped) { + result.Issues = append(result.Issues, RepairIssue{ + Code: RepairIssueSessionNotStopped, + Severity: RepairSeverityError, + TurnID: analysis.turn.turnID, + Detail: "repair only mutates stopped sessions", + }) + return result, nil + } + stopReason := sessionMetaStopReason(meta) + if !opts.Force && !repairDefaultStopReason(stopReason) { + result.Issues = append(result.Issues, RepairIssue{ + Code: RepairIssueStopReasonRequiresForce, + Severity: RepairSeverityError, + TurnID: analysis.turn.turnID, + Detail: fmt.Sprintf("stop reason %q requires force before repair can mutate", stopReason), + }) + return result, nil + } + + actions := planRepairActions(analysis.turn, !analysis.turn.terminal) + result.Actions = append(result.Actions, actions...) 
+ if opts.DryRun || len(actions) == 0 { + return result, nil + } + return result, actions +} + +func analyzeRepairEvents(events []store.SessionEvent) repairAnalysis { + analysis := repairAnalysis{} + turns := make(map[string]*repairTurnState) + var lastTurnID string + var previousSequence int64 + + for _, event := range sortedRepairEvents(events) { + trackRepairSequence(&analysis, event, &previousSequence) + agentEvent, eventType, ok := decodeRepairEvent(&analysis, event) + if !ok { + continue + } + turnID := strings.TrimSpace(firstNonEmpty(event.TurnID, agentEvent.TurnID)) + if turnID == "" || eventType == EventTypeSessionStopped { + continue + } + turn := ensureRepairTurn(turns, turnID) + lastTurnID = turnID + applyRepairEvent(turn, repairEvent{stored: event, agent: agentEvent}, eventType, &analysis) + } + + if lastTurnID != "" { + analysis.turn = *turns[lastTurnID] + } + return analysis +} + +func sortedRepairEvents(events []store.SessionEvent) []store.SessionEvent { + ordered := append([]store.SessionEvent(nil), events...) 
+ slices.SortStableFunc(ordered, func(a store.SessionEvent, b store.SessionEvent) int { + switch { + case a.Sequence < b.Sequence: + return -1 + case a.Sequence > b.Sequence: + return 1 + default: + return strings.Compare(a.ID, b.ID) + } + }) + return ordered +} + +func trackRepairSequence( + analysis *repairAnalysis, + event store.SessionEvent, + previousSequence *int64, +) { + expected := *previousSequence + 1 + switch { + case *previousSequence > 0 && event.Sequence == *previousSequence: + analysis.issues = append(analysis.issues, RepairIssue{ + Code: RepairIssueSequenceDuplicate, + Severity: RepairSeverityError, + EventID: event.ID, + Detail: fmt.Sprintf("duplicate sequence %d", event.Sequence), + }) + analysis.block = true + case *previousSequence > 0 && event.Sequence < *previousSequence: + analysis.issues = append(analysis.issues, RepairIssue{ + Code: RepairIssueSequenceRegression, + Severity: RepairSeverityError, + EventID: event.ID, + Detail: fmt.Sprintf("sequence regressed from %d to %d", *previousSequence, event.Sequence), + }) + analysis.block = true + case event.Sequence != expected: + analysis.issues = append(analysis.issues, RepairIssue{ + Code: RepairIssueSequenceGap, + Severity: RepairSeverityWarning, + EventID: event.ID, + Detail: fmt.Sprintf("expected sequence %d, found %d", expected, event.Sequence), + }) + } + if event.Sequence > *previousSequence { + *previousSequence = event.Sequence + } +} + +func decodeRepairEvent( + analysis *repairAnalysis, + event store.SessionEvent, +) (acp.AgentEvent, string, bool) { + agentEvent, err := transcript.UnmarshalAgentEvent(event.Content) + if err != nil { + analysis.issues = append(analysis.issues, RepairIssue{ + Code: RepairIssueInvalidEventJSON, + Severity: RepairSeverityError, + TurnID: strings.TrimSpace(event.TurnID), + EventID: event.ID, + Detail: err.Error(), + }) + analysis.block = true + return acp.AgentEvent{}, "", false + } + + eventType := strings.TrimSpace(event.Type) + decodedType := 
strings.TrimSpace(agentEvent.Type) + if decodedType != "" && eventType != "" && decodedType != eventType { + analysis.issues = append(analysis.issues, RepairIssue{ + Code: RepairIssueEventTypeMismatch, + Severity: RepairSeverityError, + TurnID: strings.TrimSpace(event.TurnID), + EventID: event.ID, + Detail: fmt.Sprintf("stored type %q does not match payload type %q", eventType, decodedType), + }) + analysis.block = true + } + if eventType == "" { + eventType = decodedType + } + return agentEvent, eventType, true +} + +func ensureRepairTurn(turns map[string]*repairTurnState, turnID string) *repairTurnState { + turn := turns[turnID] + if turn != nil { + return turn + } + turn = &repairTurnState{ + turnID: turnID, + toolCalls: make(map[string]repairToolCall), + toolResults: make(map[string]struct{}), + } + turns[turnID] = turn + return turn +} + +func applyRepairEvent( + turn *repairTurnState, + event repairEvent, + eventType string, + analysis *repairAnalysis, +) { + switch eventType { + case acp.EventTypeUserMessage, + acp.EventTypeSyntheticReentry, + acp.EventTypeAgentMessage, + acp.EventTypeThought, + acp.EventTypePlan, + acp.EventTypeRuntimeProgress, + acp.EventTypeRuntimeWarning, + acp.EventTypePermission, + acp.EventTypeSystem: + turn.hasPromptData = true + case acp.EventTypeToolCall: + turn.hasPromptData = true + toolCallID := strings.TrimSpace(event.agent.ToolCallID) + if toolCallID == "" { + analysis.issues = append(analysis.issues, RepairIssue{ + Code: RepairIssueDanglingToolCallMissingID, + Severity: RepairSeverityWarning, + TurnID: turn.turnID, + EventID: event.stored.ID, + Detail: "tool_call event cannot be individually closed because tool_call_id is empty", + }) + return + } + turn.toolCalls[toolCallID] = repairToolCall{ + toolName: strings.TrimSpace(event.agent.Title), + } + case acp.EventTypeToolResult: + turn.hasPromptData = true + toolCallID := strings.TrimSpace(event.agent.ToolCallID) + if toolCallID != "" { + turn.toolResults[toolCallID] = 
struct{}{} + } + case acp.EventTypeDone, acp.EventTypeError: + turn.hasPromptData = true + turn.terminal = true + } +} + +func repairDefaultStopReason(reason store.StopReason) bool { + switch reason { + case store.StopAgentCrashed, store.StopError: + return true + default: + return false + } +} + +func planRepairActions(turn repairTurnState, includeTerminal bool) []RepairAction { + actions := make([]RepairAction, 0, len(turn.toolCalls)+1) + toolCallIDs := make([]string, 0, len(turn.toolCalls)) + for toolCallID := range turn.toolCalls { + if _, ok := turn.toolResults[toolCallID]; ok { + continue + } + toolCallIDs = append(toolCallIDs, toolCallID) + } + slices.Sort(toolCallIDs) + + for _, toolCallID := range toolCallIDs { + toolCall := turn.toolCalls[toolCallID] + actions = append(actions, RepairAction{ + Code: RepairActionAppendInterruptedToolResult, + TurnID: turn.turnID, + ToolCallID: toolCallID, + ToolName: toolCall.toolName, + }) + } + if includeTerminal && turn.hasPromptData { + actions = append(actions, RepairAction{ + Code: RepairActionAppendTerminalError, + TurnID: turn.turnID, + }) + } + return actions +} + +func (m *Manager) persistRepairActions( + ctx context.Context, + recorder EventRecorder, + meta store.SessionMeta, + actions []RepairAction, +) ([]RepairAction, error) { + persisted := make([]RepairAction, 0, len(actions)) + for _, action := range actions { + event, err := m.repairActionEvent(meta, action) + if err != nil { + return persisted, err + } + content, err := marshalAgentEvent(event) + if err != nil { + return persisted, err + } + eventID := store.NewID("ev") + if err := recorder.Record(ctx, store.SessionEvent{ + ID: eventID, + TurnID: event.TurnID, + Type: event.Type, + AgentName: strings.TrimSpace(meta.AgentName), + Content: content, + Timestamp: event.Timestamp, + }); err != nil { + return persisted, fmt.Errorf("session: persist repair event for %q: %w", strings.TrimSpace(meta.ID), err) + } + + action.EventID = eventID + action.Persisted = 
true + persisted = append(persisted, action) + m.notifyRepairEvent(ctx, strings.TrimSpace(meta.ID), event) + } + return persisted, nil +} + +func (m *Manager) repairActionEvent(meta store.SessionMeta, action RepairAction) (acp.AgentEvent, error) { + now := m.now().UTC() + event := acp.AgentEvent{ + SessionID: repairACPSessionID(meta), + TurnID: strings.TrimSpace(action.TurnID), + Timestamp: now, + } + + switch action.Code { + case RepairActionAppendInterruptedToolResult: + raw, err := interruptedToolResultRaw(action.ToolCallID, action.ToolName) + if err != nil { + return acp.AgentEvent{}, err + } + event.Type = acp.EventTypeToolResult + event.ToolCallID = strings.TrimSpace(action.ToolCallID) + event.Title = firstNonEmpty(action.ToolName, "interrupted tool result") + event.Error = repairInterruptedToolMessage + event.Raw = raw + case RepairActionAppendTerminalError: + event.Type = acp.EventTypeError + event.Error = repairTerminalErrorMessage + event.StopReason = string(sessionMetaStopReason(meta)) + event.Failure = store.CloneSessionFailure(meta.Failure) + default: + return acp.AgentEvent{}, fmt.Errorf("session: unknown repair action %q", action.Code) + } + return event, nil +} + +func interruptedToolResultRaw(toolCallID string, toolName string) (json.RawMessage, error) { + metadata := map[string]any{ + "agh": map[string]any{ + "repair": true, + "toolName": strings.TrimSpace(toolName), + }, + } + payload := map[string]any{ + "sessionUpdate": "tool_call_update", + "status": "failed", + "toolCallId": strings.TrimSpace(toolCallID), + "rawOutput": map[string]string{ + "stderr": repairInterruptedToolMessage, + "error": repairInterruptedToolMessage, + }, + "content": []map[string]any{ + { + "type": "content", + "content": map[string]string{ + "type": "text", + "text": repairInterruptedToolMessage, + }, + }, + }, + "_meta": metadata, + } + data, err := json.Marshal(payload) + if err != nil { + return nil, fmt.Errorf("session: marshal interrupted tool result: %w", err) + } 
+ return data, nil +} + +func (m *Manager) notifyRepairEvent(ctx context.Context, sessionID string, event acp.AgentEvent) { + if m == nil || m.notifier == nil { + return + } + m.notifier.OnAgentEvent(ctx, sessionID, event) +} + +func repairACPSessionID(meta store.SessionMeta) string { + if meta.ACPSessionID == nil { + return "" + } + return strings.TrimSpace(*meta.ACPSessionID) +} diff --git a/internal/session/repair_test.go b/internal/session/repair_test.go new file mode 100644 index 000000000..7bf9b9847 --- /dev/null +++ b/internal/session/repair_test.go @@ -0,0 +1,376 @@ +package session + +import ( + "os" + "path/filepath" + "testing" + "time" + + "github.com/pedronauck/agh/internal/acp" + "github.com/pedronauck/agh/internal/store" + "github.com/pedronauck/agh/internal/store/sessiondb" + "github.com/pedronauck/agh/internal/testutil" + "github.com/pedronauck/agh/internal/transcript" +) + +func TestManagerRepairSession(t *testing.T) { + t.Parallel() + + t.Run("ShouldReportPlannedActionsWithoutPersistingWhenDryRun", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + meta := repairSessionMeta("sess-repair-dry", store.StopAgentCrashed, h.workspaceID) + seedRepairSession(t, h, meta, interruptedTurnEvents(t, meta.ID, meta.AgentName)...) 
+ + result, err := h.manager.RepairSession(testutil.Context(t), RepairOpts{ + SessionID: meta.ID, + DryRun: true, + }) + if err != nil { + t.Fatalf("RepairSession(dry-run) error = %v", err) + } + if result.Persisted { + t.Fatal("RepairSession(dry-run).Persisted = true, want false") + } + if got, want := len(result.Actions), 2; got != want { + t.Fatalf("RepairSession(dry-run) actions = %d, want %d", got, want) + } + for _, action := range result.Actions { + if action.Persisted { + t.Fatalf("dry-run action %#v persisted, want planned only", action) + } + } + + events := readRepairEvents(t, h, meta.ID) + if got, want := len(events), 3; got != want { + t.Fatalf("stored events after dry-run = %d, want %d", got, want) + } + if containsEventType(events, acp.EventTypeError) { + t.Fatalf("stored events contain %q after dry-run", acp.EventTypeError) + } + }) + + t.Run("ShouldAppendInterruptedToolResultAndTerminalError", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + meta := repairSessionMeta("sess-repair-append", store.StopAgentCrashed, h.workspaceID) + seedRepairSession(t, h, meta, interruptedTurnEvents(t, meta.ID, meta.AgentName)...) 
+ + result, err := h.manager.RepairSession(testutil.Context(t), RepairOpts{SessionID: meta.ID}) + if err != nil { + t.Fatalf("RepairSession() error = %v", err) + } + if !result.Persisted { + t.Fatal("RepairSession().Persisted = false, want true") + } + if got, want := len(result.Actions), 2; got != want { + t.Fatalf("RepairSession() actions = %d, want %d", got, want) + } + for _, action := range result.Actions { + if !action.Persisted || action.EventID == "" { + t.Fatalf("persisted action = %#v, want persisted event id", action) + } + } + + events := readRepairEvents(t, h, meta.ID) + if got, want := len(events), 5; got != want { + t.Fatalf("stored events after repair = %d, want %d", got, want) + } + if got, want := events[3].Type, acp.EventTypeToolResult; got != want { + t.Fatalf("events[3].Type = %q, want %q", got, want) + } + if got, want := events[4].Type, acp.EventTypeError; got != want { + t.Fatalf("events[4].Type = %q, want %q", got, want) + } + + toolResult, err := transcript.UnmarshalAgentEvent(events[3].Content) + if err != nil { + t.Fatalf("UnmarshalAgentEvent(tool result) error = %v", err) + } + if got, want := toolResult.ToolCallID, "tool-1"; got != want { + t.Fatalf("repair tool result ToolCallID = %q, want %q", got, want) + } + if toolResult.Error == "" { + t.Fatal("repair tool result Error = empty, want interruption detail") + } + + messages, err := h.manager.Transcript(testutil.Context(t), meta.ID) + if err != nil { + t.Fatalf("Transcript(repaired) error = %v", err) + } + assertTranscriptHasDonePart(t, messages) + }) + + t.Run("ShouldAppendInterruptedToolResultWithoutDuplicatingTerminalEvent", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + meta := repairSessionMeta("sess-repair-terminal-present", store.StopAgentCrashed, h.workspaceID) + events := append( + interruptedTurnEvents(t, meta.ID, meta.AgentName), + repairStoredEvent( + t, + meta.ID, + meta.AgentName, + acp.EventTypeError, + "turn-1", + time.Date(2026, 4, 28, 13, 0, 3, 0, 
time.UTC), + acp.AgentEvent{ + Type: acp.EventTypeError, + Error: repairTerminalErrorMessage, + }, + ), + ) + seedRepairSession(t, h, meta, events...) + + result, err := h.manager.RepairSession(testutil.Context(t), RepairOpts{SessionID: meta.ID}) + if err != nil { + t.Fatalf("RepairSession(terminal present) error = %v", err) + } + if !result.Persisted { + t.Fatal("RepairSession(terminal present).Persisted = false, want true") + } + if !repairIssuesContain(result.Issues, RepairIssueTerminalEventAlreadyExists) { + t.Fatalf( + "RepairSession(terminal present) issues = %#v, want %q", + result.Issues, + RepairIssueTerminalEventAlreadyExists, + ) + } + if got, want := len(result.Actions), 1; got != want { + t.Fatalf("RepairSession(terminal present) actions = %d, want %d", got, want) + } + if got, want := result.Actions[0].Code, RepairActionAppendInterruptedToolResult; got != want { + t.Fatalf("RepairSession(terminal present) action code = %q, want %q", got, want) + } + + storedEvents := readRepairEvents(t, h, meta.ID) + if got, want := len(storedEvents), 5; got != want { + t.Fatalf("stored events after repair with terminal = %d, want %d", got, want) + } + if got, want := storedEvents[4].Type, acp.EventTypeToolResult; got != want { + t.Fatalf("storedEvents[4].Type = %q, want %q", got, want) + } + errorCount := 0 + for _, event := range storedEvents { + if event.Type == acp.EventTypeError { + errorCount++ + } + } + if got, want := errorCount, 1; got != want { + t.Fatalf("stored error events = %d, want %d", got, want) + } + }) + + t.Run("ShouldReportInvalidEventJSONWithoutMutating", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + meta := repairSessionMeta("sess-repair-invalid-json", store.StopAgentCrashed, h.workspaceID) + seedRepairSession(t, h, meta, store.SessionEvent{ + TurnID: "turn-1", + Type: acp.EventTypeAgentMessage, + AgentName: meta.AgentName, + Content: "{", + Timestamp: time.Date(2026, 4, 28, 13, 0, 0, 0, time.UTC), + }) + + result, err := 
h.manager.RepairSession(testutil.Context(t), RepairOpts{SessionID: meta.ID}) + if err != nil { + t.Fatalf("RepairSession(invalid JSON) error = %v", err) + } + if result.Persisted { + t.Fatal("RepairSession(invalid JSON).Persisted = true, want false") + } + if !repairIssuesContain(result.Issues, RepairIssueInvalidEventJSON) { + t.Fatalf("RepairSession(invalid JSON) issues = %#v, want %q", result.Issues, RepairIssueInvalidEventJSON) + } + + events := readRepairEvents(t, h, meta.ID) + if got, want := len(events), 1; got != want { + t.Fatalf("stored events after invalid JSON repair = %d, want %d", got, want) + } + }) + + t.Run("ShouldRequireForceForStoppedNonCrashSession", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + meta := repairSessionMeta("sess-repair-force", store.StopCompleted, h.workspaceID) + seedRepairSession(t, h, meta, interruptedTurnEvents(t, meta.ID, meta.AgentName)...) + + blocked, err := h.manager.RepairSession(testutil.Context(t), RepairOpts{SessionID: meta.ID}) + if err != nil { + t.Fatalf("RepairSession(non-force) error = %v", err) + } + if blocked.Persisted { + t.Fatal("RepairSession(non-force).Persisted = true, want false") + } + if !repairIssuesContain(blocked.Issues, RepairIssueStopReasonRequiresForce) { + t.Fatalf("RepairSession(non-force) issues = %#v, want force issue", blocked.Issues) + } + + forced, err := h.manager.RepairSession(testutil.Context(t), RepairOpts{ + SessionID: meta.ID, + Force: true, + }) + if err != nil { + t.Fatalf("RepairSession(force) error = %v", err) + } + if !forced.Persisted { + t.Fatal("RepairSession(force).Persisted = false, want true") + } + if got, want := len(forced.Actions), 2; got != want { + t.Fatalf("RepairSession(force) actions = %d, want %d", got, want) + } + }) +} + +func repairSessionMeta(id string, reason store.StopReason, workspaceID string) store.SessionMeta { + now := time.Date(2026, 4, 28, 13, 0, 0, 0, time.UTC) + return store.SessionMeta{ + ID: id, + Name: id, + AgentName: "coder", + 
WorkspaceID: workspaceID, + State: string(StateStopped), + StopReason: &reason, + CreatedAt: now, + UpdatedAt: now, + } +} + +func interruptedTurnEvents(t *testing.T, sessionID string, agentName string) []store.SessionEvent { + t.Helper() + + base := time.Date(2026, 4, 28, 13, 0, 0, 0, time.UTC) + return []store.SessionEvent{ + repairStoredEvent(t, sessionID, agentName, acp.EventTypeUserMessage, "turn-1", base, acp.AgentEvent{ + Type: acp.EventTypeUserMessage, + Text: "run pwd", + }), + repairStoredEvent( + t, + sessionID, + agentName, + acp.EventTypeAgentMessage, + "turn-1", + base.Add(time.Second), + acp.AgentEvent{ + Type: acp.EventTypeAgentMessage, + Text: "Running command", + }, + ), + repairStoredEvent( + t, + sessionID, + agentName, + acp.EventTypeToolCall, + "turn-1", + base.Add(2*time.Second), + acp.AgentEvent{ + Type: acp.EventTypeToolCall, + Title: "Bash", + ToolCallID: "tool-1", + Raw: []byte(`{"rawInput":{"command":"pwd"},"_meta":{"claudeCode":{"toolName":"Bash"}}}`), + }, + ), + } +} + +func repairStoredEvent( + t *testing.T, + sessionID string, + agentName string, + eventType string, + turnID string, + timestamp time.Time, + event acp.AgentEvent, +) store.SessionEvent { + t.Helper() + + event.SessionID = sessionID + event.TurnID = turnID + event.Timestamp = timestamp + payload, err := transcript.MarshalAgentEvent(event) + if err != nil { + t.Fatalf("MarshalAgentEvent(%s) error = %v", eventType, err) + } + return store.SessionEvent{ + TurnID: turnID, + Type: eventType, + AgentName: agentName, + Content: payload, + Timestamp: timestamp, + } +} + +func seedRepairSession(t *testing.T, h *harness, meta store.SessionMeta, events ...store.SessionEvent) { + t.Helper() + + sessionDir := filepath.Join(h.homePaths.SessionsDir, meta.ID) + if err := os.MkdirAll(sessionDir, 0o755); err != nil { + t.Fatalf("MkdirAll(sessionDir) error = %v", err) + } + if err := store.WriteSessionMeta(store.SessionMetaFile(sessionDir), meta); err != nil { + 
t.Fatalf("WriteSessionMeta() error = %v", err) + } + + recorder, err := sessiondb.OpenSessionDB(testutil.Context(t), meta.ID, store.SessionDBFile(sessionDir)) + if err != nil { + t.Fatalf("OpenSessionDB() error = %v", err) + } + for _, event := range events { + if err := recorder.Record(testutil.Context(t), event); err != nil { + t.Fatalf("Record(%s) error = %v", event.Type, err) + } + } + if err := recorder.Close(testutil.Context(t)); err != nil { + t.Fatalf("Close(seed recorder) error = %v", err) + } +} + +func readRepairEvents(t *testing.T, h *harness, sessionID string) []store.SessionEvent { + t.Helper() + + dbPath := store.SessionDBFile(filepath.Join(h.homePaths.SessionsDir, sessionID)) + recorder, err := sessiondb.OpenSessionDB(testutil.Context(t), sessionID, dbPath) + if err != nil { + t.Fatalf("OpenSessionDB(read) error = %v", err) + } + defer func() { + if err := recorder.Close(testutil.Context(t)); err != nil { + t.Fatalf("Close(read recorder) error = %v", err) + } + }() + + events, err := recorder.Query(testutil.Context(t), store.EventQuery{}) + if err != nil { + t.Fatalf("Query(read) error = %v", err) + } + return events +} + +func repairIssuesContain(issues []RepairIssue, code string) bool { + for _, issue := range issues { + if issue.Code == code { + return true + } + } + return false +} + +func assertTranscriptHasDonePart(t *testing.T, messages []transcript.UIMessage) { + t.Helper() + + for _, message := range messages { + for _, part := range message.Parts { + if part.State == "done" { + return + } + } + } + t.Fatalf("Transcript() = %#v, want at least one done part after repair", messages) +} diff --git a/openapi/agh.json b/openapi/agh.json index 3a4663c69..e4890a1e5 100644 --- a/openapi/agh.json +++ b/openapi/agh.json @@ -22154,6 +22154,179 @@ "x-agh-transports": ["http", "uds"] } }, + "/api/sessions/{id}/repair": { + "post": { + "operationId": "repairSession", + "parameters": [ + { + "description": "Session id", + "in": "path", + "name": "id", + 
"required": true, + "schema": { + "type": "string" + } + }, + { + "description": "Report planned repairs without persisting new events", + "in": "query", + "name": "dry_run", + "schema": { + "type": "boolean" + } + }, + { + "description": "Allow repair for stopped sessions whose stop reason is not crash or error", + "in": "query", + "name": "force", + "schema": { + "type": "boolean" + } + } + ], + "responses": { + "200": { + "content": { + "application/json": { + "schema": { + "properties": { + "repair": { + "properties": { + "actions": { + "items": { + "properties": { + "code": { + "type": "string" + }, + "event_id": { + "type": "string" + }, + "persisted": { + "type": "boolean" + }, + "tool_call_id": { + "type": "string" + }, + "tool_name": { + "type": "string" + }, + "turn_id": { + "type": "string" + } + }, + "required": [ + "code", + "persisted", + "turn_id" + ], + "type": "object" + }, + "type": "array" + }, + "issues": { + "items": { + "properties": { + "code": { + "type": "string" + }, + "detail": { + "type": "string" + }, + "event_id": { + "type": "string" + }, + "severity": { + "type": "string" + }, + "turn_id": { + "type": "string" + } + }, + "required": ["code", "severity"], + "type": "object" + }, + "type": "array" + }, + "persisted": { + "type": "boolean" + }, + "session_id": { + "type": "string" + } + }, + "required": [ + "actions", + "issues", + "persisted", + "session_id" + ], + "type": "object" + } + }, + "required": ["repair"], + "type": "object" + } + } + }, + "description": "OK" + }, + "400": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Invalid repair options" + }, + "404": { + "content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Session not found" + }, + "500": { + 
"content": { + "application/json": { + "schema": { + "properties": { + "error": { + "type": "string" + } + }, + "required": ["error"], + "type": "object" + } + } + }, + "description": "Internal server error" + }, + "default": { + "description": "" + } + }, + "summary": "Inspect and repair an interrupted session transcript", + "tags": ["sessions"], + "x-agh-transports": ["http", "uds"] + } + }, "/api/sessions/{id}/resume": { "post": { "operationId": "resumeSession", diff --git a/packages/site/components/landing/__tests__/landing.test.tsx b/packages/site/components/landing/__tests__/landing.test.tsx index 98952973d..c68d555f2 100644 --- a/packages/site/components/landing/__tests__/landing.test.tsx +++ b/packages/site/components/landing/__tests__/landing.test.tsx @@ -37,7 +37,9 @@ import { NetworkSection } from "../network-section"; import { InstallSection } from "../install-section"; import { Comparison } from "../comparison"; import { FinalCta } from "../final-cta"; -import { KindChip, KIND_MEANING, type NetworkKind } from "../primitives/kind-chip"; +import { Pill } from "@agh/ui"; + +import { KIND_MEANING, type NetworkKind } from "../primitives/network-kinds"; describe("Hero", () => { it("leads with the runtime + network headline and drops ACP from the fold", () => { @@ -339,8 +341,8 @@ describe("FinalCta", () => { }); }); -describe("KindChip", () => { - it("has a meaning string for every NetworkKind", () => { +describe("Network kind pill", () => { + it("has a meaning string for every NetworkKind and renders inside Pill", () => { const kinds: NetworkKind[] = [ "greet", "whois", @@ -352,7 +354,11 @@ describe("KindChip", () => { ]; for (const kind of kinds) { expect(KIND_MEANING[kind]).toBeDefined(); - render(); + render( + + {kind} + + ); expect(screen.getAllByText(kind)).toBeDefined(); } }); diff --git a/packages/site/components/landing/bridges-section.tsx b/packages/site/components/landing/bridges-section.tsx index 471b61ca1..34b805a7f 100644 --- 
a/packages/site/components/landing/bridges-section.tsx +++ b/packages/site/components/landing/bridges-section.tsx @@ -1,5 +1,6 @@ import { ArrowRight } from "lucide-react"; import type { ReactNode } from "react"; +import { Pill } from "@agh/ui"; import { DiscordLogo, GithubLogo, @@ -10,7 +11,6 @@ import { TelegramLogo, WhatsAppLogo, } from "@agh/ui/logos"; -import { MonoBadge } from "./primitives/mono-badge"; import { SectionFrame } from "./primitives/section-frame"; import { SectionHeader } from "./primitives/section-header"; @@ -84,9 +84,13 @@ export function BridgesSection() {
{bridge.logo}
{bridge.status === "live" ? ( - live + + live + ) : ( - next + + next + )}

{bridge.name}

@@ -104,7 +108,9 @@ export function BridgesSection() {

How a bridge delivers a session

- inside the daemon + + inside the daemon +
diff --git a/packages/site/components/landing/network-protocol-visual.tsx b/packages/site/components/landing/network-protocol-visual.tsx index 57665e0cc..7f9054789 100644 --- a/packages/site/components/landing/network-protocol-visual.tsx +++ b/packages/site/components/landing/network-protocol-visual.tsx @@ -2,10 +2,10 @@ import { useEffect, useReducer, useRef } from "react"; import { ArrowLeftRight, Pause, Play } from "lucide-react"; -import { Button } from "@agh/ui"; +import { Button, Pill } from "@agh/ui"; import { cn } from "@agh/ui/utils"; import { AnimatedDiagram } from "./primitives/animated-diagram"; -import { KindChip, type NetworkKind } from "./primitives/kind-chip"; +import { KIND_MEANING, type NetworkKind } from "./primitives/network-kinds"; type Lane = "A" | "NET" | "B"; type Direction = "->" | "<-" | ".."; @@ -232,7 +232,15 @@ function Inner({ active, reducedMotion }: { active: boolean; reducedMotion: bool
- + + {step.kind} + ; - -interface KindChipProps { - kind: NetworkKind; - className?: string; - /** Force a visual "active" / highlighted state. */ - active?: boolean; -} - -export function KindChip({ kind, className, active = false }: KindChipProps) { - return ( - - {kind} - - ); -} diff --git a/packages/site/components/landing/primitives/mono-badge.tsx b/packages/site/components/landing/primitives/mono-badge.tsx deleted file mode 100644 index d17a3f4c9..000000000 --- a/packages/site/components/landing/primitives/mono-badge.tsx +++ /dev/null @@ -1,31 +0,0 @@ -import type { ReactNode } from "react"; -import { cn } from "@agh/ui/utils"; - -type Tone = "accent" | "neutral" | "success"; - -const TONE_CLASS: Record = { - accent: "bg-(--color-accent-tint) text-(--color-accent)", - neutral: "bg-(--color-surface-elevated) text-(--color-text-tertiary)", - success: "bg-(--color-success-tint) text-(--color-success)", -}; - -interface MonoBadgeProps { - children: ReactNode; - tone?: Tone; - className?: string; -} - -/** 11px uppercase mono chip — the ubiquitous eyebrow/label on the landing page. */ -export function MonoBadge({ children, tone = "accent", className }: MonoBadgeProps) { - return ( - - {children} - - ); -} diff --git a/packages/site/components/landing/primitives/network-kinds.ts b/packages/site/components/landing/primitives/network-kinds.ts new file mode 100644 index 000000000..e0256c837 --- /dev/null +++ b/packages/site/components/landing/primitives/network-kinds.ts @@ -0,0 +1,16 @@ +/** + * Wire-protocol kinds rendered on the landing diagrams. Kept as data-only so + * the chrome (a `Pill mono` from `@agh/ui`) can be composed inline by callers. + */ +export type NetworkKind = "greet" | "whois" | "say" | "direct" | "capability" | "receipt" | "trace"; + +/** One-line purpose for every kind — tooltip copy, alt text, and copy audit source. 
*/ +export const KIND_MEANING = { + greet: "Announce presence + capabilities to a channel", + whois: "Ask the network which peers match a capability", + say: "Free-form operator chat to a channel", + direct: "Send a structured task to a named peer", + capability: "Transfer a full capability artifact to a peer", + receipt: "Confirm completion with status and trace IDs", + trace: "Stream progress updates during a task", +} as const satisfies Record; diff --git a/packages/site/content/runtime/cli-reference/session/index.mdx b/packages/site/content/runtime/cli-reference/session/index.mdx index b9b82de71..2a388b32a 100644 --- a/packages/site/content/runtime/cli-reference/session/index.mdx +++ b/packages/site/content/runtime/cli-reference/session/index.mdx @@ -30,14 +30,15 @@ Every AGH command supports `-o, --output`: ## Subcommands -| Command | Description | -| ------------------------------------------------------------- | ------------------------------------ | -| [agh session events](/runtime/cli-reference/session/events) | Read session events | -| [agh session history](/runtime/cli-reference/session/history) | Show session history grouped by turn | -| [agh session list](/runtime/cli-reference/session/list) | List sessions | -| [agh session new](/runtime/cli-reference/session/new) | Create a new session | -| [agh session prompt](/runtime/cli-reference/session/prompt) | Send a prompt to a session | -| [agh session resume](/runtime/cli-reference/session/resume) | Resume a stopped session | -| [agh session status](/runtime/cli-reference/session/status) | Show session status | -| [agh session stop](/runtime/cli-reference/session/stop) | Stop a session | -| [agh session wait](/runtime/cli-reference/session/wait) | Block until a session stops | +| Command | Description | +| ------------------------------------------------------------- | ---------------------------------------------------- | +| [agh session events](/runtime/cli-reference/session/events) | Read session events 
| +| [agh session history](/runtime/cli-reference/session/history) | Show session history grouped by turn | +| [agh session list](/runtime/cli-reference/session/list) | List sessions | +| [agh session new](/runtime/cli-reference/session/new) | Create a new session | +| [agh session prompt](/runtime/cli-reference/session/prompt) | Send a prompt to a session | +| [agh session repair](/runtime/cli-reference/session/repair) | Inspect and repair an interrupted session transcript | +| [agh session resume](/runtime/cli-reference/session/resume) | Resume a stopped session | +| [agh session status](/runtime/cli-reference/session/status) | Show session status | +| [agh session stop](/runtime/cli-reference/session/stop) | Stop a session | +| [agh session wait](/runtime/cli-reference/session/wait) | Block until a session stops | diff --git a/packages/site/content/runtime/cli-reference/session/meta.json b/packages/site/content/runtime/cli-reference/session/meta.json index b39b466fd..863962d37 100644 --- a/packages/site/content/runtime/cli-reference/session/meta.json +++ b/packages/site/content/runtime/cli-reference/session/meta.json @@ -7,6 +7,7 @@ "list", "new", "prompt", + "repair", "resume", "status", "stop", diff --git a/packages/site/content/runtime/cli-reference/session/repair.mdx b/packages/site/content/runtime/cli-reference/session/repair.mdx new file mode 100644 index 000000000..fb3724e96 --- /dev/null +++ b/packages/site/content/runtime/cli-reference/session/repair.mdx @@ -0,0 +1,51 @@ +--- +title: "agh session repair" +description: "Inspect and repair an interrupted session transcript" +--- + +## agh session repair + +Inspect and repair an interrupted session transcript + +``` +agh session repair [flags] +``` + +### Examples + +``` + # Report the repair actions without writing new events + agh session repair sess_1234 --dry-run + + # Force repair for a stopped session whose stop reason is not crash or error + agh session repair sess_1234 --force +``` + +### Options + 
+``` + --dry-run Report planned repairs without persisting events + --force Allow repair for stopped non-crash sessions + -h, --help help for repair +``` + +### Options inherited from parent commands + +``` + -o, --output string Output format: human, json, jsonl, or toon (default "human") +``` + +## Output Formats + +Every AGH command supports `-o, --output`: + +- `human` for interactive terminal use +- `json` for scripts and other machine-readable consumers +- `jsonl` for wait or streaming commands that emit one JSON record per line +- `toon` for compact agent-readable summaries + +Example: + +```bash +agh session repair -o json +``` diff --git a/packages/site/content/runtime/core/operations/troubleshooting.mdx b/packages/site/content/runtime/core/operations/troubleshooting.mdx index ac15f7206..86a7e5c91 100644 --- a/packages/site/content/runtime/core/operations/troubleshooting.mdx +++ b/packages/site/content/runtime/core/operations/troubleshooting.mdx @@ -141,6 +141,7 @@ Commands: agh daemon start agh session list --all agh session status <session-id> +agh session repair <session-id> --dry-run ``` The repair rules are: @@ -151,6 +152,15 @@ The repair rules are: | `stopping` | `stopped` with stop reason `agent_crashed` | | `starting` | `stopped` with stop reason `error` | +For sessions already stopped with `agent_crashed` or `error`, boot also repairs interrupted +transcripts by appending terminal repair events. If a transcript or chat replay still shows a +dangling tool call or streaming assistant message after restart, run: + +```bash +agh session repair <session-id> --dry-run +agh session repair <session-id> +``` + If resume still fails, check that the workspace directory, agent definition, and `$AGH_HOME/sessions/<session-id>/events.db` still exist. 
diff --git a/packages/site/content/runtime/core/sessions/lifecycle.mdx b/packages/site/content/runtime/core/sessions/lifecycle.mdx index a75b2dd43..dda256519 100644 --- a/packages/site/content/runtime/core/sessions/lifecycle.mdx +++ b/packages/site/content/runtime/core/sessions/lifecycle.mdx @@ -231,7 +231,18 @@ That repair is what makes status, list, and resume safe after a crash: - stale `stopping` becomes `stopped` with `agent_crashed`, `process_exit`, and detail `stop did not complete` - stale `starting` becomes `stopped` with `error`, `startup_failure`, and detail `start did not complete` +During daemon boot, AGH also inspects stopped sessions whose stop reason is `agent_crashed` or +`error`. If the final persisted turn was interrupted, AGH appends repair events to terminalize the +transcript: dangling tool calls receive interrupted tool results, then the turn receives a terminal +error event. The repair is append-only; AGH does not truncate, delete, or resequence session events. + This repair happens before resume validation. The repaired session keeps the same AGH session ID. +Operators and agents can inspect or run the same transcript repair explicitly: + +```bash +agh session repair <session-id> --dry-run +agh session repair <session-id> +``` ## What gets persisted diff --git a/packages/site/content/runtime/core/sessions/resume.mdx b/packages/site/content/runtime/core/sessions/resume.mdx index 2420c959a..a628113d8 100644 --- a/packages/site/content/runtime/core/sessions/resume.mdx +++ b/packages/site/content/runtime/core/sessions/resume.mdx @@ -67,6 +67,12 @@ For an inactive session, AGH follows this sequence: 5. Attempt ACP `session/load`. 6. On success, transition the AGH session back to `active`. +Daemon boot also performs append-only transcript repair for sessions stopped with `agent_crashed` or +`error`. That pass closes interrupted final turns before replay, so resumed sessions do not inherit +permanently streaming assistant messages or dangling tool calls. 
Use +`agh session repair <session-id> --dry-run` when you want to inspect the planned repair before +writing events. + ### Native ACP resume If the agent supports `session/load`, AGH asks the upstream ACP runtime to restore its own native diff --git a/packages/ui/.storybook/preview.css b/packages/ui/.storybook/preview.css index 4cb5c8bd7..e72841f9e 100644 --- a/packages/ui/.storybook/preview.css +++ b/packages/ui/.storybook/preview.css @@ -1,3 +1,5 @@ @import "tailwindcss"; +@import "@agh/ui/tokens.css"; +@import "shadcn/tailwind.css"; @source "../src/**/*.{ts,tsx}"; diff --git a/packages/ui/.storybook/preview.ts b/packages/ui/.storybook/preview.ts index 1d598c756..a44e0b4a4 100644 --- a/packages/ui/.storybook/preview.ts +++ b/packages/ui/.storybook/preview.ts @@ -2,7 +2,6 @@ import type { Preview } from "@storybook/react-vite"; import { withThemeByClassName } from "@storybook/addon-themes"; import "./preview.css"; -import "@agh/ui/tokens.css"; export const themeDecorator = withThemeByClassName({ themes: { diff --git a/packages/ui/README.md b/packages/ui/README.md index 83e4a2836..75b500ccd 100644 --- a/packages/ui/README.md +++ b/packages/ui/README.md @@ -26,7 +26,7 @@ package live in the maintained docs below. Ask one question: **is this a domain-free shape that could serve a second surface without code changes?** -If yes — keep it here. Pull tokens from [`./src/tokens.css`](./src/tokens.css), wire the API in terms of slots (`rail`, `list`, `detail`, `leading`, `trailing`, …) and variants (`tone`, `size`, `density`), and hold no AGH-specific defaults in its props. Examples: `Sidebar`, `SplitPane`, `Metric`, `StatusDot`, `ConnectionIndicator`, `ChatMessageBubble` (the **shell**, not the session-aware message body). +If yes — keep it here. Pull tokens from [`./src/tokens.css`](./src/tokens.css), wire the API in terms of slots (`rail`, `list`, `detail`, `leading`, `trailing`, …) and variants (`tone`, `size`, `density`), and hold no AGH-specific defaults in its props. 
Examples: `Sidebar`, `SplitPane`, `Metric`, `Pill`, `ChatMessageBubble` (the **shell**, not the session-aware message body). If it reads session events, hits a TanStack query, consumes the `agh-openapi` types, or only makes sense inside one domain — keep it in `web/src/systems//components/`. The `@agh/ui` shell stays ignorant of that domain; the domain component composes the shell. This matches the package boundary documented above — `@agh/ui` does not import from `web/src/**`. @@ -99,23 +99,18 @@ Controls, selection, and the input scaffolding primitives. Status, alerting, progress, and density-sensitive signal primitives. -| Export | Story | Notes | -| -------------------------------------------------------------------------------------------------------------------------------- | ----------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | -| `Alert` · `AlertTitle` · `AlertDescription` · `AlertAction` · `alertVariants` · `AlertProps` | [`alert.stories.tsx`](./src/components/stories/alert.stories.tsx) | Inline alerts (default/destructive/warning/success/info/accent). | -| `Progress` · `ProgressTrack` · `ProgressIndicator` · `ProgressLabel` · `ProgressValue` | [`progress.stories.tsx`](./src/components/stories/progress.stories.tsx) | Linear progress. | -| `Badge` · `badgeVariants` | [`badge.stories.tsx`](./src/components/stories/badge.stories.tsx) | Tinted badges — use `MonoBadge` for status, `KindChip` for kind labels. | -| `Skeleton` | [`skeleton.stories.tsx`](./src/components/stories/skeleton.stories.tsx) | Shimmer placeholder. | -| `Spinner` | [`spinner.stories.tsx`](./src/components/stories/spinner.stories.tsx) | Spinner atom. 
| -| `Toaster` · `toast` · `ToasterProps` | [`sonner.stories.tsx`](./src/components/stories/sonner.stories.tsx) | `sonner` re-export. Mount `<Toaster />` once at the app root. Default `theme="system"`. | -| `StatusDot` · `StatusDotProps` · `StatusDotTone` · `StatusDotSize` | [`status-dot.stories.tsx`](./src/components/stories/status-dot.stories.tsx) | Live-status dot. Tone vocabulary: `success \| warning \| danger \| info \| accent \| neutral`. | -| `MonoBadge` · `monoBadgeVariants` · `MonoBadgeProps` · `MonoBadgeTone` | [`mono-badge.stories.tsx`](./src/components/stories/mono-badge.stories.tsx) | 11px mono status badge (`RUNNING`, `DONE`, `ERROR`, …). `tone="solid-accent"` is reserved for unread pills. | -| `MonoChip` · `MonoChipProps` | [`mono-chip.stories.tsx`](./src/components/stories/mono-chip.stories.tsx) | Neutral inline chip — capability descriptors, tag rows. For tinted semantic variants use `MonoBadge`. | -| `KindChip` · `KindChipProps` · `KIND_DOT_COLORS` | [`kind-chip.stories.tsx`](./src/components/stories/kind-chip.stories.tsx) | Wire-dot kind marker (`say`, `greet`, `direct`, `receipt`, `recipe`, `trace`, `whois`). Unknown kinds render without a dot. `KIND_DOT_COLORS` is the canonical kind→color map. | -| `WireChip` · `WireChipProps` | [`wire-chip.stories.tsx`](./src/components/stories/wire-chip.stories.tsx) | Free-floating filter chip with optional leading wire-dot. For a contained segmented toggle use `Pills` instead. | -| `ConnectionIndicator` · `ConnectionIndicatorProps` · `ConnectionStatus` | [`connection-indicator.stories.tsx`](./src/components/stories/connection-indicator.stories.tsx) | Live-connection composite (`StatusDot` + label). Default labels `Connected` / `Disconnected` / `Reconnecting`. | -| `Metric` · `MetricProps` · `MetricTone` | [`metric.stories.tsx`](./src/components/stories/metric.stories.tsx) | Dashboard metric with `detail` (inline mono unit) + `subtext` (secondary line) slots. 
| -| `Pill` · `Pills` · `pillVariants` · `pillToggleVariants` · `PillProps` · `PillsProps` · `PillsItem` · `PillVariant` · `PillSize` | [`pills.stories.tsx`](./src/components/stories/pills.stories.tsx) | `Pill` standalone + `Pills` tablist (`role="tab"`, `aria-selected`). | -| `Avatar` · `AvatarBadge` · `AvatarFallback` · `AvatarGroup` · `AvatarGroupCount` · `AvatarImage` | [`avatar.stories.tsx`](./src/components/stories/avatar.stories.tsx) | Identity avatar with grouping. | +| Export | Story | Notes | +| ------------------------------------------------------------------------------------------------ | --------------------------------------------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| `Alert` · `AlertTitle` · `AlertDescription` · `AlertAction` · `alertVariants` · `AlertProps` | [`alert.stories.tsx`](./src/components/stories/alert.stories.tsx) | Inline alerts (default/destructive/warning/success/info/accent). | +| `Progress` · `ProgressTrack` · `ProgressIndicator` · `ProgressLabel` · `ProgressValue` | [`progress.stories.tsx`](./src/components/stories/progress.stories.tsx) | Linear progress. | +| `Badge` · `badgeVariants` | [`badge.stories.tsx`](./src/components/stories/badge.stories.tsx) | shadcn baseline — kept untouched so we can pull future shadcn updates. Use `Pill` for the AGH design-system pill family. | +| `Skeleton` | [`skeleton.stories.tsx`](./src/components/stories/skeleton.stories.tsx) | Shimmer placeholder. | +| `Spinner` | [`spinner.stories.tsx`](./src/components/stories/spinner.stories.tsx) | Spinner atom. | +| `Toaster` · `toast` · `ToasterProps` | [`sonner.stories.tsx`](./src/components/stories/sonner.stories.tsx) | `sonner` re-export. 
Mount `<Toaster />` once at the app root. Default `theme="system"`. | +| `Pill` · `PillDot` · `pillVariants` · `PillProps` · `PillDotProps` · `PillTone` · `PillSize` | [`pill.stories.tsx`](./src/components/stories/pill.stories.tsx) | Unified semantic pill — `tone` × `size` × `mono` × `solid` + composable `Pill.Dot`. Replaces the legacy `MonoBadge`, `MonoChip`, `KindChip`, `StatusDot`, `WireChip`, `ConnectionIndicator`. Compose `<Pill><Pill.Dot />label</Pill>` for kind chips; `<Pill.Dot />` standalone replaces `StatusDot`. | +| `PillGroup` · `pillGroupSegmentVariants` · `PillGroupProps` · `PillGroupItem` · `PillGroupSize` | [`pill-group.stories.tsx`](./src/components/stories/pill-group.stories.tsx) | Segmented toggle track — `items` + controlled `value`/`onChange`. Renamed from the legacy `Pills`. | +| `Metric` · `MetricProps` · `MetricTone` | [`metric.stories.tsx`](./src/components/stories/metric.stories.tsx) | Dashboard metric with `detail` (inline mono unit) + `subtext` (secondary line) slots. | +| `Avatar` · `AvatarBadge` · `AvatarFallback` · `AvatarGroup` · `AvatarGroupCount` · `AvatarImage` | [`avatar.stories.tsx`](./src/components/stories/avatar.stories.tsx) | Identity avatar with grouping. 
| ### Chat diff --git a/packages/ui/src/components/connection-indicator.test.tsx b/packages/ui/src/components/connection-indicator.test.tsx deleted file mode 100644 index 8bcf47868..000000000 --- a/packages/ui/src/components/connection-indicator.test.tsx +++ /dev/null @@ -1,91 +0,0 @@ -import { render } from "@testing-library/react"; -import { MotionConfig } from "motion/react"; -import type { ReactNode } from "react"; -import { describe, expect, it } from "vitest"; - -import { ConnectionIndicator, type ConnectionStatus } from "./connection-indicator"; - -function WithMotion({ - reducedMotion, - children, -}: { - reducedMotion: "always" | "never"; - children: ReactNode; -}) { - return {children}; -} - -describe("ConnectionIndicator", () => { - it.each<{ - status: ConnectionStatus; - tone: string; - label: string; - }>([ - { status: "connected", tone: "success", label: "Connected" }, - { status: "disconnected", tone: "danger", label: "Disconnected" }, - { status: "reconnecting", tone: "warning", label: "Reconnecting" }, - ])( - "Should compose a StatusDot with the correct tone + default label for $status", - ({ status, tone, label }) => { - const { container } = render(); - const root = container.querySelector('[data-slot="connection-indicator"]'); - expect(root?.getAttribute("data-status")).toBe(status); - expect(root?.getAttribute("role")).toBe("status"); - expect(root?.getAttribute("aria-live")).toBe("polite"); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot?.getAttribute("data-tone")).toBe(tone); - const labelNode = container.querySelector( - '[data-slot="connection-indicator-label"]' - ); - expect(labelNode?.textContent).toBe(label); - } - ); - - it("Should pulse the dot while reconnecting", () => { - const { container } = render( - - - - ); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot?.className).toContain("animate-pulse"); - }); - - it("Should not pulse the dot while connected or 
disconnected", () => { - const { container: connected } = render( - - - - ); - expect( - connected.querySelector('[data-slot="status-dot"]')?.className - ).not.toContain("animate-pulse"); - const { container: disconnected } = render( - - - - ); - expect( - disconnected.querySelector('[data-slot="status-dot"]')?.className - ).not.toContain("animate-pulse"); - }); - - it("Should suppress the pulse when prefers-reduced-motion is reduce", () => { - const { container } = render( - - - - ); - expect( - container.querySelector('[data-slot="status-dot"]')?.className - ).not.toContain("animate-pulse"); - }); - - it("Should allow overriding the default label", () => { - const { container } = render(); - const labelNode = container.querySelector( - '[data-slot="connection-indicator-label"]' - ); - expect(labelNode?.textContent).toBe("Live"); - }); -}); diff --git a/packages/ui/src/components/input-group.tsx b/packages/ui/src/components/input-group.tsx index 5857ad8dc..ff4da8ee2 100644 --- a/packages/ui/src/components/input-group.tsx +++ b/packages/ui/src/components/input-group.tsx @@ -12,7 +12,7 @@ function InputGroup({ className, ...props }: React.ComponentProps<"div">) { data-slot="input-group" role="group" className={cn( - "group/input-group relative flex h-8 w-full min-w-0 items-center rounded-lg border border-input transition-colors outline-none in-data-[slot=combobox-content]:focus-within:border-inherit in-data-[slot=combobox-content]:focus-within:ring-0 has-disabled:bg-input/50 has-disabled:opacity-50 has-[[data-slot=input-group-control]:focus-visible]:border-ring has-[[data-slot=input-group-control]:focus-visible]:ring-3 has-[[data-slot=input-group-control]:focus-visible]:ring-ring/50 has-[[data-slot][aria-invalid=true]]:border-destructive has-[[data-slot][aria-invalid=true]]:ring-3 has-[[data-slot][aria-invalid=true]]:ring-destructive/20 has-[>[data-align=block-end]]:h-auto has-[>[data-align=block-end]]:flex-col has-[>[data-align=block-start]]:h-auto 
has-[>[data-align=block-start]]:flex-col has-[>textarea]:h-auto dark:bg-input/30 dark:has-disabled:bg-input/80 dark:has-[[data-slot][aria-invalid=true]]:ring-destructive/40 has-[>[data-align=block-end]]:[&>input]:pt-3 has-[>[data-align=block-start]]:[&>input]:pb-3 has-[>[data-align=inline-end]]:[&>input]:pr-1.5 has-[>[data-align=inline-start]]:[&>input]:pl-1.5", + "group/input-group relative flex h-8 w-full min-w-0 items-center rounded-lg border border-input bg-[color:var(--color-surface-panel)] transition-colors outline-none in-data-[slot=combobox-content]:focus-within:border-inherit in-data-[slot=combobox-content]:focus-within:ring-0 has-disabled:bg-input/50 has-disabled:opacity-50 has-[[data-slot=input-group-control]:focus-visible]:border-[color:var(--color-text-tertiary)] has-[[data-slot][aria-invalid=true]]:border-destructive has-[[data-slot][aria-invalid=true]]:ring-3 has-[[data-slot][aria-invalid=true]]:ring-destructive/20 has-[>[data-align=block-end]]:h-auto has-[>[data-align=block-end]]:flex-col has-[>[data-align=block-start]]:h-auto has-[>[data-align=block-start]]:flex-col has-[>textarea]:h-auto dark:has-disabled:bg-input/80 dark:has-[[data-slot][aria-invalid=true]]:ring-destructive/40 has-[>[data-align=block-end]]:[&>input]:pt-3 has-[>[data-align=block-start]]:[&>input]:pb-3 has-[>[data-align=inline-end]]:[&>input]:pr-1.5 has-[>[data-align=inline-start]]:[&>input]:pl-1.5", className )} {...props} diff --git a/packages/ui/src/components/input.tsx b/packages/ui/src/components/input.tsx index bba61c23e..95b965d7c 100644 --- a/packages/ui/src/components/input.tsx +++ b/packages/ui/src/components/input.tsx @@ -9,7 +9,7 @@ function Input({ className, type, ...props }: React.ComponentProps<"input">) { type={type} data-slot="input" className={cn( - "h-9 w-full min-w-0 rounded-lg border border-input bg-[color:var(--color-surface-elevated)] px-3 py-0 text-sm text-[color:var(--color-text-primary)] transition-colors outline-none 
selection:bg-[color:var(--color-accent-tint-strong)] selection:text-[color:var(--color-text-primary)] file:inline-flex file:h-6 file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-[color:var(--color-text-tertiary)] focus-visible:border-ring focus-visible:ring-3 focus-visible:ring-ring/20 disabled:pointer-events-none disabled:cursor-not-allowed disabled:border-[color:var(--color-surface-elevated)] disabled:bg-[color:var(--color-surface)] disabled:text-[color:var(--color-disabled)] disabled:opacity-100 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20", + "h-8 w-full min-w-0 rounded-lg border border-input bg-[color:var(--color-surface-panel)] px-3 py-0 text-sm text-[color:var(--color-text-primary)] transition-colors outline-none selection:bg-[color:var(--color-accent-tint-strong)] selection:text-[color:var(--color-text-primary)] file:inline-flex file:h-6 file:border-0 file:bg-transparent file:text-sm file:font-medium file:text-foreground placeholder:text-[color:var(--color-text-tertiary)] focus-visible:border-[color:var(--color-text-tertiary)] disabled:pointer-events-none disabled:cursor-not-allowed disabled:border-[color:var(--color-surface-panel)] disabled:bg-[color:var(--color-surface)] disabled:text-[color:var(--color-disabled)] disabled:opacity-100 aria-invalid:border-destructive aria-invalid:ring-3 aria-invalid:ring-destructive/20", className )} {...props} diff --git a/packages/ui/src/components/kind-chip.test.tsx b/packages/ui/src/components/kind-chip.test.tsx deleted file mode 100644 index bfe6b6cb2..000000000 --- a/packages/ui/src/components/kind-chip.test.tsx +++ /dev/null @@ -1,53 +0,0 @@ -import { render } from "@testing-library/react"; -import { describe, expect, it } from "vitest"; - -import { KindChip, KIND_DOT_COLORS } from "./kind-chip"; - -describe("KindChip", () => { - it("Should render the kind label uppercase with the wire-dot chrome", () => { - const { container 
} = render(); - const chip = container.querySelector('[data-slot="kind-chip"]'); - expect(chip).not.toBeNull(); - expect(chip?.textContent).toBe("greet"); - expect(chip?.className).toContain("font-mono"); - expect(chip?.className).toContain("uppercase"); - expect(chip?.className).toContain("border-[color:var(--color-divider)]"); - expect(chip?.className).toContain("bg-transparent"); - expect(chip?.className).toContain("text-[color:var(--color-text-tertiary)]"); - expect(chip?.getAttribute("data-kind")).toBe("greet"); - }); - - it("Should render a colored 7px dot for known protocol kinds", () => { - const { container } = render(); - const dot = container.querySelector('[data-slot="kind-chip-dot"]'); - expect(dot).not.toBeNull(); - expect(dot).toHaveStyle({ background: KIND_DOT_COLORS.receipt }); - }); - - it("Should omit the dot for unknown kinds (platforms, event ids)", () => { - const { container } = render(); - expect(container.querySelector('[data-slot="kind-chip-dot"]')).toBeNull(); - }); - - it("Should display the explicit label when provided", () => { - const { container } = render(); - const chip = container.querySelector('[data-slot="kind-chip"]'); - expect(chip?.textContent).toBe("presence"); - }); - - it("Should forward className alongside the defaults", () => { - const { container } = render(); - const chip = container.querySelector('[data-slot="kind-chip"]'); - expect(chip?.className).toContain("custom-class"); - expect(chip?.className).toContain("border-[color:var(--color-divider)]"); - }); - - it("Should preserve internal data markers when conflicting attributes are passed", () => { - const { container } = render( - - ); - const chip = container.querySelector('[data-slot="kind-chip"]'); - expect(chip).not.toBeNull(); - expect(chip?.getAttribute("data-kind")).toBe("whois"); - }); -}); diff --git a/packages/ui/src/components/kind-chip.tsx b/packages/ui/src/components/kind-chip.tsx deleted file mode 100644 index feb2db259..000000000 --- 
a/packages/ui/src/components/kind-chip.tsx +++ /dev/null @@ -1,56 +0,0 @@ -"use client"; - -import * as React from "react"; - -import { cn } from "../lib/utils"; - -export interface KindChipProps extends Omit, "children"> { - kind: string; - /** Optional explicit label; defaults to `kind`. */ - label?: React.ReactNode; -} - -/** - * Protocol kind marker — mirrors `.intent-badge` + `.wire-dot` in - * `docs/design/web-inspiration/styles/app.css`. Transparent surface, neutral - * border + tertiary label, leading 7px colored dot keyed off the protocol - * kind. Unknown kinds (platform names, event ids) render without a dot. - */ -const KIND_DOT_COLORS: Record = { - say: "#8E8E93", - greet: "#5BA6FF", - direct: "var(--color-accent)", - receipt: "var(--color-success)", - recipe: "var(--color-warning)", - trace: "#B892FF", - whois: "#4FD1C5", -}; - -function KindChip({ kind, label, className, ...props }: KindChipProps) { - const dotColor = KIND_DOT_COLORS[kind.toLowerCase()]; - - return ( - - {dotColor ? 
( - - ); -} - -export { KindChip, KIND_DOT_COLORS }; diff --git a/packages/ui/src/components/mono-badge.test.tsx b/packages/ui/src/components/mono-badge.test.tsx deleted file mode 100644 index a61cc88f3..000000000 --- a/packages/ui/src/components/mono-badge.test.tsx +++ /dev/null @@ -1,79 +0,0 @@ -import { render } from "@testing-library/react"; -import { describe, expect, it } from "vitest"; - -import { MonoBadge, type MonoBadgeTone } from "./mono-badge"; - -describe("MonoBadge", () => { - it("Should render children uppercase in mono with the default outline tone", () => { - const { container } = render(agent-42); - const badge = container.querySelector('[data-slot="mono-badge"]'); - expect(badge).not.toBeNull(); - expect(badge?.textContent).toBe("agent-42"); - expect(badge?.className).toContain("font-mono"); - expect(badge?.className).toContain("uppercase"); - expect(badge?.className).toContain("rounded-[var(--radius-mono-badge)]"); - expect(badge?.className).toContain("border-[color:var(--color-divider)]"); - expect(badge?.getAttribute("data-tone")).toBe("default"); - }); - - it("Should respect uppercase={false} and keep the provided casing", () => { - const { container } = render(Agent); - const badge = container.querySelector('[data-slot="mono-badge"]'); - expect(badge?.className).not.toContain("uppercase"); - }); - - it.each<{ tone: MonoBadgeTone; background: string; text: string }>([ - { - tone: "accent", - background: "bg-[color:var(--color-accent-tint)]", - text: "text-[color:var(--color-accent)]", - }, - { - tone: "success", - background: "bg-[color:var(--color-success-tint)]", - text: "text-[color:var(--color-success)]", - }, - { - tone: "warning", - background: "bg-[color:var(--color-warning-tint)]", - text: "text-[color:var(--color-warning)]", - }, - { - tone: "danger", - background: "bg-[color:var(--color-danger-tint)]", - text: "text-[color:var(--color-danger)]", - }, - { - tone: "info", - background: "bg-[color:var(--color-info-tint)]", - text: 
"text-[color:var(--color-info)]", - }, - { - tone: "neutral", - background: "bg-[color:var(--color-neutral-tint)]", - text: "text-[color:var(--color-text-label)]", - }, - { - tone: "solid-accent", - background: "bg-[color:var(--color-accent)]", - text: "text-[color:var(--color-accent-ink)]", - }, - ])("Should apply the $tone tint tokens", ({ tone, background, text }) => { - const { container } = render(token); - const badge = container.querySelector('[data-slot="mono-badge"]'); - expect(badge?.getAttribute("data-tone")).toBe(tone); - expect(badge?.className).toContain(background); - expect(badge?.className).toContain(text); - }); - - it("Should preserve the requested slot while keeping the component tone marker stable", () => { - const { container } = render( - - token - - ); - const badge = container.querySelector('[data-slot="override-slot"]'); - expect(badge).not.toBeNull(); - expect(badge?.getAttribute("data-tone")).toBe("accent"); - }); -}); diff --git a/packages/ui/src/components/mono-badge.tsx b/packages/ui/src/components/mono-badge.tsx deleted file mode 100644 index d7b9be7b5..000000000 --- a/packages/ui/src/components/mono-badge.tsx +++ /dev/null @@ -1,63 +0,0 @@ -"use client"; - -import { cva, type VariantProps } from "class-variance-authority"; -import * as React from "react"; - -import { cn } from "../lib/utils"; - -const monoBadgeVariants = cva( - [ - "inline-flex items-center rounded-[var(--radius-mono-badge)] px-1.5 py-0.5", - "font-mono text-[11px] font-medium leading-[14px] tracking-[0.06em] whitespace-nowrap", - ].join(" "), - { - variants: { - tone: { - default: - "border border-[color:var(--color-divider)] bg-transparent text-[color:var(--color-text-label)]", - neutral: "bg-[color:var(--color-neutral-tint)] text-[color:var(--color-text-label)]", - accent: "bg-[color:var(--color-accent-tint)] text-[color:var(--color-accent)]", - "solid-accent": "bg-[color:var(--color-accent)] text-[color:var(--color-accent-ink)]", - success: 
"bg-[color:var(--color-success-tint)] text-[color:var(--color-success)]", - warning: "bg-[color:var(--color-warning-tint)] text-[color:var(--color-warning)]", - danger: "bg-[color:var(--color-danger-tint)] text-[color:var(--color-danger)]", - info: "bg-[color:var(--color-info-tint)] text-[color:var(--color-info)]", - }, - uppercase: { - true: "uppercase", - false: "", - }, - }, - defaultVariants: { - tone: "default", - uppercase: true, - }, - } -); - -export type MonoBadgeTone = NonNullable["tone"]>; - -export interface MonoBadgeProps - extends Omit, "color">, VariantProps { - "data-slot"?: string; -} - -/** - * Inline mono pill for identifiers (agent IDs, versions, protocol names) and - * status badges. Uppercase by default, with semantic tones using the DESIGN.md - * §4 tint formula and `solid-accent` reserved for accent-filled emphasis. - */ -function MonoBadge({ tone, uppercase, className, ...props }: MonoBadgeProps) { - const dataSlot = props["data-slot"] ?? "mono-badge"; - - return ( - - ); -} - -export { MonoBadge, monoBadgeVariants }; diff --git a/packages/ui/src/components/mono-chip.test.tsx b/packages/ui/src/components/mono-chip.test.tsx deleted file mode 100644 index 43d52b970..000000000 --- a/packages/ui/src/components/mono-chip.test.tsx +++ /dev/null @@ -1,22 +0,0 @@ -import { render } from "@testing-library/react"; -import { describe, expect, it } from "vitest"; - -import { MonoChip } from "./mono-chip"; - -describe("MonoChip", () => { - it("Should render a neutral mono chip with elevated surface", () => { - const { container } = render(code); - const chip = container.querySelector('[data-slot="mono-chip"]'); - expect(chip).not.toBeNull(); - expect(chip?.textContent).toBe("code"); - expect(chip?.className).toContain("font-mono"); - expect(chip?.className).toContain("bg-[color:var(--color-surface-elevated)]"); - expect(chip?.className).toContain("text-[color:var(--color-text-secondary)]"); - }); - - it("Should forward className", () => { - const { 
container } = render(tag); - const chip = container.querySelector('[data-slot="mono-chip"]'); - expect(chip?.className).toContain("custom-class"); - }); -}); diff --git a/packages/ui/src/components/mono-chip.tsx b/packages/ui/src/components/mono-chip.tsx deleted file mode 100644 index 0bbd3908c..000000000 --- a/packages/ui/src/components/mono-chip.tsx +++ /dev/null @@ -1,29 +0,0 @@ -"use client"; - -import * as React from "react"; - -import { cn } from "../lib/utils"; - -export interface MonoChipProps extends React.ComponentProps<"span"> {} - -/** - * Neutral inline chip — mirrors `.mono-chip` (default tone) in - * `docs/design/web-inspiration/styles/app.css`. Used for capability - * descriptors, tag rows, and other identifier strings rendered alongside - * message bodies. For tinted semantic variants use {@link MonoBadge}. - */ -function MonoChip({ className, ...props }: MonoChipProps) { - return ( - - ); -} - -export { MonoChip }; diff --git a/packages/ui/src/components/pills.test.tsx b/packages/ui/src/components/pill-group.test.tsx similarity index 56% rename from packages/ui/src/components/pills.test.tsx rename to packages/ui/src/components/pill-group.test.tsx index 22a44c482..cab0b8cae 100644 --- a/packages/ui/src/components/pills.test.tsx +++ b/packages/ui/src/components/pill-group.test.tsx @@ -2,62 +2,37 @@ import { render, screen } from "@testing-library/react"; import userEvent from "@testing-library/user-event"; import { describe, expect, it, vi } from "vitest"; -import { Pill, Pills } from "./pills"; - -describe("Pill", () => { - it("Should render a semantic tag with the success tint token as background", () => { - render(Live); - const pill = screen.getByText("Live"); - expect(pill).toHaveAttribute("data-slot", "pill"); - expect(pill).toHaveAttribute("data-variant", "success"); - expect(pill.className).toContain("bg-[color:var(--color-success-tint)]"); - expect(pill.className).toContain("text-[color:var(--color-success)]"); - }); - - it("Should fall 
back to the default variant when none is provided", () => { - render(Neutral); - const pill = screen.getByText("Neutral"); - expect(pill).toHaveAttribute("data-variant", "default"); - expect(pill.className).toContain("border-[color:var(--color-divider)]"); - }); - - it("Should use the md size token when size='md' is requested", () => { - render(Filter); - const pill = screen.getByText("Filter"); - expect(pill).toHaveAttribute("data-size", "md"); - expect(pill.className).toContain("h-8"); - }); -}); +import { PillGroup } from "./pill-group"; -describe("Pills", () => { +describe("PillGroup", () => { const items = [ { value: "list", label: "List" }, { value: "kanban", label: "Kanban" }, { value: "inbox", label: "Inbox", badge: 3 }, ] as const; - it("Should fire onChange with the selected value when an item is clicked", async () => { + it("Should fire onChange with the selected value when a non-active item is clicked", async () => { const user = userEvent.setup(); const handle = vi.fn(); - render(); + render(); await user.click(screen.getByRole("button", { name: /kanban/i })); expect(handle).toHaveBeenCalledWith("kanban"); }); - it("Should not fire onChange when the active item is clicked", async () => { + it("Should not fire onChange when the active item is re-clicked", async () => { const user = userEvent.setup(); const handle = vi.fn(); - render(); + render(); await user.click(screen.getByRole("button", { name: /list/i })); expect(handle).not.toHaveBeenCalled(); }); - it("Should reflect the active item via aria-pressed + data-active", () => { - render( {}} items={items} />); + it("Should reflect the active item via aria-pressed and data-active", () => { + render( {}} items={items} />); const kanban = screen.getByRole("button", { name: /kanban/i }); const list = screen.getByRole("button", { name: /list/i }); expect(kanban).toHaveAttribute("aria-pressed", "true"); @@ -67,18 +42,20 @@ describe("Pills", () => { }); it("Should render the badge count next to the item 
label when badge > 0", () => { - render( {}} items={items} />); + render( {}} items={items} />); const inbox = screen.getByRole("button", { name: /inbox/i }); - const badge = inbox.querySelector('[data-slot="pills-badge"]'); + const badge = inbox.querySelector('[data-slot="pill-group-badge"]'); expect(badge).not.toBeNull(); expect(badge?.textContent).toBe("3"); + expect(badge?.className).toContain("bg-(--color-accent)"); + expect(badge?.className).toContain("text-(--color-accent-ink)"); }); it("Should not fire onChange for a disabled item", async () => { const user = userEvent.setup(); const handle = vi.fn(); render( - { it("Should expose testId as data-testid when provided", () => { render( - {}} items={[{ value: "list", label: "List", testId: "mode-list" }]} @@ -108,4 +85,16 @@ describe("Pills", () => { "mode-list" ); }); + + it("Should render the larger md segments by default and switch to sm when requested", () => { + const { container, rerender } = render( + {}} items={items} /> + ); + let segments = container.querySelectorAll('[data-slot="pill-group-item"]'); + expect(segments[0]?.className).toContain("h-(--height-mono-badge)"); + + rerender( {}} items={items} size="sm" />); + segments = container.querySelectorAll('[data-slot="pill-group-item"]'); + expect(segments[0]?.className).toContain("h-(--height-pill-group-segment-sm)"); + }); }); diff --git a/packages/ui/src/components/pill-group.tsx b/packages/ui/src/components/pill-group.tsx new file mode 100644 index 000000000..a662d6d89 --- /dev/null +++ b/packages/ui/src/components/pill-group.tsx @@ -0,0 +1,101 @@ +"use client"; + +import { cva, type VariantProps } from "class-variance-authority"; +import * as React from "react"; + +import { cn } from "../lib/utils"; + +const pillGroupSegmentVariants = cva( + "inline-flex cursor-pointer items-center justify-center gap-1.5 whitespace-nowrap rounded-(--radius-chip) font-mono text-badge font-semibold uppercase tracking-(--tracking-badge) transition-colors 
duration-(--duration-base) ease-(--ease-out) focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-(--color-accent) focus-visible:ring-offset-0 disabled:cursor-not-allowed disabled:opacity-50", + { + variants: { + active: { + true: "bg-(--color-surface-elevated) text-(--color-text-primary)", + false: "bg-transparent text-(--color-text-tertiary) hover:text-(--color-text-secondary)", + }, + size: { + sm: "h-(--height-pill-group-segment-sm) px-(--space-pill-group-segment-sm-x)", + md: "h-(--height-mono-badge) px-(--space-pill-group-segment-md-x)", + }, + }, + defaultVariants: { + active: false, + size: "md", + }, + } +); + +export type PillGroupSize = NonNullable["size"]>; + +export interface PillGroupItem { + value: V; + label: React.ReactNode; + /** Optional unread / count badge rendered inside the segment. */ + badge?: number; + disabled?: boolean; + testId?: string; +} + +export interface PillGroupProps extends Omit< + React.ComponentProps<"div">, + "onChange" +> { + items: ReadonlyArray>; + value: V; + onChange: (next: V) => void; + size?: PillGroupSize; +} + +function PillGroup({ + items, + value, + onChange, + size = "md", + className, + ...props +}: PillGroupProps) { + return ( +
+ {items.map(item => { + const isActive = item.value === value; + return ( + + ); + })} +
+ ); +} + +export { PillGroup, pillGroupSegmentVariants }; diff --git a/packages/ui/src/components/pill.test.tsx b/packages/ui/src/components/pill.test.tsx new file mode 100644 index 000000000..8ae7d2547 --- /dev/null +++ b/packages/ui/src/components/pill.test.tsx @@ -0,0 +1,222 @@ +import { render, screen } from "@testing-library/react"; +import userEvent from "@testing-library/user-event"; +import { MotionConfig } from "motion/react"; +import type { ReactNode } from "react"; +import { describe, expect, it, vi } from "vitest"; + +import { Pill, type PillTone } from "./pill"; + +interface WithMotionProps { + reducedMotion: "always" | "never"; + children: ReactNode; +} + +interface ToneExpectation { + tone: PillTone; + bg: string; + text: string; +} + +function WithMotion({ reducedMotion, children }: WithMotionProps) { + return {children}; +} + +describe("Pill", () => { + it("Should render a neutral span at sm size by default", () => { + render(label); + const pill = screen.getByText("label"); + expect(pill.tagName).toBe("SPAN"); + expect(pill).toHaveAttribute("data-slot", "pill"); + expect(pill).toHaveAttribute("data-tone", "neutral"); + expect(pill).toHaveAttribute("data-size", "sm"); + expect(pill.className).toContain("bg-(--color-neutral-tint)"); + expect(pill.className).toContain("text-(--color-text-secondary)"); + }); + + it.each([ + { tone: "accent", bg: "bg-(--color-accent-tint)", text: "text-(--color-accent)" }, + { tone: "success", bg: "bg-(--color-success-tint)", text: "text-(--color-success)" }, + { tone: "warning", bg: "bg-(--color-warning-tint)", text: "text-(--color-warning)" }, + { tone: "danger", bg: "bg-(--color-danger-tint)", text: "text-(--color-danger)" }, + { tone: "info", bg: "bg-(--color-info-tint)", text: "text-(--color-info)" }, + ])("Should apply the $tone tint formula", ({ tone, bg, text }) => { + render(x); + const pill = screen.getByText("x"); + expect(pill).toHaveAttribute("data-tone", tone); + expect(pill.className).toContain(bg); + 
expect(pill.className).toContain(text); + }); + + it("Should switch to solid background and ink text when solid is true", () => { + render( + + NEW + + ); + const pill = screen.getByText("NEW"); + expect(pill).toHaveAttribute("data-solid", "true"); + expect(pill.className).toContain("bg-(--color-accent)"); + expect(pill.className).toContain("text-(--color-accent-ink)"); + }); + + it("Should adopt mono typography and uppercase when mono is true", () => { + render(token); + const pill = screen.getByText("token"); + expect(pill).toHaveAttribute("data-mono", "true"); + expect(pill.className).toContain("font-mono"); + expect(pill.className).toContain("uppercase"); + }); + + it("Should respect uppercase={false} explicit override", () => { + render( + + v1.2.3 + + ); + const pill = screen.getByText("v1.2.3"); + expect(pill.className).toContain("normal-case"); + expect(pill.className).not.toMatch(/(^| )uppercase( |$)/); + }); + + it("Should default xs size to non-uppercase chip chrome", () => { + render(capability-id); + const pill = screen.getByText("capability-id"); + expect(pill).toHaveAttribute("data-size", "xs"); + expect(pill.className).toContain("rounded-(--radius-chip)"); + expect(pill.className).toContain("normal-case"); + }); + + it("Should apply md filter-pill chrome when size='md'", () => { + render(FILTER); + const pill = screen.getByText("FILTER"); + expect(pill).toHaveAttribute("data-size", "md"); + expect(pill.className).toContain("h-8"); + expect(pill.className).toContain("font-semibold"); + expect(pill.className).toContain("uppercase"); + }); + + it("Should render as a button when render={ - ); - })} -
- ); -} - -export { Pill, Pills, pillVariants, pillToggleVariants }; diff --git a/packages/ui/src/components/status-dot.test.tsx b/packages/ui/src/components/status-dot.test.tsx deleted file mode 100644 index 2bb0bc385..000000000 --- a/packages/ui/src/components/status-dot.test.tsx +++ /dev/null @@ -1,90 +0,0 @@ -import { render } from "@testing-library/react"; -import { MotionConfig } from "motion/react"; -import type { ReactNode } from "react"; -import { describe, expect, it } from "vitest"; - -import { StatusDot, type StatusDotTone } from "./status-dot"; - -function WithMotion({ - reducedMotion, - children, -}: { - reducedMotion: "always" | "never"; - children: ReactNode; -}) { - return {children}; -} - -const TONE_TO_CSS_COLOR: Record = { - success: "var(--color-success)", - warning: "var(--color-warning)", - danger: "var(--color-danger)", - info: "var(--color-info)", - accent: "var(--color-accent)", - neutral: "var(--color-text-tertiary)", -}; - -describe("StatusDot", () => { - it("Should render a neutral dot by default", () => { - const { container } = render(); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot).not.toBeNull(); - expect(dot?.getAttribute("data-tone")).toBe("neutral"); - expect(dot?.getAttribute("aria-hidden")).toBe("true"); - expect((dot as HTMLElement).style.backgroundColor).toBe(TONE_TO_CSS_COLOR.neutral); - }); - - it.each(["success", "warning", "danger", "info", "accent", "neutral"])( - "Should map tone %s to the semantic color token", - tone => { - const { container } = render( - - - - ); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot?.getAttribute("data-tone")).toBe(tone); - expect(dot?.style.backgroundColor).toBe(TONE_TO_CSS_COLOR[tone]); - } - ); - - it("Should apply the pulse animation class when pulse is true and reduced motion is off", () => { - const { container } = render( - - - - ); - const dot = container.querySelector('[data-slot="status-dot"]'); - 
expect(dot?.className).toContain("animate-pulse"); - expect(dot?.getAttribute("data-pulse")).toBe("true"); - }); - - it("Should not animate when pulse is false", () => { - const { container } = render( - - - - ); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot?.className).not.toContain("animate-pulse"); - expect(dot?.getAttribute("data-pulse")).toBeNull(); - }); - - it("Should suppress pulse animation when prefers-reduced-motion is reduce", () => { - const { container } = render( - - - - ); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot?.className).not.toContain("animate-pulse"); - expect(dot?.getAttribute("data-pulse")).toBeNull(); - }); - - it("Should render the compact size variant", () => { - const { container } = render(); - const dot = container.querySelector('[data-slot="status-dot"]'); - expect(dot?.getAttribute("data-size")).toBe("sm"); - expect(dot?.className).toContain("size-1.5"); - }); -}); diff --git a/packages/ui/src/components/status-dot.tsx b/packages/ui/src/components/status-dot.tsx deleted file mode 100644 index 6b8bfd7b4..000000000 --- a/packages/ui/src/components/status-dot.tsx +++ /dev/null @@ -1,66 +0,0 @@ -"use client"; - -import { useReducedMotionConfig } from "motion/react"; -import * as React from "react"; - -import { cn } from "../lib/utils"; - -export type StatusDotTone = "success" | "warning" | "danger" | "info" | "accent" | "neutral"; - -export type StatusDotSize = "sm" | "md"; - -export interface StatusDotProps extends Omit, "color"> { - tone?: StatusDotTone; - pulse?: boolean; - size?: StatusDotSize; -} - -const TONE_COLOR: Record = { - success: "var(--color-success)", - warning: "var(--color-warning)", - danger: "var(--color-danger)", - info: "var(--color-info)", - accent: "var(--color-accent)", - neutral: "var(--color-text-tertiary)", -}; - -const SIZE_CLASS: Record = { - sm: "size-1.5", - md: "size-2", -}; - -/** - * Tinted signal dot — `tone` maps to a semantic 
color, optional `pulse` drives a - * subtle opacity loop. Respects `prefers-reduced-motion` via `useReducedMotion`. - * Mirrors `.dot` in `docs/design/web-inspiration/styles/app.css` and DESIGN.md §4. - */ -function StatusDot({ - tone = "neutral", - pulse = false, - size = "md", - className, - style, - ...props -}: StatusDotProps) { - const reduced = useReducedMotionConfig(); - const shouldAnimate = pulse && !reduced; - return ( -
), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/avatar.stories.tsx b/packages/ui/src/components/stories/avatar.stories.tsx index 9ad913509..2477d0105 100644 --- a/packages/ui/src/components/stories/avatar.stories.tsx +++ b/packages/ui/src/components/stories/avatar.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/badge.stories.tsx b/packages/ui/src/components/stories/badge.stories.tsx index 482c66207..2dc8f605d 100644 --- a/packages/ui/src/components/stories/badge.stories.tsx +++ b/packages/ui/src/components/stories/badge.stories.tsx @@ -13,7 +13,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/breadcrumb.stories.tsx b/packages/ui/src/components/stories/breadcrumb.stories.tsx index 3ce75d1d5..ec4fbc9b0 100644 --- a/packages/ui/src/components/stories/breadcrumb.stories.tsx +++ b/packages/ui/src/components/stories/breadcrumb.stories.tsx @@ -22,7 +22,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/button-group.stories.tsx b/packages/ui/src/components/stories/button-group.stories.tsx index 1c238cf36..8cfd67cef 100644 --- a/packages/ui/src/components/stories/button-group.stories.tsx +++ b/packages/ui/src/components/stories/button-group.stories.tsx @@ -18,7 +18,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/button.stories.tsx b/packages/ui/src/components/stories/button.stories.tsx index b7bf71d82..fe4cc1c0d 100644 --- a/packages/ui/src/components/stories/button.stories.tsx +++ b/packages/ui/src/components/stories/button.stories.tsx @@ -14,7 +14,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git 
a/packages/ui/src/components/stories/card.stories.tsx b/packages/ui/src/components/stories/card.stories.tsx index b7288e188..1da10a4c2 100644 --- a/packages/ui/src/components/stories/card.stories.tsx +++ b/packages/ui/src/components/stories/card.stories.tsx @@ -29,7 +29,6 @@ const meta: Meta = {
), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/chat-message-bubble.stories.tsx b/packages/ui/src/components/stories/chat-message-bubble.stories.tsx index 31dc46ac5..5d000fa39 100644 --- a/packages/ui/src/components/stories/chat-message-bubble.stories.tsx +++ b/packages/ui/src/components/stories/chat-message-bubble.stories.tsx @@ -2,7 +2,7 @@ import type { Meta, StoryObj } from "@storybook/react-vite"; import { expect, within } from "storybook/test"; import { ChatMessageBubble, type ChatMessageRole } from "../chat-message-bubble"; -import { StatusDot } from "../status-dot"; +import { Pill } from "../pill"; import { ToolCallCard, type ToolCallStatus } from "../tool-call-card"; const meta: Meta = { @@ -17,7 +17,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; @@ -36,7 +35,7 @@ const ROLE_ALIGN: Record = { function AgentMeta() { return ( <> - + claude · 12:03 diff --git a/packages/ui/src/components/stories/code-block.stories.tsx b/packages/ui/src/components/stories/code-block.stories.tsx index b061ab011..11ee7b862 100644 --- a/packages/ui/src/components/stories/code-block.stories.tsx +++ b/packages/ui/src/components/stories/code-block.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/collapsible.stories.tsx b/packages/ui/src/components/stories/collapsible.stories.tsx index 1a720e00e..618733b9b 100644 --- a/packages/ui/src/components/stories/collapsible.stories.tsx +++ b/packages/ui/src/components/stories/collapsible.stories.tsx @@ -17,7 +17,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/combobox.stories.tsx b/packages/ui/src/components/stories/combobox.stories.tsx index 0fd4eacaf..970e1bfcc 100644 --- a/packages/ui/src/components/stories/combobox.stories.tsx +++ 
b/packages/ui/src/components/stories/combobox.stories.tsx @@ -27,7 +27,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/command.stories.tsx b/packages/ui/src/components/stories/command.stories.tsx index 286793a2d..cd1926db8 100644 --- a/packages/ui/src/components/stories/command.stories.tsx +++ b/packages/ui/src/components/stories/command.stories.tsx @@ -33,7 +33,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/connection-indicator.stories.tsx b/packages/ui/src/components/stories/connection-indicator.stories.tsx deleted file mode 100644 index d6924deb7..000000000 --- a/packages/ui/src/components/stories/connection-indicator.stories.tsx +++ /dev/null @@ -1,52 +0,0 @@ -import type { Meta, StoryObj } from "@storybook/react-vite"; - -import { ConnectionIndicator, type ConnectionStatus } from "../connection-indicator"; - -const meta: Meta = { - title: "ui/ConnectionIndicator", - component: ConnectionIndicator, - parameters: { - layout: "padded", - docs: { - description: { - component: - "StatusDot + mono label composite for daemon / socket connection state. `reconnecting` pulses the dot unless the user prefers reduced motion.", - }, - }, - }, - tags: ["autodocs"], -}; - -export default meta; -type Story = StoryObj; - -const STATES: ConnectionStatus[] = ["connected", "disconnected", "reconnecting"]; - -export const Connected: Story = { - args: { status: "connected" }, -}; - -export const Disconnected: Story = { - args: { status: "disconnected" }, -}; - -export const Reconnecting: Story = { - args: { status: "reconnecting" }, -}; - -export const AllStates: Story = { - render: () => ( -
- {STATES.map(state => ( - - ))} -
- ), -}; - -export const CustomLabel: Story = { - args: { - status: "connected", - label: "Live", - }, -}; diff --git a/packages/ui/src/components/stories/dialog.stories.tsx b/packages/ui/src/components/stories/dialog.stories.tsx index 30d2baa30..a3d5c4446 100644 --- a/packages/ui/src/components/stories/dialog.stories.tsx +++ b/packages/ui/src/components/stories/dialog.stories.tsx @@ -28,7 +28,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/direction.stories.tsx b/packages/ui/src/components/stories/direction.stories.tsx index 5d7c6bbc1..cdbac8802 100644 --- a/packages/ui/src/components/stories/direction.stories.tsx +++ b/packages/ui/src/components/stories/direction.stories.tsx @@ -14,7 +14,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/dropdown-menu.stories.tsx b/packages/ui/src/components/stories/dropdown-menu.stories.tsx index 112f4dad2..2def6d7dc 100644 --- a/packages/ui/src/components/stories/dropdown-menu.stories.tsx +++ b/packages/ui/src/components/stories/dropdown-menu.stories.tsx @@ -31,7 +31,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/empty.stories.tsx b/packages/ui/src/components/stories/empty.stories.tsx index 3cc96676c..6caf8f341 100644 --- a/packages/ui/src/components/stories/empty.stories.tsx +++ b/packages/ui/src/components/stories/empty.stories.tsx @@ -16,7 +16,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/field.stories.tsx b/packages/ui/src/components/stories/field.stories.tsx index 7aa54309b..790e86889 100644 --- a/packages/ui/src/components/stories/field.stories.tsx +++ b/packages/ui/src/components/stories/field.stories.tsx @@ -23,7 +23,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff 
--git a/packages/ui/src/components/stories/input-group.stories.tsx b/packages/ui/src/components/stories/input-group.stories.tsx index 1bd6524f0..7b464b95c 100644 --- a/packages/ui/src/components/stories/input-group.stories.tsx +++ b/packages/ui/src/components/stories/input-group.stories.tsx @@ -25,7 +25,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/input.stories.tsx b/packages/ui/src/components/stories/input.stories.tsx index 3cdcc18b6..0cd9f8a95 100644 --- a/packages/ui/src/components/stories/input.stories.tsx +++ b/packages/ui/src/components/stories/input.stories.tsx @@ -20,7 +20,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/item.stories.tsx b/packages/ui/src/components/stories/item.stories.tsx index 0ae82a4af..8daf28a61 100644 --- a/packages/ui/src/components/stories/item.stories.tsx +++ b/packages/ui/src/components/stories/item.stories.tsx @@ -27,7 +27,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/kbd.stories.tsx b/packages/ui/src/components/stories/kbd.stories.tsx index 21f411dae..3c9ad04f2 100644 --- a/packages/ui/src/components/stories/kbd.stories.tsx +++ b/packages/ui/src/components/stories/kbd.stories.tsx @@ -20,7 +20,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/kind-chip.stories.tsx b/packages/ui/src/components/stories/kind-chip.stories.tsx deleted file mode 100644 index e6b048511..000000000 --- a/packages/ui/src/components/stories/kind-chip.stories.tsx +++ /dev/null @@ -1,48 +0,0 @@ -import type { Meta, StoryObj } from "@storybook/react-vite"; - -import { KindChip } from "../kind-chip"; - -const meta: Meta = { - title: "ui/KindChip", - component: KindChip, - parameters: { - layout: "padded", - docs: { - description: { - component: - 
"Protocol kind marker (`say`, `greet`, `direct`, `receipt`, `recipe`, `whois`, `trace`). Uppercase mono, transparent surface with neutral border + colored 7px wire-dot — mirrors `.intent-badge` + `.wire-dot` in `docs/design/web-inspiration/styles/app.css`.", - }, - }, - }, - tags: ["autodocs"], -}; - -export default meta; -type Story = StoryObj; - -const KINDS = ["say", "greet", "direct", "receipt", "recipe", "trace", "whois"] as const; - -export const Default: Story = { - args: { - kind: "greet", - }, -}; - -export const AllProtocolKinds: Story = { - render: () => ( -
- {KINDS.map(kind => ( - - ))} -
- ), -}; - -export const InlineWithCopy: Story = { - render: () => ( -

- Messages of kind are forwarded by the router to any peer subscribed to - the channel. -

- ), -}; diff --git a/packages/ui/src/components/stories/label.stories.tsx b/packages/ui/src/components/stories/label.stories.tsx index 8912235d4..a971db6d3 100644 --- a/packages/ui/src/components/stories/label.stories.tsx +++ b/packages/ui/src/components/stories/label.stories.tsx @@ -22,7 +22,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/logo.stories.tsx b/packages/ui/src/components/stories/logo.stories.tsx index 8664b052e..3d4696246 100644 --- a/packages/ui/src/components/stories/logo.stories.tsx +++ b/packages/ui/src/components/stories/logo.stories.tsx @@ -14,7 +14,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], argTypes: { variant: { control: "select", diff --git a/packages/ui/src/components/stories/metric.stories.tsx b/packages/ui/src/components/stories/metric.stories.tsx index bff92ccfc..2baf7fc69 100644 --- a/packages/ui/src/components/stories/metric.stories.tsx +++ b/packages/ui/src/components/stories/metric.stories.tsx @@ -14,7 +14,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/mono-badge.stories.tsx b/packages/ui/src/components/stories/mono-badge.stories.tsx deleted file mode 100644 index 7f7c8cadd..000000000 --- a/packages/ui/src/components/stories/mono-badge.stories.tsx +++ /dev/null @@ -1,65 +0,0 @@ -import type { Meta, StoryObj } from "@storybook/react-vite"; - -import { MonoBadge, type MonoBadgeTone } from "../mono-badge"; - -const meta: Meta = { - title: "ui/MonoBadge", - component: MonoBadge, - parameters: { - layout: "padded", - docs: { - description: { - component: - "Inline mono pill for identifiers (agent IDs, versions, protocol names) and tinted status badges. 
6px radius, JetBrains Mono 11px/500 at 0.06em tracking.", - }, - }, - }, - tags: ["autodocs"], -}; - -export default meta; -type Story = StoryObj; - -const TONES: MonoBadgeTone[] = [ - "default", - "neutral", - "accent", - "success", - "warning", - "danger", - "info", -]; - -export const Default: Story = { - args: { - children: "agent-42", - }, -}; - -export const AllTones: Story = { - render: () => ( -
- {TONES.map(tone => ( - - {tone} - - ))} -
- ), -}; - -export const LowercaseIdentifier: Story = { - args: { - uppercase: false, - children: "agh-network/v0", - }, -}; - -export const BesideLabel: Story = { - render: () => ( -
- Running - v0.2.1 -
- ), -}; diff --git a/packages/ui/src/components/stories/mono-chip.stories.tsx b/packages/ui/src/components/stories/mono-chip.stories.tsx deleted file mode 100644 index 78f365a6c..000000000 --- a/packages/ui/src/components/stories/mono-chip.stories.tsx +++ /dev/null @@ -1,35 +0,0 @@ -import type { Meta, StoryObj } from "@storybook/react-vite"; - -import { MonoChip } from "@agh/ui"; - -const meta: Meta = { - title: "ui/MonoChip", - component: MonoChip, - parameters: { - layout: "padded", - docs: { - description: { - component: - "Neutral inline chip — mirrors `.mono-chip` (default tone) in `docs/design/web-inspiration/styles/app.css`. Use for capability descriptors and tag rows. For tinted semantic variants use `MonoBadge`.", - }, - }, - }, - tags: ["autodocs"], -}; - -export default meta; -type Story = StoryObj; - -export const Default: Story = { - args: { children: "code" }, -}; - -export const Row: Story = { - render: () => ( -
- {["code", "shell", "file.read", "file.write", "plan.delegate"].map(label => ( - {label} - ))} -
- ), -}; diff --git a/packages/ui/src/components/stories/native-select.stories.tsx b/packages/ui/src/components/stories/native-select.stories.tsx index 93a03ad1d..3da587feb 100644 --- a/packages/ui/src/components/stories/native-select.stories.tsx +++ b/packages/ui/src/components/stories/native-select.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/page-header.stories.tsx b/packages/ui/src/components/stories/page-header.stories.tsx index 8558e3fb2..fba79bf37 100644 --- a/packages/ui/src/components/stories/page-header.stories.tsx +++ b/packages/ui/src/components/stories/page-header.stories.tsx @@ -4,7 +4,7 @@ import { ListChecksIcon, PlusIcon, SparklesIcon } from "lucide-react"; import { Button } from "../button"; import { PageHeader } from "../page-header"; -import { Pills } from "../pills"; +import { PillGroup } from "../pill-group"; const meta: Meta = { title: "ui/PageHeader", @@ -14,11 +14,10 @@ const meta: Meta = { docs: { description: { component: - "Top-of-page header — icon + title + count badge on the left, segmented `Pills` controls in the middle, meta/actions on the right.", + "Top-of-page header — icon + title + count badge on the left, segmented `PillGroup` controls in the middle, meta/actions on the right.", }, }, }, - tags: ["autodocs"], }; export default meta; @@ -42,7 +41,7 @@ export const WithControlsAndMeta: Story = { icon={ListChecksIcon} count={42} controls={ - = { + title: "ui/PillGroup", + component: PillGroup, + parameters: { + layout: "padded", + docs: { + description: { + component: + "Segmented toggle track. Controlled via `items` + `value` + `onChange`. 
Replaces the legacy segmented pills toggle.", + }, + }, + }, +}; + +export default meta; +type Story = StoryObj; + +function PillGroupHarness() { + const [value, setValue] = useState<"list" | "kanban" | "dashboard" | "inbox">("list"); + return ( + + ); +} + +export const Default: Story = { + render: () => , +}; + +export const Selection: Story = { + render: () => , + play: async ({ canvasElement }) => { + const canvas = within(canvasElement); + const list = await canvas.findByTestId("mode-list"); + const kanban = await canvas.findByTestId("mode-kanban"); + + await expect(list).toHaveAttribute("aria-pressed", "true"); + await expect(kanban).toHaveAttribute("aria-pressed", "false"); + + await userEvent.click(kanban); + await waitFor(() => expect(kanban).toHaveAttribute("aria-pressed", "true")); + await expect(list).toHaveAttribute("aria-pressed", "false"); + }, +}; + +export const SizeSm: Story = { + render: () => { + const [value, setValue] = useState<"a" | "b" | "c">("a"); + return ( + + ); + }, +}; + +export const DisabledItem: Story = { + render: () => ( + {}} + items={[ + { value: "list", label: "List" }, + { value: "kanban", label: "Kanban", disabled: true }, + ]} + /> + ), +}; diff --git a/packages/ui/src/components/stories/pill.stories.tsx b/packages/ui/src/components/stories/pill.stories.tsx new file mode 100644 index 000000000..27a829dd3 --- /dev/null +++ b/packages/ui/src/components/stories/pill.stories.tsx @@ -0,0 +1,268 @@ +import type { Meta, StoryObj } from "@storybook/react-vite"; +import { expect, within } from "storybook/test"; + +import { Pill, type PillTone } from "../pill"; + +const meta: Meta = { + title: "ui/Pill", + component: Pill, + parameters: { + layout: "padded", + docs: { + description: { + component: + "Unified semantic pill — replaces legacy `MonoBadge`, `StatusDot`, `KindChip`, `WireChip`, and connection-state label compositions. 
Compose with `Pill.Dot` for leading status dots.", + }, + }, + }, +}; + +export default meta; +type Story = StoryObj; + +const TONES: PillTone[] = ["neutral", "accent", "success", "warning", "danger", "info"]; + +const KIND_DOT_COLORS: Record = { + say: "var(--color-kind-say)", + greet: "var(--color-kind-greet)", + direct: "var(--color-kind-direct)", + receipt: "var(--color-kind-receipt)", + capability: "var(--color-kind-capability)", + trace: "var(--color-kind-trace)", + whois: "var(--color-kind-whois)", +}; + +export const Default: Story = { + args: { children: "label" }, +}; + +export const Tones: Story = { + render: () => ( +
+ {TONES.map(tone => ( + + {tone} + + ))} +
+ ), +}; + +export const TonesSans: Story = { + render: () => ( +
+ {TONES.map(tone => ( + + {tone} + + ))} +
+ ), +}; + +export const SolidEmphasis: Story = { + render: () => ( +
+ {TONES.map(tone => ( + + {tone} + + ))} +
+ ), + parameters: { + docs: { + description: { + story: "`solid` swaps the 15% tinted bg for a fully filled accent + ink-text formula.", + }, + }, + }, +}; + +export const Sizes: Story = { + render: () => ( +
+ + capability-id + + + v0.2.1 + + + FILTER + +
+ ), + parameters: { + docs: { + description: { + story: + "`xs` = chip (5px radius). `sm` = badge (22px tall, 6px radius). `md` = filter (32px, 20px radius).", + }, + }, + }, +}; + +export const MonoLowercaseIdentifier: Story = { + render: () => ( + + agh-network/v0 + + ), + parameters: { + docs: { + description: { + story: "Override the auto-uppercase default for protocol strings.", + }, + }, + }, +}; + +export const WithDot: Story = { + render: () => ( +
+ + + Connected + + + + Reconnecting + + + + Disconnected + +
+ ), +}; + +export const KindChipReplacement: Story = { + render: () => ( +
+ {Object.keys(KIND_DOT_COLORS).map(kind => ( + + + {kind} + + ))} +
+ ), + parameters: { + docs: { + description: { + story: "Protocol kind markers — leading dot keyed off the kind, label preserved.", + }, + }, + }, +}; + +export const ToggleInteractive: Story = { + render: () => ( +
+ }> + ALL + + }> + SAY + + }> + DIRECT + +
+ ), + parameters: { + docs: { + description: { + story: + "Pass `render={ diff --git a/packages/ui/src/components/stories/select.stories.tsx b/packages/ui/src/components/stories/select.stories.tsx index ecf929859..000453619 100644 --- a/packages/ui/src/components/stories/select.stories.tsx +++ b/packages/ui/src/components/stories/select.stories.tsx @@ -25,7 +25,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/separator.stories.tsx b/packages/ui/src/components/stories/separator.stories.tsx index 29fe2f12d..f8ab6e152 100644 --- a/packages/ui/src/components/stories/separator.stories.tsx +++ b/packages/ui/src/components/stories/separator.stories.tsx @@ -20,7 +20,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/sheet.stories.tsx b/packages/ui/src/components/stories/sheet.stories.tsx index 5c673d284..0a8c6ad62 100644 --- a/packages/ui/src/components/stories/sheet.stories.tsx +++ b/packages/ui/src/components/stories/sheet.stories.tsx @@ -28,7 +28,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/sidebar.stories.tsx b/packages/ui/src/components/stories/sidebar.stories.tsx index e0258dc7a..b70017eb9 100644 --- a/packages/ui/src/components/stories/sidebar.stories.tsx +++ b/packages/ui/src/components/stories/sidebar.stories.tsx @@ -28,7 +28,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/skeleton.stories.tsx b/packages/ui/src/components/stories/skeleton.stories.tsx index 3da35b955..5ce7e6faa 100644 --- a/packages/ui/src/components/stories/skeleton.stories.tsx +++ b/packages/ui/src/components/stories/skeleton.stories.tsx @@ -20,7 +20,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git 
a/packages/ui/src/components/stories/sonner.stories.tsx b/packages/ui/src/components/stories/sonner.stories.tsx index c600fe10c..a3942eb79 100644 --- a/packages/ui/src/components/stories/sonner.stories.tsx +++ b/packages/ui/src/components/stories/sonner.stories.tsx @@ -16,7 +16,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/spinner.stories.tsx b/packages/ui/src/components/stories/spinner.stories.tsx index f95286cae..6bc8dd930 100644 --- a/packages/ui/src/components/stories/spinner.stories.tsx +++ b/packages/ui/src/components/stories/spinner.stories.tsx @@ -20,7 +20,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/split-pane.stories.tsx b/packages/ui/src/components/stories/split-pane.stories.tsx index 6824bdbb5..5ebcec8dc 100644 --- a/packages/ui/src/components/stories/split-pane.stories.tsx +++ b/packages/ui/src/components/stories/split-pane.stories.tsx @@ -18,7 +18,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/status-dot.stories.tsx b/packages/ui/src/components/stories/status-dot.stories.tsx deleted file mode 100644 index 3c629ae6d..000000000 --- a/packages/ui/src/components/stories/status-dot.stories.tsx +++ /dev/null @@ -1,102 +0,0 @@ -import type { Meta, StoryObj } from "@storybook/react-vite"; -import { expect, within } from "storybook/test"; - -import { StatusDot, type StatusDotTone } from "../status-dot"; - -const meta: Meta = { - title: "ui/StatusDot", - component: StatusDot, - parameters: { - layout: "centered", - docs: { - description: { - component: - "Tinted signal dot — semantic tone + optional `pulse` loop. 
Mirrors DESIGN.md §4 status indicators.", - }, - }, - }, - tags: ["autodocs"], -}; - -export default meta; -type Story = StoryObj; - -const TONES: StatusDotTone[] = ["success", "warning", "danger", "info", "accent", "neutral"]; - -const TONE_TO_COLOR: Record = { - success: "var(--color-success)", - warning: "var(--color-warning)", - danger: "var(--color-danger)", - info: "var(--color-info)", - accent: "var(--color-accent)", - neutral: "var(--color-text-tertiary)", -}; - -export const Default: Story = { - args: { - tone: "success", - }, -}; - -export const Tones: Story = { - render: () => ( -
- {TONES.map(tone => ( -
- - - {tone} - -
- ))} -
- ), -}; - -export const ToneCycleInteraction: Story = { - render: () => ( -
- {TONES.map(tone => ( -
- -
- ))} -
- ), - play: async ({ canvasElement }) => { - const canvas = within(canvasElement); - for (const tone of TONES) { - const wrapper = await canvas.findByTestId(`tone-${tone}`); - const dot = wrapper.querySelector('[data-slot="status-dot"]') as HTMLElement; - await expect(dot).toBeInTheDocument(); - await expect(dot.getAttribute("data-tone")).toBe(tone); - await expect(dot.style.backgroundColor).toBe(TONE_TO_COLOR[tone]); - } - }, -}; - -export const PulseSuccess: Story = { - args: { - tone: "success", - pulse: true, - }, -}; - -export const SizeVariants: Story = { - render: () => ( -
-
- - - sm · 6px - -
-
- - - md · 8px - -
-
- ), -}; diff --git a/packages/ui/src/components/stories/switch.stories.tsx b/packages/ui/src/components/stories/switch.stories.tsx index c5130e3c4..0518f6e14 100644 --- a/packages/ui/src/components/stories/switch.stories.tsx +++ b/packages/ui/src/components/stories/switch.stories.tsx @@ -16,7 +16,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/table.stories.tsx b/packages/ui/src/components/stories/table.stories.tsx index 304fd7561..1bac91936 100644 --- a/packages/ui/src/components/stories/table.stories.tsx +++ b/packages/ui/src/components/stories/table.stories.tsx @@ -29,7 +29,6 @@ const meta: Meta = { ), ], - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/tabs.stories.tsx b/packages/ui/src/components/stories/tabs.stories.tsx index f7d7a8ff7..b47ae3107 100644 --- a/packages/ui/src/components/stories/tabs.stories.tsx +++ b/packages/ui/src/components/stories/tabs.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/textarea.stories.tsx b/packages/ui/src/components/stories/textarea.stories.tsx index 60c76e8fa..1b14488bc 100644 --- a/packages/ui/src/components/stories/textarea.stories.tsx +++ b/packages/ui/src/components/stories/textarea.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/toggle-group.stories.tsx b/packages/ui/src/components/stories/toggle-group.stories.tsx index 28c378296..cba689b51 100644 --- a/packages/ui/src/components/stories/toggle-group.stories.tsx +++ b/packages/ui/src/components/stories/toggle-group.stories.tsx @@ -23,7 +23,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/toggle.stories.tsx 
b/packages/ui/src/components/stories/toggle.stories.tsx index fa09d7fc5..8252b1e2a 100644 --- a/packages/ui/src/components/stories/toggle.stories.tsx +++ b/packages/ui/src/components/stories/toggle.stories.tsx @@ -16,7 +16,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/tool-call-card.stories.tsx b/packages/ui/src/components/stories/tool-call-card.stories.tsx index 00d581aaf..6450baa6b 100644 --- a/packages/ui/src/components/stories/tool-call-card.stories.tsx +++ b/packages/ui/src/components/stories/tool-call-card.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/toolbar.stories.tsx b/packages/ui/src/components/stories/toolbar.stories.tsx index 98590dfd5..24524e80f 100644 --- a/packages/ui/src/components/stories/toolbar.stories.tsx +++ b/packages/ui/src/components/stories/toolbar.stories.tsx @@ -3,7 +3,7 @@ import { useState } from "react"; import { PlusIcon } from "lucide-react"; import { Button } from "../button"; -import { Pills } from "../pills"; +import { PillGroup } from "../pill-group"; import { SearchInput } from "../search-input"; import { Toolbar } from "../toolbar"; @@ -15,11 +15,10 @@ const meta: Meta = { docs: { description: { component: - "Composition-first toolbar shell — pass `SearchInput`, `Pills`, `Button` children directly. Wraps on narrow viewports.", + "Composition-first toolbar shell — pass `SearchInput`, `PillGroup`, `Button` children directly. Wraps on narrow viewports.", }, }, }, - tags: ["autodocs"], }; export default meta; @@ -30,7 +29,7 @@ function Harness() { const [search, setSearch] = useState(""); return ( - (
- {}} items={[ diff --git a/packages/ui/src/components/stories/tooltip.stories.tsx b/packages/ui/src/components/stories/tooltip.stories.tsx index 980857e59..e21fb9ea2 100644 --- a/packages/ui/src/components/stories/tooltip.stories.tsx +++ b/packages/ui/src/components/stories/tooltip.stories.tsx @@ -18,7 +18,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/typing-dots.stories.tsx b/packages/ui/src/components/stories/typing-dots.stories.tsx index 3bb84da98..696d8662b 100644 --- a/packages/ui/src/components/stories/typing-dots.stories.tsx +++ b/packages/ui/src/components/stories/typing-dots.stories.tsx @@ -14,7 +14,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/ui-provider.stories.tsx b/packages/ui/src/components/stories/ui-provider.stories.tsx index 5403632ec..8249fa3be 100644 --- a/packages/ui/src/components/stories/ui-provider.stories.tsx +++ b/packages/ui/src/components/stories/ui-provider.stories.tsx @@ -28,7 +28,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/wire-card.stories.tsx b/packages/ui/src/components/stories/wire-card.stories.tsx index dcd4e8bd7..eb83ba277 100644 --- a/packages/ui/src/components/stories/wire-card.stories.tsx +++ b/packages/ui/src/components/stories/wire-card.stories.tsx @@ -15,7 +15,6 @@ const meta: Meta = { }, }, }, - tags: ["autodocs"], }; export default meta; diff --git a/packages/ui/src/components/stories/wire-chip.stories.tsx b/packages/ui/src/components/stories/wire-chip.stories.tsx deleted file mode 100644 index 0bfc27baf..000000000 --- a/packages/ui/src/components/stories/wire-chip.stories.tsx +++ /dev/null @@ -1,51 +0,0 @@ -import { useState } from "react"; -import type { Meta, StoryObj } from "@storybook/react-vite"; - -import { KIND_DOT_COLORS, WireChip } from "@agh/ui"; - -const meta: 
Meta = { - title: "ui/WireChip", - component: WireChip, - parameters: { - layout: "padded", - docs: { - description: { - component: - "Free-floating filter chip — mirrors `.wire-chip` in `docs/design/web-inspiration/styles/app.css`. For a contained segmented toggle, use `Pills` instead.", - }, - }, - }, - tags: ["autodocs"], -}; - -export default meta; -type Story = StoryObj; - -const KINDS = ["say", "greet", "direct", "receipt", "recipe", "trace", "whois"] as const; - -export const Default: Story = { - args: { children: "say" }, -}; - -export const KindFilterRow: Story = { - render: () => { - const [active, setActive] = useState("all"); - return ( -
- setActive("all")}> - all - - {KINDS.map(kind => ( - setActive(kind)} - > - {kind} - - ))} -
- ); - }, -}; diff --git a/packages/ui/src/components/tool-call-card.tsx b/packages/ui/src/components/tool-call-card.tsx index d8b981d3e..1845ac3c2 100644 --- a/packages/ui/src/components/tool-call-card.tsx +++ b/packages/ui/src/components/tool-call-card.tsx @@ -4,7 +4,7 @@ import { TerminalIcon } from "lucide-react"; import * as React from "react"; import { cn } from "../lib/utils"; -import { MonoBadge, type MonoBadgeTone } from "./mono-badge"; +import { Pill, type PillTone } from "./pill"; export type ToolCallStatus = "running" | "done" | "error"; @@ -24,7 +24,7 @@ function isIconComponent(value: unknown): value is ToolCallIconComponent { return false; } -const STATUS_TONE: Record = { +const STATUS_TONE: Record = { running: "accent", done: "success", error: "danger", @@ -96,13 +96,13 @@ function ToolCallCard({ {filePath} ) : null} - {STATUS_LABEL[status]} - +
{children ? (
{ /** * Horizontal toolbar shell — flex row with wrap on narrow viewports. - * Composition-first: host decides which children (SearchInput, Pills, Button, etc.) go inside. + * Composition-first: host decides which children (SearchInput, PillGroup, Button, etc.) go inside. */ function Toolbar({ className, sticky, ...props }: ToolbarProps) { return ( diff --git a/packages/ui/src/components/wire-chip.test.tsx b/packages/ui/src/components/wire-chip.test.tsx deleted file mode 100644 index a10990298..000000000 --- a/packages/ui/src/components/wire-chip.test.tsx +++ /dev/null @@ -1,42 +0,0 @@ -import { render, screen } from "@testing-library/react"; -import userEvent from "@testing-library/user-event"; -import { describe, expect, it, vi } from "vitest"; - -import { WireChip } from "./wire-chip"; - -describe("WireChip", () => { - it("Should render as a button with neutral chrome by default", () => { - const { container } = render(say); - const chip = container.querySelector('[data-slot="wire-chip"]'); - expect(chip).not.toBeNull(); - expect(chip?.tagName).toBe("BUTTON"); - expect(chip?.className).toContain("bg-[color:var(--color-surface)]"); - expect(chip?.className).toContain("border-[color:var(--color-divider)]"); - expect(chip?.getAttribute("aria-pressed")).toBe("false"); - }); - - it("Should reflect the active state via aria-pressed and elevated surface", () => { - const { container } = render(direct); - const chip = container.querySelector('[data-slot="wire-chip"]'); - expect(chip?.getAttribute("aria-pressed")).toBe("true"); - expect(chip?.getAttribute("data-active")).toBe("true"); - expect(chip?.className).toContain("bg-[color:var(--color-surface-elevated)]"); - }); - - it("Should render a colored leading dot when dotColor is provided", () => { - const { container } = render(direct); - const dot = container.querySelector('[data-slot="wire-chip-dot"]'); - expect(dot).not.toBeNull(); - expect(dot?.style.background).toBe("var(--color-accent)"); - }); - - it("Should fire 
onClick when activated", async () => { - const user = userEvent.setup(); - const handle = vi.fn(); - render(say); - - await user.click(screen.getByRole("button", { name: /say/i })); - - expect(handle).toHaveBeenCalledTimes(1); - }); -}); diff --git a/packages/ui/src/components/wire-chip.tsx b/packages/ui/src/components/wire-chip.tsx deleted file mode 100644 index ff6f01a13..000000000 --- a/packages/ui/src/components/wire-chip.tsx +++ /dev/null @@ -1,57 +0,0 @@ -"use client"; - -import * as React from "react"; - -import { cn } from "../lib/utils"; - -export interface WireChipProps extends Omit, "children"> { - active?: boolean; - /** Optional CSS color or var() for the leading 7px wire-dot. */ - dotColor?: string; - children: React.ReactNode; -} - -/** - * Free-floating filter chip — mirrors `.wire-chip` in - * `docs/design/web-inspiration/styles/app.css`. Used in stand-alone filter - * rows (e.g. the network channel header `ALL · SAY · DIRECT · …`). For a - * contained segmented toggle, use {@link Pills}. 
- */ -function WireChip({ - active = false, - dotColor, - children, - className, - type = "button", - ...props -}: WireChipProps) { - return ( - - ); -} - -export { WireChip }; diff --git a/packages/ui/src/index.ts b/packages/ui/src/index.ts index 57a06d718..8e415294c 100644 --- a/packages/ui/src/index.ts +++ b/packages/ui/src/index.ts @@ -160,33 +160,24 @@ export { SplitPane, SPLIT_LIST_WIDTH_DEFAULT, type SplitPaneProps } from "./comp export { PageHeader, type PageHeaderProps } from "./components/page-header"; export { Pill, - Pills, + PillDot, pillVariants, - pillToggleVariants, type PillProps, - type PillsProps, - type PillsItem, - type PillVariant, + type PillDotProps, + type PillTone, type PillSize, -} from "./components/pills"; +} from "./components/pill"; +export { + PillGroup, + pillGroupSegmentVariants, + type PillGroupProps, + type PillGroupItem, + type PillGroupSize, +} from "./components/pill-group"; export { SearchInput, type SearchInputProps } from "./components/search-input"; export { Empty, type EmptyProps } from "./components/empty"; export { Section, type SectionProps } from "./components/section"; export { Toolbar, type ToolbarProps } from "./components/toolbar"; -export { - StatusDot, - type StatusDotProps, - type StatusDotTone, - type StatusDotSize, -} from "./components/status-dot"; -export { - MonoBadge, - monoBadgeVariants, - type MonoBadgeProps, - type MonoBadgeTone, -} from "./components/mono-badge"; -export { KindChip, KIND_DOT_COLORS, type KindChipProps } from "./components/kind-chip"; -export { WireChip, type WireChipProps } from "./components/wire-chip"; export { WireCard, WireCardHead, @@ -194,7 +185,6 @@ export { WireCardFoot, type WireCardProps, } from "./components/wire-card"; -export { MonoChip, type MonoChipProps } from "./components/mono-chip"; export { TypingDots, type TypingDotsProps } from "./components/typing-dots"; export { CodeBlock, type CodeBlockProps } from "./components/code-block"; export { @@ -209,11 +199,6 @@ 
export { type ToolCallStatus, } from "./components/tool-call-card"; export { Metric, type MetricProps, type MetricTone } from "./components/metric"; -export { - ConnectionIndicator, - type ConnectionIndicatorProps, - type ConnectionStatus, -} from "./components/connection-indicator"; export { Avatar, AvatarBadge, diff --git a/packages/ui/src/tokens.css b/packages/ui/src/tokens.css index 1dfc8c4bd..d5daa2ac5 100644 --- a/packages/ui/src/tokens.css +++ b/packages/ui/src/tokens.css @@ -39,15 +39,34 @@ --color-warning: #ffd60a; --color-info: #bf5af2; + /* Protocol kind colors */ + --color-kind-say: #8e8e93; + --color-kind-greet: #5ba6ff; + --color-kind-direct: var(--color-accent); + --color-kind-receipt: var(--color-success); + --color-kind-capability: var(--color-warning); + --color-kind-trace: #b892ff; + --color-kind-whois: #4fd1c5; + /* Landing diagram backgrounds */ --color-canvas-deep: #0e0e0f; --color-line: #3c3a39; /* Shared mono/diagram constants */ --tracking-mono: 0.06em; + --tracking-badge: 0.08em; --radius-diagram: 12px; --radius-chip: 5px; --radius-mono-badge: 6px; + --height-mono-badge: 22px; + --height-pill-group-segment-sm: 20px; + --size-pill-group-badge: 14px; + --space-pill-group-track-gap: 2px; + --space-pill-group-track-padding: 3px; + --space-pill-group-segment-sm-x: 8px; + --space-pill-group-segment-md-x: 10px; + --space-pill-group-badge-x: 4px; + --text-pill-group-badge: 0.5625rem; /* Motion */ --duration-fast: 100ms; diff --git a/skills-lock.json b/skills-lock.json index 1184c10fd..b65d62614 100644 --- a/skills-lock.json +++ b/skills-lock.json @@ -696,11 +696,6 @@ "sourceType": "github", "computedHash": "1f9dd6946d6bb66bd76c5d49d607e5becf7c93ad643070fc4e352d17e426b69d" }, - "shadcn-ui": { - "source": "pedronauck/skills", - "sourceType": "github", - "computedHash": "4c3e54ac47ddcc2c948995c85f6740b78f2f556c769d800e92ddff035d5c7e59" - }, "shape": { "source": "pbakaus/impeccable", "sourceType": "github", diff --git a/web/AGENTS.md b/web/AGENTS.md 
index 38b130610..ac5edea4c 100644 --- a/web/AGENTS.md +++ b/web/AGENTS.md @@ -22,6 +22,7 @@ No production users exist. Never sacrifice code quality for backward compatibili - **`make web-lint` and `make web-typecheck` MUST pass** before completing ANY web task. Zero warnings, zero errors. - **Oxlint has zero tolerance** — any warning is a blocking failure - **Follow shadcn kebab-case naming** for all files in `web/` +- **Native DOM wrappers** — if a component’s root is a single native element (`button`, `input`, `a`, …), its props MUST extend that element’s intrinsic type (`React.ComponentProps<"…">`), merge `className`, and spread `{...props}` onto the node (use `forwardRef` when refs apply). CVA + `VariantProps`: follow the `shadcn` skill. Canonical rule: `.agents/skills/react/SKILL.md` → _Extend native element props_. - **Never add JS dependencies by hand in `package.json`** — always use `bun add` - **Check dependent package APIs** before writing integration code or tests - **Local QA against an isolated daemon MUST read `AGH_WEB_API_PROXY_TARGET` from the active bootstrap manifest/env** — never hardcode `http://localhost:2123` when `agh-qa-bootstrap` or another isolated QA envelope is in use. @@ -30,25 +31,25 @@ No production users exist. Never sacrifice code quality for backward compatibili Activate skills **before** writing code. 
Match task domain → activate all required skills: -| Domain | Required Skills | Conditional Skills | -| ----------------------------- | ---------------------------------------------------------------- | -------------------------------------------------- | -| React / Web UI | `react` + `tailwindcss` + `vercel-react-best-practices` | `shadcn` + `shadcn-ui` | -| Routing | `tanstack-router-best-practices` | `tanstack` | -| Data fetching | `tanstack-query-best-practices` + `app-renderer-systems` | | -| State management | `zustand` | | -| Schema / Validation | `zod` | `typescript-advanced` | -| Web testing | `vitest` + `react` + `testing-anti-patterns` | | -| TypeScript (types) | `typescript-advanced` | `context7` | -| UI / UX Design (generic) | `frontend-design` + `design-taste-frontend` | `interface-design` + `shadcn-ui` + `minimalist-ui` | -| **AGH UI / Redesign tasks** | `agh-design` + `design-taste-frontend` + `minimalist-ui` | `frontend-design` + `interface-design` | -| Storybook / component stories | `storybook-stories` | `shadcn-ui` | -| Animation / motion | `motion-react` | `motion` | -| Component patterns | `vercel-composition-patterns` + `vercel-react-best-practices` | | -| AI / Streaming | `ai-sdk` | `tanstack-query-best-practices` | -| Bug fix | `systematic-debugging` + `no-workarounds` | `testing-anti-patterns` | -| Design polish passes | `impeccable:polish` + `impeccable:layout` + `impeccable:typeset` | `impeccable:delight` + `impeccable:critique` | -| External docs lookup | `context7` + `find-docs` | `exa-web-search-free` | -| Task completion | `cy-final-verify` | | +| Domain | Required Skills | Conditional Skills | +| ----------------------------- | ---------------------------------------------------------------- | ----------------------------------------------- | +| React / Web UI | `react` + `tailwindcss` + `vercel-react-best-practices` | `shadcn` | +| Routing | `tanstack-router-best-practices` | `tanstack` | +| Data fetching | 
`tanstack-query-best-practices` + `app-renderer-systems` | | +| State management | `zustand` | | +| Schema / Validation | `zod` | `typescript-advanced` | +| Web testing | `vitest` + `react` + `testing-anti-patterns` | | +| TypeScript (types) | `typescript-advanced` | `context7` | +| UI / UX Design (generic) | `frontend-design` + `design-taste-frontend` | `interface-design` + `shadcn` + `minimalist-ui` | +| **AGH UI / Redesign tasks** | `agh-design` + `design-taste-frontend` + `minimalist-ui` | `frontend-design` + `interface-design` | +| Storybook / component stories | `storybook-stories` | `shadcn` | +| Animation / motion | `motion-react` | `motion` | +| Component patterns | `vercel-composition-patterns` + `vercel-react-best-practices` | | +| AI / Streaming | `ai-sdk` | `tanstack-query-best-practices` | +| Bug fix | `systematic-debugging` + `no-workarounds` | `testing-anti-patterns` | +| Design polish passes | `impeccable:polish` + `impeccable:layout` + `impeccable:typeset` | `impeccable:delight` + `impeccable:critique` | +| External docs lookup | `context7` + `find-docs` | `exa-web-search-free` | +| Task completion | `cy-final-verify` | | **Redesign tasks (`.compozy/tasks/redesign/*`)**: you MUST run the `designer` agent in execution mode (not plan mode) AND activate `agh-design` + `design-taste-frontend` + `minimalist-ui` before touching any component. `DESIGN.md` tokens win over anything informal already in the codebase. diff --git a/web/CLAUDE.md b/web/CLAUDE.md index 38b130610..ac5edea4c 100644 --- a/web/CLAUDE.md +++ b/web/CLAUDE.md @@ -22,6 +22,7 @@ No production users exist. Never sacrifice code quality for backward compatibili - **`make web-lint` and `make web-typecheck` MUST pass** before completing ANY web task. Zero warnings, zero errors. 
- **Oxlint has zero tolerance** — any warning is a blocking failure - **Follow shadcn kebab-case naming** for all files in `web/` +- **Native DOM wrappers** — if a component’s root is a single native element (`button`, `input`, `a`, …), its props MUST extend that element’s intrinsic type (`React.ComponentProps<"…">`), merge `className`, and spread `{...props}` onto the node (use `forwardRef` when refs apply). CVA + `VariantProps`: follow the `shadcn` skill. Canonical rule: `.agents/skills/react/SKILL.md` → _Extend native element props_. - **Never add JS dependencies by hand in `package.json`** — always use `bun add` - **Check dependent package APIs** before writing integration code or tests - **Local QA against an isolated daemon MUST read `AGH_WEB_API_PROXY_TARGET` from the active bootstrap manifest/env** — never hardcode `http://localhost:2123` when `agh-qa-bootstrap` or another isolated QA envelope is in use. @@ -30,25 +31,25 @@ No production users exist. Never sacrifice code quality for backward compatibili Activate skills **before** writing code. 
Match task domain → activate all required skills: -| Domain | Required Skills | Conditional Skills | -| ----------------------------- | ---------------------------------------------------------------- | -------------------------------------------------- | -| React / Web UI | `react` + `tailwindcss` + `vercel-react-best-practices` | `shadcn` + `shadcn-ui` | -| Routing | `tanstack-router-best-practices` | `tanstack` | -| Data fetching | `tanstack-query-best-practices` + `app-renderer-systems` | | -| State management | `zustand` | | -| Schema / Validation | `zod` | `typescript-advanced` | -| Web testing | `vitest` + `react` + `testing-anti-patterns` | | -| TypeScript (types) | `typescript-advanced` | `context7` | -| UI / UX Design (generic) | `frontend-design` + `design-taste-frontend` | `interface-design` + `shadcn-ui` + `minimalist-ui` | -| **AGH UI / Redesign tasks** | `agh-design` + `design-taste-frontend` + `minimalist-ui` | `frontend-design` + `interface-design` | -| Storybook / component stories | `storybook-stories` | `shadcn-ui` | -| Animation / motion | `motion-react` | `motion` | -| Component patterns | `vercel-composition-patterns` + `vercel-react-best-practices` | | -| AI / Streaming | `ai-sdk` | `tanstack-query-best-practices` | -| Bug fix | `systematic-debugging` + `no-workarounds` | `testing-anti-patterns` | -| Design polish passes | `impeccable:polish` + `impeccable:layout` + `impeccable:typeset` | `impeccable:delight` + `impeccable:critique` | -| External docs lookup | `context7` + `find-docs` | `exa-web-search-free` | -| Task completion | `cy-final-verify` | | +| Domain | Required Skills | Conditional Skills | +| ----------------------------- | ---------------------------------------------------------------- | ----------------------------------------------- | +| React / Web UI | `react` + `tailwindcss` + `vercel-react-best-practices` | `shadcn` | +| Routing | `tanstack-router-best-practices` | `tanstack` | +| Data fetching | 
`tanstack-query-best-practices` + `app-renderer-systems` | | +| State management | `zustand` | | +| Schema / Validation | `zod` | `typescript-advanced` | +| Web testing | `vitest` + `react` + `testing-anti-patterns` | | +| TypeScript (types) | `typescript-advanced` | `context7` | +| UI / UX Design (generic) | `frontend-design` + `design-taste-frontend` | `interface-design` + `shadcn` + `minimalist-ui` | +| **AGH UI / Redesign tasks** | `agh-design` + `design-taste-frontend` + `minimalist-ui` | `frontend-design` + `interface-design` | +| Storybook / component stories | `storybook-stories` | `shadcn` | +| Animation / motion | `motion-react` | `motion` | +| Component patterns | `vercel-composition-patterns` + `vercel-react-best-practices` | | +| AI / Streaming | `ai-sdk` | `tanstack-query-best-practices` | +| Bug fix | `systematic-debugging` + `no-workarounds` | `testing-anti-patterns` | +| Design polish passes | `impeccable:polish` + `impeccable:layout` + `impeccable:typeset` | `impeccable:delight` + `impeccable:critique` | +| External docs lookup | `context7` + `find-docs` | `exa-web-search-free` | +| Task completion | `cy-final-verify` | | **Redesign tasks (`.compozy/tasks/redesign/*`)**: you MUST run the `designer` agent in execution mode (not plan mode) AND activate `agh-design` + `design-taste-frontend` + `minimalist-ui` before touching any component. `DESIGN.md` tokens win over anything informal already in the codebase. 
diff --git a/web/src/components/app-sidebar.test.tsx b/web/src/components/app-sidebar.test.tsx index 7d02b6fc8..d22049085 100644 --- a/web/src/components/app-sidebar.test.tsx +++ b/web/src/components/app-sidebar.test.tsx @@ -85,7 +85,6 @@ function makeProps(overrides: Partial = {}): AppSidebarProps { collapsed: false, onCollapseChange, workspaces, - activeWorkspace: workspaces[0], activeWorkspaceId: "ws_alpha", onSelectWorkspace, onAddWorkspace, @@ -109,21 +108,10 @@ describe("AppSidebar", () => { }); describe("Header", () => { - it("surfaces the active workspace name", () => { + it("does not render a sidebar header slot — workspace identity lives in the rail", () => { renderSidebar(makeProps()); - expect(screen.getByTestId("sidebar-workspace-name")).toHaveTextContent("alpha"); - }); - - it("removes the non-functional sidebar search affordances", () => { - renderSidebar(makeProps()); - expect(screen.queryByRole("button", { name: "Search" })).not.toBeInTheDocument(); - expect(screen.queryByText("Search…")).not.toBeInTheDocument(); - }); - - it("no longer carries the wordmark (now owned by the global app shell)", () => { - renderSidebar(makeProps()); - expect(screen.queryByTestId("sidebar-wordmark")).not.toBeInTheDocument(); - expect(screen.queryByTestId("sidebar-alpha-chip")).not.toBeInTheDocument(); + expect(screen.queryByTestId("sidebar-workspace-name")).not.toBeInTheDocument(); + expect(document.querySelector('[data-slot="sidebar-header"]')).toBeNull(); }); }); @@ -184,9 +172,7 @@ describe("AppSidebar", () => { }); it("still renders the + affordance when there are no workspaces", () => { - renderSidebar( - makeProps({ workspaces: [], activeWorkspace: undefined, activeWorkspaceId: null }) - ); + renderSidebar(makeProps({ workspaces: [], activeWorkspaceId: null })); expect(screen.getByTestId("add-workspace-btn")).toBeInTheDocument(); expect(screen.queryByTestId(/^workspace-avatar-/)).not.toBeInTheDocument(); }); diff --git a/web/src/components/app-sidebar.tsx 
b/web/src/components/app-sidebar.tsx index 27800a8ce..f945fe21d 100644 --- a/web/src/components/app-sidebar.tsx +++ b/web/src/components/app-sidebar.tsx @@ -15,16 +15,14 @@ import { type LucideIcon, } from "lucide-react"; -import { - cn, - ConnectionIndicator, - Logo, - type ConnectionStatus, - Sidebar, - SidebarSectionLabel, - StatusDot, -} from "@agh/ui"; +import { cn, Logo, Sidebar, SidebarSectionLabel, Pill } from "@agh/ui"; +import { ConnectionIndicator, type ConnectionStatus } from "@/components/connection-indicator"; +import { + ACTIVE_NAV_INDICATOR_CLASS, + ACTIVE_NAV_ROW_CLASS, + NAV_ROW_CLASS, +} from "@/components/sidebar-nav-classes"; import { AgentIcon, type AgentPayload } from "@/systems/agent"; import type { SessionPayload } from "@/systems/session"; import type { WorkspacePayload } from "@/systems/workspace"; @@ -88,28 +86,6 @@ function RailSlot({ ); } -interface HeaderSlotProps { - activeWorkspace: WorkspacePayload | undefined; -} - -function HeaderSlot({ activeWorkspace }: HeaderSlotProps) { - return ( - - {activeWorkspace?.name ?? ""} - - ); -} - -const NAV_ROW_CLASS = - "relative flex items-center gap-2 rounded-[6px] px-2 py-1.5 text-[13px] text-[color:var(--color-text-secondary)] transition-colors hover:bg-[color:var(--color-hover)] hover:text-[color:var(--color-text-primary)]"; -const ACTIVE_NAV_ROW_CLASS = - "bg-[color:var(--color-surface)] font-medium text-[color:var(--color-text-primary)]"; -const ACTIVE_NAV_INDICATOR_CLASS = - "absolute -left-2 top-1.5 bottom-1.5 w-[2px] rounded-r-[2px] bg-[color:var(--color-accent)]"; - interface NavItemProps { to: string; icon: LucideIcon; @@ -174,7 +150,7 @@ function AgentItem({ agent, hasActiveSession }: AgentItemProps) { /> {agent.name} {hasActiveSession ? 
( - void; workspaces: WorkspacePayload[] | undefined; - activeWorkspace: WorkspacePayload | undefined; activeWorkspaceId: string | null; onSelectWorkspace: (id: string) => void; onAddWorkspace: () => void; @@ -355,7 +330,6 @@ function AppSidebar({ collapsed, onCollapseChange, workspaces, - activeWorkspace, activeWorkspaceId, onSelectWorkspace, onAddWorkspace, @@ -380,7 +354,6 @@ function AppSidebar({ onAddWorkspace={onAddWorkspace} /> } - header={} nav={ { @@ -14,7 +10,7 @@ export interface ConnectionIndicatorProps extends React.ComponentProps<"div"> { } interface StatusConfig { - tone: StatusDotTone; + tone: PillTone; label: string; pulse: boolean; } @@ -26,10 +22,16 @@ const STATUS_CONFIG: Record = { }; /** - * StatusDot + mono label composite — one canonical shape for daemon / socket - * connection state across the operator UI. Mirrors DESIGN.md §4 "Status Indicators". + * `Pill.Dot` + monospace label composite — canonical chrome for daemon / + * socket connection state across the operator UI. Wraps the dot in an + * `aria-live=polite` region so screen readers announce reconnects. */ -function ConnectionIndicator({ status, label, className, ...props }: ConnectionIndicatorProps) { +export function ConnectionIndicator({ + status, + label, + className, + ...props +}: ConnectionIndicatorProps) { const config = STATUS_CONFIG[status]; return (
- + {label ?? config.label}
); } - -export { ConnectionIndicator }; diff --git a/web/src/components/design-system-showcase.test.tsx b/web/src/components/design-system-showcase.test.tsx index 171531b59..383cf318d 100644 --- a/web/src/components/design-system-showcase.test.tsx +++ b/web/src/components/design-system-showcase.test.tsx @@ -46,6 +46,24 @@ function extractAghTokens(source: string): string[] { return [...tokens]; } +function extractTokenValueMap(source: string): Map { + const values = new Map(); + const rootMatch = source.match(/:root\s*{([\s\S]*?)}/); + if (!rootMatch) return values; + const body = rootMatch[1]; + for (const line of body.split("\n")) { + const match = line.match(/^\s*(--[a-z0-9-]+)\s*:\s*(.+?);\s*(?:\/\*[^*]*\*\/)?\s*$/i); + if (!match) continue; + const [, name, rawValue] = match; + values.set(name, rawValue.trim()); + } + return values; +} + +function normalizeTokenValue(value: string): string { + return value.trim().toLowerCase(); +} + describe("DesignSystemShowcase", () => { describe("rendering", () => { it("renders the page header, filter toolbar, and search input", () => { @@ -184,10 +202,27 @@ describe("DesignSystemShowcase", () => { expect(kinds.has("easing")).toBe(true); expect(kinds.has("tracking")).toBe(true); }); + + it("keeps rendered swatch metadata synchronized with tokens.css values", () => { + const tokenValues = extractTokenValueMap(TOKENS_SOURCE); + const mismatches = TOKEN_GROUPS.flatMap(group => + group.swatches.flatMap(swatch => { + const expected = tokenValues.get(swatch.token); + if (!expected) { + return []; + } + return normalizeTokenValue(expected) === normalizeTokenValue(swatch.value) + ? 
[] + : [`${swatch.token}: showcase=${swatch.value} tokens=${expected}`]; + }) + ); + + expect(mismatches).toEqual([]); + }); }); describe("file content contract", () => { - it("imports only from @agh/ui + lucide-react + react (no local UI primitives)", () => { + it("imports only from @agh/ui + lucide-react + react + the local helpers that compose @agh/ui Pill primitives", () => { const specifierRegex = /from\s+["']([^"']+)["']/g; const sources = new Set(); for (const match of SHOWCASE_SOURCE.matchAll(specifierRegex)) { @@ -196,7 +231,13 @@ describe("DesignSystemShowcase", () => { expect(sources.has("@agh/ui")).toBe(true); expect(sources.has("lucide-react")).toBe(true); expect(sources.has("react")).toBe(true); - const allowed = new Set(["@agh/ui", "lucide-react", "react"]); + const allowed = new Set([ + "@agh/ui", + "lucide-react", + "react", + "@/components/connection-indicator", + "@/systems/network", + ]); const forbidden = [...sources].filter(specifier => { if (allowed.has(specifier)) return false; return true; diff --git a/web/src/components/design-system-showcase.tsx b/web/src/components/design-system-showcase.tsx index df9b97ec3..47e11bfb7 100644 --- a/web/src/components/design-system-showcase.tsx +++ b/web/src/components/design-system-showcase.tsx @@ -45,7 +45,6 @@ import { Collapsible, CollapsibleContent, CollapsibleTrigger, - ConnectionIndicator, Dialog, DialogClose, DialogContent, @@ -75,15 +74,13 @@ import { ItemTitle, Kbd, KbdGroup, - KindChip, Label, Metric, - MonoBadge, + Pill, NativeSelect, NativeSelectOption, PageHeader, - Pill, - Pills, + PillGroup, Popover, PopoverContent, PopoverDescription, @@ -114,7 +111,6 @@ import { Skeleton, SplitPane, Spinner, - StatusDot, Switch, Table, TableBody, @@ -139,6 +135,9 @@ import { TooltipTrigger, } from "@agh/ui"; +import { ConnectionIndicator } from "@/components/connection-indicator"; +import { KindChip } from "@/systems/network"; + const DESIGN_MD_BASE = "https://github.com/compozy/agh/blob/main/DESIGN.md"; 
type SwatchKind = "color" | "radius" | "duration" | "easing" | "tracking"; @@ -295,6 +294,36 @@ const TOKEN_GROUPS: TokenGroup[] = [ }, ], }, + { + id: "protocol-kinds", + label: "Protocol Kind Colors", + caption: + "Kind-dot colors for `say`, `greet`, `direct`, `receipt`, `capability`, `trace`, and `whois`.", + swatches: [ + { token: "--color-kind-say", value: "#8E8E93", role: "say", kind: "color" }, + { token: "--color-kind-greet", value: "#5BA6FF", role: "greet", kind: "color" }, + { + token: "--color-kind-direct", + value: "var(--color-accent)", + role: "direct", + kind: "color", + }, + { + token: "--color-kind-receipt", + value: "var(--color-success)", + role: "receipt", + kind: "color", + }, + { + token: "--color-kind-capability", + value: "var(--color-warning)", + role: "capability", + kind: "color", + }, + { token: "--color-kind-trace", value: "#B892FF", role: "trace", kind: "color" }, + { token: "--color-kind-whois", value: "#4FD1C5", role: "whois", kind: "color" }, + ], + }, { id: "radii", label: "Radii", @@ -353,6 +382,12 @@ const TOKEN_GROUPS: TokenGroup[] = [ role: "Mono eyebrow tracking", kind: "tracking", }, + { + token: "--tracking-badge", + value: "0.08em", + role: "Badge uppercase tracking", + kind: "tracking", + }, ], }, ]; @@ -442,7 +477,7 @@ function DesignSystemShowcase() { /> - setFilter(next)} items={FILTERS.map(item => ({ label: item.label, value: item.value }))} @@ -486,7 +521,7 @@ function FoundationsTokenSection() { id="foundations" data-testid="section-foundations" label={Foundations — Tokens} - right={tokens.css} + right={tokens.css} >
{TOKEN_GROUPS.map(group => ( @@ -588,7 +623,7 @@ function TypographySection() { id="typography" data-testid="section-typography" label={Foundations — Typography} - right={Inter · JetBrains Mono · NuixyberNext} + right={Inter · JetBrains Mono · NuixyberNext} >
@@ -623,7 +658,7 @@ function TypographySection() { agh - + Alpha
@@ -641,7 +676,11 @@ function ButtonsAndPillsSection() { id="buttons" data-testid="section-buttons" label={Buttons & Pills} - right={action} + right={ + + action + + } >
@@ -677,14 +716,14 @@ function ButtonsAndPillsSection() { Outline
- Neutral - Action - Stable - Pending - Error - Info + Neutral + Action + Stable + Pending + Error + Info
- Inputs & Search} - right={form primitives} + right={form primitives} >
@@ -797,7 +836,11 @@ function StatusAndMetricSection() { id="status" data-testid="section-status" label={Status, Metric, MonoBadge, KindChip} - right={signal} + right={ + + signal + + } >
@@ -811,15 +854,15 @@ function StatusAndMetricSection() {
- + Connected
- + Reconnecting
- + Disconnected
@@ -827,13 +870,27 @@ function StatusAndMetricSection() {
- id_01HQ… - idle - RUNNING - DONE - PARTIAL - ERROR - INFO + + id_01HQ… + + + idle + + + RUNNING + + + DONE + + + PARTIAL + + + ERROR + + + INFO +
{KINDS.map(kind => ( @@ -864,7 +921,11 @@ function FeedbackSection() { id="feedback" data-testid="section-feedback" label={Feedback (Alert, Empty, Toaster)} - right={state} + right={ + + state + + } >
@@ -914,7 +975,11 @@ function OverlaysSection() { id="overlays" data-testid="section-overlays" label={Dialog · Sheet · Popover · Tooltip} - right={motion} + right={ + + motion + + } >
@@ -1066,7 +1131,11 @@ agh session list --active`; id="code-chat" data-testid="section-code-chat" label={Code & Chat} - right={session shells} + right={ + + session shells + + } >
@@ -1078,7 +1147,7 @@ agh session list --active`; role="agent" meta={ <> - + CLAUDE · 10:42 } @@ -1105,7 +1174,7 @@ function LayoutSection() { id="layout" data-testid="section-layout" label={Sidebar & SplitPane} - right={layout} + right={layout} >
@@ -1206,7 +1275,9 @@ function LayoutSection() { Version - v0.4.2 + + v0.4.2 + diff --git a/web/src/components/sidebar-nav-classes.ts b/web/src/components/sidebar-nav-classes.ts new file mode 100644 index 000000000..e5a7b2fa9 --- /dev/null +++ b/web/src/components/sidebar-nav-classes.ts @@ -0,0 +1,8 @@ +export const NAV_ROW_CLASS = + "relative flex items-center gap-2 rounded-[6px] px-2 py-1.5 text-[13px] text-[color:var(--color-text-secondary)] transition-colors hover:bg-[color:var(--color-hover)] hover:text-[color:var(--color-text-primary)]"; + +export const ACTIVE_NAV_ROW_CLASS = + "bg-[color:var(--color-surface)] font-medium text-[color:var(--color-text-primary)]"; + +export const ACTIVE_NAV_INDICATOR_CLASS = + "absolute -left-2 top-1.5 bottom-1.5 w-[2px] rounded-r-[2px] bg-[color:var(--color-accent)]"; diff --git a/web/src/components/stories/app-sidebar.stories.tsx b/web/src/components/stories/app-sidebar.stories.tsx index 3cff848c3..1116ab71b 100644 --- a/web/src/components/stories/app-sidebar.stories.tsx +++ b/web/src/components/stories/app-sidebar.stories.tsx @@ -31,21 +31,17 @@ function AppSidebarHarness({ defaultCollapsed = false, defaultWorkspaceId, activeWorkspaceId, - activeWorkspace, ...rest }: StoryArgs) { const [collapsed, setCollapsed] = useState(defaultCollapsed); const [workspaceId, setWorkspaceId] = useState( defaultWorkspaceId ?? activeWorkspaceId ?? null ); - const resolvedActive = - rest.workspaces?.find(ws => ws.id === workspaceId) ?? activeWorkspace ?? undefined; return ( = { docs: { description: { component: - "Thin composition over `@agh/ui` `Sidebar`. The rail owns the workspace switcher, the header surfaces the active workspace name + search, the nav owns the agent tree and workspace nav, and the footer owns the connection indicator + settings link. The global `agh` wordmark lives in the app-shell header one level up.", + "Thin composition over `@agh/ui` `Sidebar`. 
The rail owns the workspace switcher, the nav owns the agent tree and workspace nav, and the footer owns the connection indicator + settings link. The global `agh` wordmark lives in the app-shell header one level up.", }, }, }, args: { workspaces: workspaceFixtures, activeWorkspaceId: workspaceFixtures[1].id, - activeWorkspace: workspaceFixtures[1], onAddWorkspace: () => undefined, health: { version: "0.4.1" }, connectionStatus: "connected", @@ -104,7 +99,6 @@ export const Collapsed: Story = { export const NoWorkspaces: Story = { args: { workspaces: [], - activeWorkspace: undefined, activeWorkspaceId: null, defaultWorkspaceId: null, agents: [], diff --git a/web/src/generated/agh-openapi.d.ts b/web/src/generated/agh-openapi.d.ts index 1afd47aad..84bde5f32 100644 --- a/web/src/generated/agh-openapi.d.ts +++ b/web/src/generated/agh-openapi.d.ts @@ -1177,6 +1177,23 @@ export interface paths { patch?: never; trace?: never; }; + "/api/sessions/{id}/repair": { + parameters: { + query?: never; + header?: never; + path?: never; + cookie?: never; + }; + get?: never; + put?: never; + /** Inspect and repair an interrupted session transcript */ + post: operations["repairSession"]; + delete?: never; + options?: never; + head?: never; + patch?: never; + trace?: never; + }; "/api/sessions/{id}/resume": { parameters: { query?: never; @@ -12860,6 +12877,93 @@ export interface operations { }; }; }; + repairSession: { + parameters: { + query?: { + /** @description Report planned repairs without persisting new events */ + dry_run?: boolean; + /** @description Allow repair for stopped sessions whose stop reason is not crash or error */ + force?: boolean; + }; + header?: never; + path: { + /** @description Session id */ + id: string; + }; + cookie?: never; + }; + requestBody?: never; + responses: { + /** @description OK */ + 200: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + repair: { + actions: { + code: string; + event_id?: string; + persisted: 
boolean; + tool_call_id?: string; + tool_name?: string; + turn_id: string; + }[]; + issues: { + code: string; + detail?: string; + event_id?: string; + severity: string; + turn_id?: string; + }[]; + persisted: boolean; + session_id: string; + }; + }; + }; + }; + /** @description Invalid repair options */ + 400: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Session not found */ + 404: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + /** @description Internal server error */ + 500: { + headers: { + [name: string]: unknown; + }; + content: { + "application/json": { + error: string; + }; + }; + }; + default: { + headers: { + [name: string]: unknown; + }; + content?: never; + }; + }; + }; resumeSession: { parameters: { query?: never; diff --git a/web/src/hooks/routes/use-home-page.ts b/web/src/hooks/routes/use-home-page.ts index c60b24c8b..6344f85ca 100644 --- a/web/src/hooks/routes/use-home-page.ts +++ b/web/src/hooks/routes/use-home-page.ts @@ -1,6 +1,8 @@ import { useMemo } from "react"; -import type { ConnectionStatus, StatusDotTone } from "@agh/ui"; +import type { PillTone } from "@agh/ui"; + +import type { ConnectionStatus } from "@/components/connection-indicator"; import { useAgents } from "@/systems/agent"; import { useDaemonHealth } from "@/systems/daemon"; @@ -12,7 +14,7 @@ export type DaemonStatusKey = "healthy" | "degraded" | "disconnected" | "unknown interface DaemonStatusDescriptor { key: DaemonStatusKey; - tone: StatusDotTone; + tone: PillTone; label: string; description: string; } diff --git a/web/src/lib/kind-colors.ts b/web/src/lib/kind-colors.ts new file mode 100644 index 000000000..5dbb0449e --- /dev/null +++ b/web/src/lib/kind-colors.ts @@ -0,0 +1,19 @@ +/** + * Wire-protocol kind → leading-dot color map. 
+ * Each protocol kind (`say`, `greet`, `direct`, …) is identified visually by + * a 7px colored dot rendered ahead of the kind label. Unknown kinds (platform + * names, event ids) render without a dot. + */ +export const KIND_COLORS: Record = { + say: "var(--color-kind-say)", + greet: "var(--color-kind-greet)", + direct: "var(--color-kind-direct)", + receipt: "var(--color-kind-receipt)", + capability: "var(--color-kind-capability)", + trace: "var(--color-kind-trace)", + whois: "var(--color-kind-whois)", +}; + +export function kindColorFor(kind: string): string | undefined { + return KIND_COLORS[kind.toLowerCase()]; +} diff --git a/web/src/lib/pill-variant.ts b/web/src/lib/pill-variant.ts index 6e7f6f473..600476f65 100644 --- a/web/src/lib/pill-variant.ts +++ b/web/src/lib/pill-variant.ts @@ -1,9 +1,9 @@ -import type { PillVariant } from "@agh/ui"; +import type { PillTone } from "@agh/ui"; /** * Legacy tone strings emitted by `*-formatters.ts` helpers across domain systems. - * Maps the historical `design-system/pill` tone palette onto the new `@agh/ui` - * `Pill`/`Pills` semantic variant system. + * Maps the historical `design-system/pill` tone palette onto the unified `@agh/ui` + * `Pill` `tone` system. 
*/ export type LegacyPillTone = | "neutral" @@ -14,7 +14,7 @@ export type LegacyPillTone = | "accent" | "warning"; -export function pillVariantFromTone(tone: LegacyPillTone | null | undefined): PillVariant { +export function pillToneFromLegacyTone(tone: LegacyPillTone | null | undefined): PillTone { switch (tone) { case "amber": case "accent": @@ -29,6 +29,6 @@ export function pillVariantFromTone(tone: LegacyPillTone | null | undefined): Pi return "warning"; case "neutral": default: - return "default"; + return "neutral"; } } diff --git a/web/src/routes/_app.tsx b/web/src/routes/_app.tsx index 5ee8e8257..9de93590b 100644 --- a/web/src/routes/_app.tsx +++ b/web/src/routes/_app.tsx @@ -43,7 +43,6 @@ function AppLayout() { collapsed={page.collapsed} onCollapseChange={page.setCollapsed} workspaces={page.areWorkspacesLoading || page.workspacesError ? undefined : page.workspaces} - activeWorkspace={page.activeWorkspace} activeWorkspaceId={page.activeWorkspaceId} onSelectWorkspace={page.setActiveWorkspaceId} onAddWorkspace={page.openWorkspaceSetup} diff --git a/web/src/routes/_app/-tasks.test.tsx b/web/src/routes/_app/-tasks.test.tsx index dfb7c8e0c..13b318041 100644 --- a/web/src/routes/_app/-tasks.test.tsx +++ b/web/src/routes/_app/-tasks.test.tsx @@ -196,7 +196,7 @@ describe("TasksRoute", () => { expect(await screen.findByTestId("tasks-inbox-view")).toBeInTheDocument(); expect(screen.getByTestId("tasks-open-create")).toBeInTheDocument(); const inboxTab = screen.getByTestId("tasks-mode-inbox"); - expect(inboxTab.querySelector('[data-slot="pills-badge"]')).toHaveTextContent("1"); + expect(inboxTab.querySelector('[data-slot="pill-group-badge"]')).toHaveTextContent("1"); expect(screen.getByTestId("tasks-inbox-group-approvals")).toBeInTheDocument(); fireEvent.click(screen.getByTestId("tasks-inbox-item-approve-task_apr")); diff --git a/web/src/routes/_app/bridges.tsx b/web/src/routes/_app/bridges.tsx index 15c6a112e..39870b1b0 100644 --- a/web/src/routes/_app/bridges.tsx +++ 
b/web/src/routes/_app/bridges.tsx @@ -1,7 +1,7 @@ import { AlertCircle, Loader2, Plus, Waypoints } from "lucide-react"; import { createFileRoute } from "@tanstack/react-router"; -import { Button, Empty, PageHeader, Pills, SplitPane } from "@agh/ui"; +import { Button, Empty, PageHeader, PillGroup, SplitPane } from "@agh/ui"; import { BridgeCreateDialog, BridgeDetailPanel, @@ -64,7 +64,7 @@ function BridgesPage() { ); const controls = ( - + v{page.daemonVersion} - + ) : null } > @@ -103,7 +95,7 @@ function DaemonStatusSection({ page }: { page: HomePageView }) { data-status={page.daemonStatus.key} >
-
- {profile.backend} + + {profile.backend} + {backendLabel(profile.backend)} diff --git a/web/src/routes/_app/settings.tsx b/web/src/routes/_app/settings.tsx index aa2b2a391..d16be748b 100644 --- a/web/src/routes/_app/settings.tsx +++ b/web/src/routes/_app/settings.tsx @@ -10,7 +10,12 @@ import { import { AlertTriangle, RefreshCw, Settings as SettingsIcon } from "lucide-react"; import type { ComponentType, ReactNode } from "react"; -import { Button, Empty, buttonVariants, cn } from "@agh/ui"; +import { Button, Empty, SidebarSectionLabel, buttonVariants, cn } from "@agh/ui"; +import { + ACTIVE_NAV_INDICATOR_CLASS, + ACTIVE_NAV_ROW_CLASS, + NAV_ROW_CLASS, +} from "@/components/sidebar-nav-classes"; import { SETTINGS_ROOT_PATH, SETTINGS_SECTIONS, @@ -97,15 +102,11 @@ function SettingsSectionNav() { return (
- + aria-label="Room detail tabs" items={[ { value: "about", label: "About" }, @@ -909,9 +959,13 @@ export function NetworkWorkspaceShell({ {activeRoom.capabilities.map(capability => (
- {capability.id} + + {capability.id} + {capability.detail?.version ? ( - {capability.detail.version} + + {capability.detail.version} + ) : null}

@@ -933,7 +987,7 @@ export function NetworkWorkspaceShell({ activeRoom.members.map(member => (

- +

{member.title} @@ -942,7 +996,9 @@ export function NetworkWorkspaceShell({ {member.subtitle}

- {member.local ? "local" : "remote"} + + {member.local ? "local" : "remote"} +
{member.lastSeen ? (

diff --git a/web/src/systems/network/index.ts b/web/src/systems/network/index.ts index 621ac54e7..f9c79d087 100644 --- a/web/src/systems/network/index.ts +++ b/web/src/systems/network/index.ts @@ -120,5 +120,6 @@ export { export { useCreateNetworkChannel, useSendNetworkMessage } from "./hooks/use-network-actions"; // Components +export { KindChip } from "./components/kind-chip"; export { NetworkCreateChannelDialog } from "./components/network-create-channel-dialog"; export { NetworkWorkspaceShell } from "./components/network-workspace-shell"; diff --git a/web/src/systems/session/adapters/session-api.test.ts b/web/src/systems/session/adapters/session-api.test.ts index e5b739e88..9f0652800 100644 --- a/web/src/systems/session/adapters/session-api.test.ts +++ b/web/src/systems/session/adapters/session-api.test.ts @@ -8,6 +8,8 @@ import { } from "@/test/fetch-test-utils"; import { + SessionApiError, + SessionNotFoundError, cancelSessionPrompt, clearSessionConversation, createSession, @@ -16,6 +18,7 @@ import { fetchSessionEvents, fetchSessionTranscript, fetchSessions, + repairSession, resumeSession, stopSession, } from "./session-api"; @@ -31,6 +34,27 @@ const mockSession = { updated_at: "2026-04-01T01:00:00Z", }; +const mockRepair = { + session_id: "sess-001", + issues: [ + { + code: "event_sequence_gap", + severity: "warning", + turn_id: "turn-1", + detail: "gap before sequence 4", + }, + ], + actions: [ + { + code: "append_terminal_error", + turn_id: "turn-1", + event_id: "ev-repair-1", + persisted: false, + }, + ], + persisted: false, +}; + beforeEach(() => { vi.stubGlobal("fetch", vi.fn()); }); @@ -304,6 +328,53 @@ describe("resumeSession", () => { }); }); +describe("repairSession", () => { + it("calls POST repair endpoint with query flags and returns the repair payload", async () => { + mockJsonResponse({ repair: mockRepair }); + + const result = await repairSession("sess-001", { dry_run: true, force: true }); + + expect(result).toEqual(mockRepair); + const 
request = fetchRequest(); + const url = new URL(request.url); + expect(request.method).toBe("POST"); + expect(url.pathname).toBe("/api/sessions/sess-001/repair"); + expect(url.searchParams.get("dry_run")).toBe("true"); + expect(url.searchParams.get("force")).toBe("true"); + }); + + it("throws 404 error for unknown session", async () => { + vi.mocked(globalThis.fetch).mockResolvedValue(new Response(null, { status: 404 })); + + await expect(repairSession("unknown")).rejects.toBeInstanceOf(SessionNotFoundError); + await expect(repairSession("unknown")).rejects.toThrow("Session not found: unknown"); + }); + + it("throws typed adapter error for non-404 failures", async () => { + vi.mocked(globalThis.fetch).mockResolvedValue(new Response(null, { status: 500 })); + + await expect(repairSession("sess-001")).rejects.toBeInstanceOf(SessionApiError); + await expect(repairSession("sess-001")).rejects.toMatchObject({ + message: 'Failed to repair session "sess-001": 500', + status: 500, + sessionId: "sess-001", + }); + }); + + it("passes abort signal to fetch", async () => { + mockJsonResponse({ repair: mockRepair }); + + const controller = new AbortController(); + await repairSession("sess-001", {}, controller.signal); + + await expectFetchRequest({ + method: "POST", + path: "/api/sessions/sess-001/repair", + signal: controller.signal, + }); + }); +}); + describe("clearSessionConversation", () => { it("calls POST clear endpoint and returns the refreshed session payload", async () => { mockJsonResponse({ session: mockSession }); diff --git a/web/src/systems/session/adapters/session-api.ts b/web/src/systems/session/adapters/session-api.ts index b4fc774ad..99b6a1fcc 100644 --- a/web/src/systems/session/adapters/session-api.ts +++ b/web/src/systems/session/adapters/session-api.ts @@ -12,6 +12,8 @@ import type { SessionMessage, SessionEventPayload, SessionPayload, + SessionRepairPayload, + SessionRepairQuery, TurnHistoryPayload, } from "../types"; import { 
normalizeTranscriptMessages } from "../lib/message-schemas"; @@ -21,8 +23,43 @@ export type { CreateSessionParams, FetchSessionEventsParams, PermissionDecision, + SessionRepairQuery, } from "../types"; +export class SessionApiError extends Error { + constructor( + message: string, + public readonly status: number, + public readonly sessionId?: string + ) { + super(message); + this.name = "SessionApiError"; + } +} + +export class SessionNotFoundError extends SessionApiError { + constructor(id: string) { + super(`Session not found: ${id}`, 404, id); + this.name = "SessionNotFoundError"; + } +} + +function throwSessionRequestError( + response: Response, + error: unknown, + fallback: string, + sessionId?: string +): never { + if (response.status === 404 && sessionId) { + throw new SessionNotFoundError(sessionId); + } + throw new SessionApiError( + defaultApiErrorMessage(fallback, response, error), + response.status, + sessionId + ); +} + export async function fetchSessions( workspace?: string, signal?: AbortSignal @@ -36,7 +73,7 @@ export async function fetchSessions( signal, }); if (apiRequestFailed(response, error)) { - throw new Error(defaultApiErrorMessage("Failed to fetch sessions", response, error)); + throwSessionRequestError(response, error, "Failed to fetch sessions"); } return requireResponseData(data, response, "Failed to fetch sessions").sessions; } @@ -51,9 +88,9 @@ export async function createSession( }); if (apiRequestFailed(response, error)) { if (response.status === 409) { - throw new Error("Max sessions reached"); + throw new SessionApiError("Max sessions reached", 409); } - throw new Error(defaultApiErrorMessage("Failed to create session", response, error)); + throwSessionRequestError(response, error, "Failed to create session"); } return requireResponseData(data, response, "Failed to create session").session; } @@ -64,10 +101,7 @@ export async function fetchSession(id: string, signal?: AbortSignal): Promise { + const { data, error, response } = 
await apiClient.POST("/api/sessions/{id}/repair", { + params: { + path: { id }, + query, + }, + signal, + }); + if (apiRequestFailed(response, error)) { + throwSessionRequestError(response, error, `Failed to repair session "${id}"`, id); + } + return requireResponseData(data, response, `Failed to repair session "${id}"`).repair; +} + function isPlainObject(value: unknown): value is Record { return typeof value === "object" && value !== null && !Array.isArray(value); } @@ -163,17 +210,29 @@ export async function clearSessionConversation( const response = await globalThis.fetch(request); if (!response.ok) { if (response.status === 404) { - throw new Error(`Session not found: ${id}`); + throw new SessionNotFoundError(id); } if (response.status === 409) { - throw new Error(`Cannot clear session "${id}" while a prompt is still running`); + throw new SessionApiError( + `Cannot clear session "${id}" while a prompt is still running`, + 409, + id + ); } - throw new Error(`Failed to clear session "${id}": ${response.status}`); + throw new SessionApiError( + `Failed to clear session "${id}": ${response.status}`, + response.status, + id + ); } const body: unknown = await response.json(); if (!isSessionEnvelope(body)) { - throw new Error(`Failed to clear session "${id}": invalid response payload`); + throw new SessionApiError( + `Failed to clear session "${id}": invalid response payload`, + response.status, + id + ); } return body.session; @@ -192,12 +251,7 @@ export async function fetchSessionEvents( signal, }); if (apiRequestFailed(response, error)) { - if (response.status === 404) { - throw new Error(`Session not found: ${id}`); - } - throw new Error( - defaultApiErrorMessage(`Failed to fetch session events "${id}"`, response, error) - ); + throwSessionRequestError(response, error, `Failed to fetch session events "${id}"`, id); } return requireResponseData(data, response, `Failed to fetch session events "${id}"`).events; } @@ -213,10 +267,7 @@ export async function 
approveSession( signal, }); if (apiRequestFailed(response, error)) { - if (response.status === 404) { - throw new Error(`Session not found: ${id}`); - } - throw new Error(defaultApiErrorMessage("Failed to approve permission", response, error)); + throwSessionRequestError(response, error, "Failed to approve permission", id); } } @@ -229,12 +280,7 @@ export async function fetchSessionHistory( signal, }); if (apiRequestFailed(response, error)) { - if (response.status === 404) { - throw new Error(`Session not found: ${id}`); - } - throw new Error( - defaultApiErrorMessage(`Failed to fetch session history "${id}"`, response, error) - ); + throwSessionRequestError(response, error, `Failed to fetch session history "${id}"`, id); } return requireResponseData(data, response, `Failed to fetch session history "${id}"`).history; } @@ -248,12 +294,7 @@ export async function fetchSessionTranscript( signal, }); if (apiRequestFailed(response, error)) { - if (response.status === 404) { - throw new Error(`Session not found: ${id}`); - } - throw new Error( - defaultApiErrorMessage(`Failed to fetch session transcript "${id}"`, response, error) - ); + throwSessionRequestError(response, error, `Failed to fetch session transcript "${id}"`, id); } const payload = requireResponseData(data, response, `Failed to fetch session transcript "${id}"`); diff --git a/web/src/systems/session/components/chat-header.test.tsx b/web/src/systems/session/components/chat-header.test.tsx index e0e97fd3a..1185f4490 100644 --- a/web/src/systems/session/components/chat-header.test.tsx +++ b/web/src/systems/session/components/chat-header.test.tsx @@ -50,7 +50,7 @@ describe("ChatHeader", () => { ); const dot = screen.getByTestId("agent-status-dot"); - expect(dot.getAttribute("data-slot")).toBe("status-dot"); + expect(dot.getAttribute("data-slot")).toBe("pill-dot"); expect(dot.getAttribute("data-tone")).toBe("success"); expect(dot.getAttribute("data-size")).toBe("md"); }); @@ -85,7 +85,7 @@ describe("ChatHeader", 
() => { const badge = screen.getByTestId("session-workspace-badge"); expect(badge).toHaveTextContent("alpha"); - expect(badge.getAttribute("data-slot")).toBe("mono-badge"); + expect(badge.getAttribute("data-slot")).toBe("pill"); }); it("shows current runtime activity when the session is supervised", () => { diff --git a/web/src/systems/session/components/chat-header.tsx b/web/src/systems/session/components/chat-header.tsx index 3fe09b6fd..4fdb6d431 100644 --- a/web/src/systems/session/components/chat-header.tsx +++ b/web/src/systems/session/components/chat-header.tsx @@ -9,9 +9,8 @@ import { DialogFooter, DialogHeader, DialogTitle, - MonoBadge, - StatusDot, - type StatusDotTone, + Pill, + type PillTone, } from "@agh/ui"; import { cn } from "@/lib/utils"; @@ -30,7 +29,7 @@ export interface ChatHeaderProps { } interface StateSignal { - tone: StatusDotTone; + tone: PillTone; pulse?: boolean; } @@ -76,7 +75,7 @@ export function ChatHeader({ className="flex min-w-0 items-center gap-2 overflow-hidden" data-testid="chat-breadcrumb" > -

{ expect(badge).not.toBeNull(); expect(badge?.textContent).toBe("RUNNING"); expect(badge?.getAttribute("data-tone")).toBe("accent"); - expect(badge?.className).toMatch(/bg-\[color:var\(--color-accent-tint\)\]/); + expect(badge?.className).toMatch(/bg-\(--color-accent-tint\)/); expect(queryPrimitiveRoot()?.getAttribute("data-status")).toBe("running"); }); @@ -99,7 +99,7 @@ describe("ToolCallCard", () => { const badge = queryStatusBadge(); expect(badge?.textContent).toBe("DONE"); expect(badge?.getAttribute("data-tone")).toBe("success"); - expect(badge?.className).toMatch(/bg-\[color:var\(--color-success-tint\)\]/); + expect(badge?.className).toMatch(/bg-\(--color-success-tint\)/); expect(queryPrimitiveRoot()?.getAttribute("data-status")).toBe("done"); }); @@ -115,7 +115,7 @@ describe("ToolCallCard", () => { const badge = queryStatusBadge(); expect(badge?.textContent).toBe("ERROR"); expect(badge?.getAttribute("data-tone")).toBe("danger"); - expect(badge?.className).toMatch(/bg-\[color:var\(--color-danger-tint\)\]/); + expect(badge?.className).toMatch(/bg-\(--color-danger-tint\)/); const root = queryPrimitiveRoot(); expect(root?.getAttribute("data-status")).toBe("error"); expect(root?.className).toContain("data-[status=error]:border-[color:var(--color-danger)]/40"); diff --git a/web/src/systems/session/hooks/use-session-actions.test.tsx b/web/src/systems/session/hooks/use-session-actions.test.tsx index 49385e5b9..48af9237c 100644 --- a/web/src/systems/session/hooks/use-session-actions.test.tsx +++ b/web/src/systems/session/hooks/use-session-actions.test.tsx @@ -8,6 +8,7 @@ import { useClearSessionConversation, useCreateSession, useDeleteSession, + useRepairSession, } from "./use-session-actions"; import { sessionKeys } from "../lib/query-keys"; import type { SessionPayload } from "../types"; @@ -16,11 +17,17 @@ vi.mock("../adapters/session-api", () => ({ clearSessionConversation: vi.fn(), createSession: vi.fn(), deleteSession: vi.fn(), + repairSession: vi.fn(), 
stopSession: vi.fn(), resumeSession: vi.fn(), })); -import { clearSessionConversation, createSession, deleteSession } from "../adapters/session-api"; +import { + clearSessionConversation, + createSession, + deleteSession, + repairSession, +} from "../adapters/session-api"; function createWrapper(queryClient: QueryClient) { return ({ children }: { children: ReactNode }) => @@ -234,4 +241,51 @@ describe("session actions", () => { expect(queryClient.getQueryData(sessionKeys.events(createdSession.id))).toEqual(eventsSnapshot); expect(useSessionStore.getState().drafts[createdSession.id]?.text).toBe("keep me"); }); + + it("useRepairSession invalidates transcript-facing caches after repair completes", async () => { + vi.mocked(repairSession).mockResolvedValue({ + session_id: createdSession.id, + issues: [], + actions: [ + { + code: "append_terminal_error", + turn_id: "turn-1", + event_id: "ev-repair-1", + persisted: true, + }, + ], + persisted: true, + }); + + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false }, mutations: { retry: false } }, + }); + const invalidateSpy = vi.spyOn(queryClient, "invalidateQueries"); + + const { result } = renderHook(() => useRepairSession(), { + wrapper: createWrapper(queryClient), + }); + + await act(async () => { + await result.current.mutateAsync({ + id: createdSession.id, + dry_run: true, + force: true, + }); + }); + + expect(repairSession).toHaveBeenCalledWith(createdSession.id, { + dry_run: true, + force: true, + }); + expect(invalidateSpy).toHaveBeenCalledWith({ queryKey: sessionKeys.detail(createdSession.id) }); + expect(invalidateSpy).toHaveBeenCalledWith({ + queryKey: sessionKeys.history(createdSession.id), + }); + expect(invalidateSpy).toHaveBeenCalledWith({ + queryKey: sessionKeys.transcript(createdSession.id), + }); + expect(invalidateSpy).toHaveBeenCalledWith({ queryKey: sessionKeys.events(createdSession.id) }); + expect(invalidateSpy).toHaveBeenCalledWith({ queryKey: sessionKeys.lists() }); + 
}); }); diff --git a/web/src/systems/session/hooks/use-session-actions.ts b/web/src/systems/session/hooks/use-session-actions.ts index 10640ec14..e18cbc00b 100644 --- a/web/src/systems/session/hooks/use-session-actions.ts +++ b/web/src/systems/session/hooks/use-session-actions.ts @@ -5,12 +5,13 @@ import { createSession, type CreateSessionParams, deleteSession, + repairSession, resumeSession, stopSession, } from "../adapters/session-api"; import { useSessionStore } from "./use-session-store"; import { sessionKeys } from "../lib/query-keys"; -import type { SessionPayload } from "../types"; +import type { SessionPayload, SessionRepairQuery } from "../types"; function mergeSessionList( current: SessionPayload[] | undefined, @@ -100,6 +101,25 @@ export function useResumeSession() { }); } +export interface RepairSessionParams extends SessionRepairQuery { + id: string; +} + +export function useRepairSession() { + const queryClient = useQueryClient(); + + return useMutation({ + mutationFn: ({ id, ...query }: RepairSessionParams) => repairSession(id, query), + onSettled: (_data, _error, params) => { + queryClient.invalidateQueries({ queryKey: sessionKeys.detail(params.id) }); + queryClient.invalidateQueries({ queryKey: sessionKeys.history(params.id) }); + queryClient.invalidateQueries({ queryKey: sessionKeys.transcript(params.id) }); + queryClient.invalidateQueries({ queryKey: sessionKeys.events(params.id) }); + queryClient.invalidateQueries({ queryKey: sessionKeys.lists() }); + }, + }); +} + interface ClearConversationSnapshot { session: SessionPayload | undefined; transcript: unknown; diff --git a/web/src/systems/session/index.ts b/web/src/systems/session/index.ts index 6a9cf6d36..a91215b91 100644 --- a/web/src/systems/session/index.ts +++ b/web/src/systems/session/index.ts @@ -14,6 +14,9 @@ export type { SessionHistoryResponse, SessionMessage, SessionPayload, + SessionRepairPayload, + SessionRepairQuery, + SessionRepairResponse, SessionResponse, SessionState, 
SessionsResponse, @@ -37,6 +40,7 @@ export { fetchSessionHistory, fetchSessionTranscript, fetchSessions, + repairSession, resumeSession, stopSession, } from "./adapters/session-api"; @@ -66,8 +70,10 @@ export { useClearSessionConversation, useCreateSession, useDeleteSession, + useRepairSession, useResumeSession, useStopSession, + type RepairSessionParams, } from "./hooks/use-session-actions"; export { useSessionCreateDialog, diff --git a/web/src/systems/session/mocks/fixtures.ts b/web/src/systems/session/mocks/fixtures.ts index 9d1b96196..addbe0c44 100644 --- a/web/src/systems/session/mocks/fixtures.ts +++ b/web/src/systems/session/mocks/fixtures.ts @@ -3,6 +3,7 @@ import type { SessionApprovalResponse, SessionEventPayload, SessionPayload, + SessionRepairPayload, TranscriptMessage, TurnHistoryPayload, UIMessage, @@ -101,6 +102,20 @@ export const sessionHistoryFixture: TurnHistoryPayload[] = [ }, ]; +export const sessionRepairFixture: SessionRepairPayload = { + session_id: primarySessionFixture.id, + issues: [], + actions: [ + { + code: "append_terminal_error", + turn_id: "turn_001", + event_id: "event_repair_001", + persisted: true, + }, + ], + persisted: true, +}; + export const bashToolMessageFixture: UIMessage = { id: "tool_bash", role: "tool_call", diff --git a/web/src/systems/session/mocks/handlers.ts b/web/src/systems/session/mocks/handlers.ts index de136f68f..8daa88bdf 100644 --- a/web/src/systems/session/mocks/handlers.ts +++ b/web/src/systems/session/mocks/handlers.ts @@ -6,6 +6,7 @@ import { sessionEventsFixture, sessionFixtures, sessionHistoryFixture, + sessionRepairFixture, sessionTranscriptFixture, } from "./fixtures"; @@ -71,6 +72,28 @@ export const handlers: HttpHandler[] = [ }, }); }), + http.post("/api/sessions/:id/repair", ({ params, request }) => { + const id = String(params.id); + + if (!sessionById.has(id)) { + return HttpResponse.json({ error: `Session not found: ${id}` }, { status: 404 }); + } + + const url = new URL(request.url); + const 
dryRun = url.searchParams.get("dry_run") === "true"; + + return HttpResponse.json({ + repair: { + ...sessionRepairFixture, + session_id: id, + persisted: !dryRun, + actions: sessionRepairFixture.actions.map(action => ({ + ...action, + persisted: !dryRun, + })), + }, + }); + }), http.post("/api/sessions/:id/approve", ({ params }) => { const id = String(params.id); diff --git a/web/src/systems/session/types.ts b/web/src/systems/session/types.ts index 9188034bb..cd3102ca1 100644 --- a/web/src/systems/session/types.ts +++ b/web/src/systems/session/types.ts @@ -25,6 +25,9 @@ export type SessionHistoryResponse = OperationResponse<"getSessionHistory", 200> export type TurnHistoryPayload = SessionHistoryResponse["history"][number]; export type SessionTranscriptResponse = OperationResponse<"getSessionTranscript", 200>; +export type SessionRepairResponse = OperationResponse<"repairSession", 200>; +export type SessionRepairPayload = SessionRepairResponse["repair"]; +export type SessionRepairQuery = OperationQuery<"repairSession">; export type CreateSessionParams = OperationRequestBody<"createSession">; export type SessionApprovalResponse = OperationResponse<"approveSession", 200>; diff --git a/web/src/systems/settings/components/provider-card.tsx b/web/src/systems/settings/components/provider-card.tsx index d9fe46261..1db67d250 100644 --- a/web/src/systems/settings/components/provider-card.tsx +++ b/web/src/systems/settings/components/provider-card.tsx @@ -6,10 +6,8 @@ import { CardFooter, CardHeader, CardTitle, - MonoBadge, Pill, - StatusDot, - type StatusDotTone, + type PillTone, } from "@agh/ui"; import type { ReactNode } from "react"; @@ -56,12 +54,12 @@ export function ProviderCard({ provider, onEdit, onDelete }: ProviderCardProps)

{provider.default ? ( - DEFAULT + DEFAULT ) : null} - + {provider.settings.command ?? } @@ -72,12 +70,13 @@ export function ProviderCard({ provider, onEdit, onDelete }: ProviderCardProps) {provider.settings.api_key_env ? ( {provider.settings.api_key_env} - {provider.api_key_env_present ? "SET" : "MISSING"} - + ) : ( @@ -94,7 +93,7 @@ export function ProviderCard({ provider, onEdit, onDelete }: ProviderCardProps) - { = { "workspace-mcp-sidecar": "WS-MCP.JSON", }; -function badgeTone(kind: SettingsSourceKind): MonoBadgeTone { +function badgeTone(kind: SettingsSourceKind): PillTone { switch (kind) { case "builtin-provider": return "neutral"; @@ -52,12 +52,13 @@ function SettingsSourceBadge({ }: SettingsSourceBadgeProps) { return (
- {sourceLabel(source)} - + {shadowed && shadowed.length > 0 ? ( shadows {shadowed.map((entry, index) => ( - {sourceLabel(entry)} - + ))} ) : null} diff --git a/web/src/systems/settings/components/settings-status-line.tsx b/web/src/systems/settings/components/settings-status-line.tsx index c17db2e91..60555b5cc 100644 --- a/web/src/systems/settings/components/settings-status-line.tsx +++ b/web/src/systems/settings/components/settings-status-line.tsx @@ -1,5 +1,5 @@ import type { ReactNode } from "react"; -import { StatusDot } from "@agh/ui"; +import { Pill } from "@agh/ui"; interface SettingsStatusLineProps { daemonAvailable: boolean; @@ -18,7 +18,7 @@ function SettingsStatusLine({ return (
- + {label} {items.map((item, index) => ( diff --git a/web/src/systems/settings/lib/sections.ts b/web/src/systems/settings/lib/sections.ts index 080a6d881..0271d5888 100644 --- a/web/src/systems/settings/lib/sections.ts +++ b/web/src/systems/settings/lib/sections.ts @@ -1,17 +1,29 @@ +import { + Activity, + Brain, + Cpu, + Network, + Puzzle, + Server, + SlidersHorizontal, + Workflow, + Wrench, +} from "lucide-react"; + import type { SettingsSectionDescriptor, SettingsSectionSlug } from "../types"; export const SETTINGS_ROOT_PATH = "/settings" as const; export const SETTINGS_SECTIONS: readonly SettingsSectionDescriptor[] = [ - { slug: "general", label: "General" }, - { slug: "providers", label: "Providers" }, - { slug: "mcp-servers", label: "MCP Servers" }, - { slug: "memory", label: "Memory" }, - { slug: "skills", label: "Skills" }, - { slug: "automation", label: "Automation" }, - { slug: "network", label: "Network" }, - { slug: "observability", label: "Observability" }, - { slug: "hooks-extensions", label: "Hooks & Extensions" }, + { slug: "general", label: "General", icon: SlidersHorizontal }, + { slug: "providers", label: "Providers", icon: Cpu }, + { slug: "mcp-servers", label: "MCP Servers", icon: Server }, + { slug: "memory", label: "Memory", icon: Brain }, + { slug: "skills", label: "Skills", icon: Wrench }, + { slug: "automation", label: "Automation", icon: Workflow }, + { slug: "network", label: "Network", icon: Network }, + { slug: "observability", label: "Observability", icon: Activity }, + { slug: "hooks-extensions", label: "Hooks & Extensions", icon: Puzzle }, ] as const; export const SETTINGS_SECTION_SLUGS: readonly SettingsSectionSlug[] = SETTINGS_SECTIONS.map( diff --git a/web/src/systems/settings/types.ts b/web/src/systems/settings/types.ts index 3ee7d6e13..21e003a31 100644 --- a/web/src/systems/settings/types.ts +++ b/web/src/systems/settings/types.ts @@ -1,3 +1,5 @@ +import type { LucideIcon } from "lucide-react"; + import type { OperationQuery, 
OperationRequestBody, OperationResponse } from "@/lib/api-contract"; export type SettingsGeneralSection = OperationResponse<"getSettingsGeneral", 200>; @@ -67,6 +69,7 @@ export type SettingsCollectionName = "providers" | "mcp-servers" | "sandboxes" | export interface SettingsSectionDescriptor { slug: SettingsSectionSlug; label: string; + icon: LucideIcon; } export type SettingsSectionSlug = diff --git a/web/src/systems/skill/components/marketplace-view.tsx b/web/src/systems/skill/components/marketplace-view.tsx index 2d8f112ee..bca02da83 100644 --- a/web/src/systems/skill/components/marketplace-view.tsx +++ b/web/src/systems/skill/components/marketplace-view.tsx @@ -11,8 +11,8 @@ import { CardFooter, CardHeader, Empty, - MonoBadge, - Pills, + Pill, + PillGroup, SearchInput, } from "@agh/ui"; @@ -82,23 +82,24 @@ function MarketplaceCard({ skill, isInstalled, onInstall, isInstalling }: Market {tags.length > 0 ? (
{tags.map(tag => ( - {tag} - + ))}
) : null} {isInstalled ? ( - + INSTALLED - + ) : onInstall ? (
)} @@ -168,7 +169,7 @@ function MarketplaceView({ } value={search} /> - ({ diff --git a/web/src/systems/skill/components/skill-detail-panel.tsx b/web/src/systems/skill/components/skill-detail-panel.tsx index c8b6d0d69..9ee22192a 100644 --- a/web/src/systems/skill/components/skill-detail-panel.tsx +++ b/web/src/systems/skill/components/skill-detail-panel.tsx @@ -3,10 +3,9 @@ import { AlertCircle, Loader2, Wrench } from "lucide-react"; import { Button, Empty, - MonoBadge, + Pill, PageHeader, Section, - StatusDot, Switch, Table, TableBody, @@ -44,12 +43,12 @@ function SkillDetailMeta({ skill }: { skill: SkillPayload }) { return (
{skill.version ? ( - {`v${skill.version}`} + {`v${skill.version}`} ) : null} - {author ? {`@${author}`} : null} - + {author ? {`@${author}`} : null} + {skill.source} - +
); } @@ -157,13 +156,14 @@ function SkillCapabilitiesSection({ skill }: { skill: SkillPayload }) { ) : (
{capabilities.map(capability => ( - {capability} - + ))}
)} @@ -204,7 +204,7 @@ function SkillRecentCallsSection({ skill }: { skill: SkillPayload }) { key={`${call.label}-${index}`} > - ) : null}
- @@ -170,7 +170,7 @@ function SkillListPanel({ {group.label} - {group.skills.length} + {group.skills.length}
{group.skills.map(skill => ( = { additional: 4, }; -const SOURCE_TONE: Record = { +const SOURCE_TONE: Record = { bundled: "success", workspace: "info", marketplace: "accent", @@ -34,7 +34,7 @@ export function compareSkillSource(left: string, right: string): number { return (SOURCE_ORDER[left] ?? 99) - (SOURCE_ORDER[right] ?? 99); } -export function skillSourceTone(source: string): MonoBadgeTone { +export function skillSourceTone(source: string): PillTone { return SOURCE_TONE[source] ?? "neutral"; } diff --git a/web/src/systems/tasks/components/task-card.test.tsx b/web/src/systems/tasks/components/task-card.test.tsx index a0a4fcf97..179b47f1d 100644 --- a/web/src/systems/tasks/components/task-card.test.tsx +++ b/web/src/systems/tasks/components/task-card.test.tsx @@ -43,7 +43,7 @@ describe("TaskCard", () => { expect(screen.getByTestId("task-card-children-task_001")).toHaveTextContent("2 children"); expect(screen.getByTestId("task-card-deps-task_001")).toHaveTextContent("1 dep"); // Status is rendered as a pulsing accent dot for in_progress tasks. - const dot = container.querySelector('[data-slot="status-dot"]'); + const dot = container.querySelector('[data-slot="pill-dot"]'); expect(dot).not.toBeNull(); expect(dot).toHaveAttribute("data-tone", "accent"); expect(dot).toHaveAttribute("data-pulse", "true"); diff --git a/web/src/systems/tasks/components/task-card.tsx b/web/src/systems/tasks/components/task-card.tsx index b7352add7..2d8b3b1e5 100644 --- a/web/src/systems/tasks/components/task-card.tsx +++ b/web/src/systems/tasks/components/task-card.tsx @@ -1,7 +1,7 @@ import { AlertCircle } from "lucide-react"; -import { MonoBadge, Pill } from "@agh/ui"; -import { pillVariantFromTone } from "@/lib/pill-variant"; +import { Pill } from "@agh/ui"; +import { pillToneFromLegacyTone } from "@/lib/pill-variant"; import { formatAttemptLabel, @@ -92,12 +92,12 @@ export function TaskCard({
{task.priority ? ( - + {taskPriorityLabel(task.priority)} ) : null} {showApproval ? ( - {taskApprovalStateLabel(task.approval_state)} + {taskApprovalStateLabel(task.approval_state)} ) : null} {isDraft && onPublish ? (