From bfd54851837b4fb310850db899adf3a3d6bf0708 Mon Sep 17 00:00:00 2001 From: Pedro Nauck Date: Wed, 6 May 2026 21:54:06 -0300 Subject: [PATCH 01/13] fix: global migrations --- ...026-05-06-MEMORY-sqlite-migration-drift.md | 60 +++++ ...2026-05-06-sqlite-migration-append-only.md | 50 ++++ AGENTS.md | 4 +- CLAUDE.md | 4 +- ...chema-migration-identity-is-append-only.md | 81 +++++++ docs/_memory/lessons/README.md | 1 + internal/AGENTS.md | 6 + internal/CLAUDE.md | 6 + internal/store/globaldb/global_db.go | 18 +- .../globaldb/global_db_heartbeat_test.go | 31 +-- .../global_db_network_conversations_test.go | 82 ++++++- .../store/globaldb/global_db_soul_test.go | 25 +- internal/store/globaldb/global_db_test.go | 213 ++++++++++++------ 13 files changed, 445 insertions(+), 136 deletions(-) create mode 100644 .codex/ledger/2026-05-06-MEMORY-sqlite-migration-drift.md create mode 100644 .codex/plans/2026-05-06-sqlite-migration-append-only.md create mode 100644 docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md diff --git a/.codex/ledger/2026-05-06-MEMORY-sqlite-migration-drift.md b/.codex/ledger/2026-05-06-MEMORY-sqlite-migration-drift.md new file mode 100644 index 000000000..713efa33e --- /dev/null +++ b/.codex/ledger/2026-05-06-MEMORY-sqlite-migration-drift.md @@ -0,0 +1,60 @@ +Goal (incl. success criteria): + +- Fix global SQLite migration identity drift that prevents seamless `./bin/agh daemon stop/start`. +- Success: canonical migration registry preserves already-recorded versions 17-20, observed-history DB upgrades to v22, guardrail lesson/instructions land, focused tests and `make verify` pass or blockers are reported with evidence. + +Constraints/Assumptions: + +- Use root-cause fix only; do not weaken migration integrity checks and do not manually edit the live `~/.agh/agh.db`. +- Persist accepted Plan Mode plan under `.codex/plans/` before execution. +- Conversation in BR-PT; code/docs/artifacts in English. 
+- Use RTK for shell commands and avoid destructive git commands. + +Key decisions: + +- Restore registry order to observed DB history: v17 task orchestration profile, v18 review gate, v19 notification cursors, v20 bridge task subscriptions, v21 network conversation containers, v22 memory v2 events. +- No one-pass repair unless evidence appears for DBs created by the inverse broken order. +- Add durable guardrails in `docs/_memory/lessons`, root `AGENTS.md`/`CLAUDE.md`, and internal `AGENTS.md`/`CLAUDE.md`. + +State: + +- Registry, tests, lesson, and instruction guardrails are patched. Focused Go tests, isolated daemon restart proof, and full `make verify` passed. + +Done: + +- Confirmed live DB records v17-v20 as task/bridge migrations. +- Confirmed current code expects network migration at v17 and shifts v17-v20 to v18-v21. +- Accepted plan produced in chat. +- Persisted accepted plan in `.codex/plans/2026-05-06-sqlite-migration-append-only.md`. +- Restored `internal/store/globaldb.globalSchemaMigrations` append-only order: v17 task orchestration profile, v18 review gate, v19 notification cursors, v20 bridge subscriptions, v21 network conversations, v22 memory events. +- Added migration identity/order contract helpers and observed-history upgrade coverage for the real v17-v20 prefix. +- Added lesson `docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md` and updated lessons index. +- Added guardrails to root/internal `AGENTS.md` and `CLAUDE.md`. +- Ran `gofmt` on touched Go files. +- Focused verification passed: `go test ./internal/store ./internal/store/globaldb -count=1 -race`. +- Attempted `scripts/check-test-conventions.py`, but the helper script is absent in this repository. +- Built local `./bin/agh` with the fixed registry (`make build` exit 0; Vite chunk-size warning emitted). 
+- Isolated daemon proof passed against a SQLite backup of `/Users/pedronauck/.agh/agh.db`: `AGH_HOME=/tmp/agh-migration-proof.HvgC9J`, HTTP `127.0.0.1:63350`, socket `/tmp/agh-migration-proof.HvgC9J/daemon.sock`; first start appended v21 `rebuild_network_conversation_containers` and v22 `memv2_memory_events`, stop/start repeated without integrity mismatch. +- Full verification passed: `make verify` exit 0. Output included the existing Vite chunk-size warning and macOS linker warning from `golangci-lint`; no command failed. + +Now: + +- Prepare final report with verification evidence. + +Next: + +- None. + +Open questions (UNCONFIRMED if needed): + +- None. + +Working set (files/ids/commands): + +- `.codex/plans/2026-05-06-sqlite-migration-append-only.md` +- `.codex/ledger/2026-05-06-MEMORY-sqlite-migration-drift.md` +- `internal/store/globaldb/global_db.go` +- `internal/store/globaldb/global_db*_test.go` +- `docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md` +- `docs/_memory/lessons/README.md` +- `AGENTS.md`, `CLAUDE.md`, `internal/AGENTS.md`, `internal/CLAUDE.md` diff --git a/.codex/plans/2026-05-06-sqlite-migration-append-only.md b/.codex/plans/2026-05-06-sqlite-migration-append-only.md new file mode 100644 index 000000000..163409f2d --- /dev/null +++ b/.codex/plans/2026-05-06-sqlite-migration-append-only.md @@ -0,0 +1,50 @@ +# Fix Global SQLite Migration Order Drift And Add Guardrails + +## Summary + +- Root cause confirmed: the daemon correctly refuses to start because `globalSchemaMigrations` changed the identity of an already-applied migration version. +- The observed `/Users/pedronauck/.agh/agh.db` records `17=add_task_orchestration_profile_schema`, `18=add_task_review_gate_schema`, `19=add_notification_cursors`, `20=add_bridge_task_subscriptions`. +- Current code incorrectly expects `17=rebuild_network_conversation_containers` and shifts the previously recorded task/bridge migrations to `18..21`. 
+- The fix restores append-only migration identity, keeps strict integrity mismatch failures, adds regression coverage for this exact history, and documents the rule in durable project memory plus active agent instructions. + +## Key Changes + +- Restore the canonical global migration order in `internal/store/globaldb/global_db.go`: + - `17 add_task_orchestration_profile_schema` + - `18 add_task_review_gate_schema` + - `19 add_notification_cursors` + - `20 add_bridge_task_subscriptions` + - `21 rebuild_network_conversation_containers` + - `22 memv2_memory_events` +- Update network conversation migration tests to use `networkConversationMigrationVersion = 21` and seed legacy network DBs from the corrected pre-network history. +- Add a regression test that seeds a DB matching the observed local history through migration `20`, with legacy `network_timeline_log.interaction_id`, then opens it through `OpenGlobalDB` and asserts no integrity mismatch, network migration v21, memory migration v22, intact task/bridge schema, and idempotent reopen. +- Add an append-only registry contract test for the known global migration sequence, emphasizing versions `17..22`. +- Preserve strict integrity behavior in `store.RunMigrations`; do not accept arbitrary mismatches or edit `schema_migrations` in place. +- Do not add one-pass repair unless real DBs are found with the broken inverse sequence. + +## Documentation Guardrails + +- Add `docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md`. +- Update `docs/_memory/lessons/README.md` with `L-021`. +- Update root `AGENTS.md` and `CLAUDE.md` under `### Schema Migrations` with the append-only registry rule. +- Update `internal/AGENTS.md` and `internal/CLAUDE.md` with an `internal/store` migration invariant. + +## Public Interfaces / Data Contract + +- No HTTP, UDS, CLI, OpenAPI, web, or config contract changes. 
+- The internal data contract is made explicit: global SQLite migration numbers, names, and checksums are immutable once applied anywhere meaningful. +- Fresh DB final schema remains the same. Existing DBs with the observed `17..20` history upgrade by applying only missing migrations `21` and `22`. + +## Test Plan + +- Run `go test ./internal/store ./internal/store/globaldb -count=1 -race`. +- Run an isolated daemon upgrade proof using a temp copy of `/Users/pedronauck/.agh/agh.db`. +- Verify lesson and instruction guardrails landed in the intended files. +- Run `make verify`. + +## Assumptions + +- The selected implementation scope is registry and tests, not generic migration-runner redesign. +- The observed local DB history is valid and must be preserved. +- The live `/Users/pedronauck/.agh/agh.db` will not be manually mutated during validation. +- Persistent artifacts are written in English. diff --git a/AGENTS.md b/AGENTS.md index a9a2392a2..71a5adb79 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -220,6 +220,8 @@ Backend architecture, autonomy contracts, security invariants, package layout, a - **Skill**: `agh-schema-migration`. - **When**: any SQLite column, index, or constraint change. - **Mandatory**: numbered migration in the registry — `EnsureSchema`-style boot reconciliation is forbidden for column changes. +- **Append-only identity**: migration `version`, `name`, and `checksum` are persisted data contracts. Never insert, reorder, rename, renumber, or change an existing migration after it may have reached any developer, QA, or release database; append new migrations at the registry tail. +- **Integrity mismatch response**: stop and investigate the recorded history. Fix the registry order or write an ADR-backed one-pass repair; never weaken mismatch checks and never manually edit a live `schema_migrations` row as the fix. 
- **Covers**: numbered registry, transactional wrap (`BEGIN IMMEDIATE`), `-wal` / `-shm` companion handling on recovery, `ORDER BY 0` pitfall, fresh-DB + reopen-after-restart tests. ## Vocabulary & Product Strategy @@ -236,7 +238,7 @@ Repo-wide rules backed by RFC 001 / RFC 002. Runtime implementation details (pre - **Standing directives** — `docs/_memory/standing_directives.md`. Perpetually-active engineering posture (SD-001..SD-011): long-running session supervision, greenfield-delete, BR-PT/EN, multi-LLM pipeline, real-scenario QA, forensic-first bug fixes, truthful UI, composition-root discipline, detached lifetime, extensible-and-agent-manageable design. Read before opening a TechSpec, defending an architecture pivot, or whenever someone proposes a compat shim. - **Spec authoring playbook** — `docs/_memory/spec-authoring-playbook.md`. Mandatory preflight for `cy-create-prd` / `cy-create-techspec` / `cy-create-tasks`, with phase-by-phase MUST / MUST-NOT and evidence references. The `cy-spec-preflight` skill enforces this — always read before producing any `_idea.md` / `_prd.md` / `_techspec.md` / `_tasks.md`. -- **Lessons learned** — `docs/_memory/lessons/` (`L-001..L-015`, plus `README.md` index). One file per durable lesson with confirmed root cause + fix + evidence (ADR, commit, review issue, or QA bug). Scan the index whenever you hit a class of issue: concurrency / API, testing discipline, autonomy architecture, persistence, spec authoring. +- **Lessons learned** — `docs/_memory/lessons/` (`L-001..L-021`, plus `README.md` index). One file per durable lesson with confirmed root cause + fix + evidence (ADR, commit, review issue, or QA bug). Scan the index whenever you hit a class of issue: concurrency / API, testing discipline, autonomy architecture, persistence, spec authoring. - **Glossary** — `docs/_memory/glossary.md`. Canonical vocabulary (`capability` vs `recipe`, `AGENT.md` vs `AGENTS.md`, Peer Card vs Agent Card, autonomy primitives). 
Authoritative when older RFCs / ledgers conflict. Read when naming anything new, reviewing a rename PR, or when a term feels overloaded. - **Cross-source synthesis** — `docs/_memory/_synthesis.md`. Cross-referenced findings from 8 forensic analyses, ranked by source count — the evidence corpus behind every rule in CLAUDE.md and the standing directives. Read when challenging or evolving a rule. - **Forensic analyses** — `docs/_memory/analysis/analysis_*.md`. Per-source raw analyses (codex sessions / plans / ledger, compozy tasks, qmd collections, local / global runs, existing surfaces) feeding `_synthesis.md`. Read when synthesis cites a finding and you need the underlying evidence. diff --git a/CLAUDE.md b/CLAUDE.md index 3eb4eb21d..fd53e7fa1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -220,6 +220,8 @@ Backend architecture, autonomy contracts, security invariants, package layout, a - **Skill**: `agh-schema-migration`. - **When**: any SQLite column, index, or constraint change. - **Mandatory**: numbered migration in the registry — `EnsureSchema`-style boot reconciliation is forbidden for column changes. +- **Append-only identity**: migration `version`, `name`, and `checksum` are persisted data contracts. Never insert, reorder, rename, renumber, or change an existing migration after it may have reached any developer, QA, or release database; append new migrations at the registry tail. +- **Integrity mismatch response**: stop and investigate the recorded history. Fix the registry order or write an ADR-backed one-pass repair; never weaken mismatch checks and never manually edit a live `schema_migrations` row as the fix. - **Covers**: numbered registry, transactional wrap (`BEGIN IMMEDIATE`), `-wal` / `-shm` companion handling on recovery, `ORDER BY 0` pitfall, fresh-DB + reopen-after-restart tests. ## Vocabulary & Product Strategy @@ -236,7 +238,7 @@ Repo-wide rules backed by RFC 001 / RFC 002. 
Runtime implementation details (pre - **Standing directives** — `docs/_memory/standing_directives.md`. Perpetually-active engineering posture (SD-001..SD-011): long-running session supervision, greenfield-delete, BR-PT/EN, multi-LLM pipeline, real-scenario QA, forensic-first bug fixes, truthful UI, composition-root discipline, detached lifetime, extensible-and-agent-manageable design. Read before opening a TechSpec, defending an architecture pivot, or whenever someone proposes a compat shim. - **Spec authoring playbook** — `docs/_memory/spec-authoring-playbook.md`. Mandatory preflight for `cy-create-prd` / `cy-create-techspec` / `cy-create-tasks`, with phase-by-phase MUST / MUST-NOT and evidence references. The `cy-spec-preflight` skill enforces this — always read before producing any `_idea.md` / `_prd.md` / `_techspec.md` / `_tasks.md`. -- **Lessons learned** — `docs/_memory/lessons/` (`L-001..L-015`, plus `README.md` index). One file per durable lesson with confirmed root cause + fix + evidence (ADR, commit, review issue, or QA bug). Scan the index whenever you hit a class of issue: concurrency / API, testing discipline, autonomy architecture, persistence, spec authoring. +- **Lessons learned** — `docs/_memory/lessons/` (`L-001..L-021`, plus `README.md` index). One file per durable lesson with confirmed root cause + fix + evidence (ADR, commit, review issue, or QA bug). Scan the index whenever you hit a class of issue: concurrency / API, testing discipline, autonomy architecture, persistence, spec authoring. - **Glossary** — `docs/_memory/glossary.md`. Canonical vocabulary (`capability` vs `recipe`, `AGENT.md` vs `AGENTS.md`, Peer Card vs Agent Card, autonomy primitives). Authoritative when older RFCs / ledgers conflict. Read when naming anything new, reviewing a rename PR, or when a term feels overloaded. - **Cross-source synthesis** — `docs/_memory/_synthesis.md`. 
Cross-referenced findings from 8 forensic analyses, ranked by source count — the evidence corpus behind every rule in CLAUDE.md and the standing directives. Read when challenging or evolving a rule. - **Forensic analyses** — `docs/_memory/analysis/analysis_*.md`. Per-source raw analyses (codex sessions / plans / ledger, compozy tasks, qmd collections, local / global runs, existing surfaces) feeding `_synthesis.md`. Read when synthesis cites a finding and you need the underlying evidence. diff --git a/docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md b/docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md new file mode 100644 index 000000000..251339be1 --- /dev/null +++ b/docs/_memory/lessons/L-021-schema-migration-identity-is-append-only.md @@ -0,0 +1,81 @@ +# L-021 — Schema migration identity is append-only + +**Class:** Persistence +**Date discovered:** 2026-05-06 (daemon restart migration integrity failure) +**Evidence sources:** Local daemon restart failure, observed `~/.agh/agh.db` `schema_migrations` +rows, `0b371eaa feat: add network threads (#105)`, `08eedb32 feat: orchestration +improvements (#106)`, and L-008 schema migration discipline + +## Context + +Restarting the local daemon failed before readiness with: + +```text +store: migration 17 integrity mismatch: recorded "add_task_orchestration_profile_schema"/2026-05-05-add-task-orchestration-profile-schema, current "rebuild_network_conversation_containers"/2026-05-05-rebuild-network-conversation-containers +``` + +The live global database had already recorded: + +```text +17 add_task_orchestration_profile_schema +18 add_task_review_gate_schema +19 add_notification_cursors +20 add_bridge_task_subscriptions +``` + +Current code had inserted `rebuild_network_conversation_containers` at version 17 and shifted the +existing task/bridge migrations to later numbers. 
The migration runner correctly refused to boot: +the persisted version/name/checksum identity no longer matched the binary. + +## Root cause + +Migration numbers were treated as a local ordering convenience instead of persisted contract data. +Fresh database tests still passed because the final schema could be built from the new order, but +an existing database carries the historical identity in `schema_migrations`. Once any developer, +QA, or release database can record a migration version/name/checksum, that identity is immutable. +Reordering the registry after that point breaks upgrades even when the end-state schema is valid. + +## Rule + +> SQLite migration identity is append-only. After a migration may have been applied anywhere +> meaningful, do not insert before it, reorder it, rename it, renumber it, or change its checksum. +> New schema work appends the next migration number at the registry tail. + +If an existing database reports an integrity mismatch, treat it as a safety signal. Do not weaken +the runner, do not accept arbitrary mismatches, and do not manually edit `schema_migrations`. +Investigate which identity is historically valid, restore the append-only sequence, and add +observed-history upgrade coverage. + +## Operationalization + +- Before choosing a migration number, inspect the current registry, recent commits touching the + registry, and relevant ledgers/tasks for concurrently landed migrations. +- New schema work appends after the highest registered version. Chronological neatness is not a + reason to insert into the middle. +- Migration tests must include fresh database coverage and upgrade/reopen coverage. For drift + fixes, add an observed-history regression seeded with the real `schema_migrations` prefix that + failed in the operator database. +- Keep integrity mismatch failures strict. 
A mismatch means the binary and database disagree about + history; fixing that disagreement belongs in the registry or in an ADR-backed one-pass repair. +- One-pass repair is allowed only under the existing greenfield exception: bounded to one boot, + documented in an ADR, and followed immediately by strict semantics. + +## Anti-pattern + +- Inserting a new migration at an older number because it "belongs" earlier in feature chronology. +- Renumbering already-recorded migrations to make a branch merge look sequential. +- Updating tests to the new fresh-DB order without seeding an old DB and reopening it. +- Handling an integrity mismatch by allowing multiple names/checksums for one version. +- Manually updating rows in a live `schema_migrations` table to match the current binary. + +## Source + +- Observed local database: + `sqlite3 /Users/pedronauck/.agh/agh.db 'SELECT version, name, checksum FROM schema_migrations ORDER BY version;'` +- Failing daemon startup: + `error: daemon: open global database "/Users/pedronauck/.agh/agh.db": store: initialize sqlite database "/Users/pedronauck/.agh/agh.db": store: migration 17 integrity mismatch` +- `internal/store/globaldb/global_db.go` — `globalSchemaMigrations` registry +- `internal/store/schema.go` — strict `RunMigrations` version/name/checksum validation +- `docs/_memory/lessons/L-008-schema-migrations-mandatory.md` +- `0b371eaa feat: add network threads (#105)` +- `08eedb32 feat: orchestration improvements (#106)` diff --git a/docs/_memory/lessons/README.md b/docs/_memory/lessons/README.md index d082ff6fb..375f12c9a 100644 --- a/docs/_memory/lessons/README.md +++ b/docs/_memory/lessons/README.md @@ -28,6 +28,7 @@ These are NOT speculative warnings — every lesson here has either an ADR, a co | [L-018](L-018-delegated-docs-runtime-truth-audit.md) | Delegated docs lanes need a runtime-truth audit before acceptance | Documentation | | [L-019](L-019-diagnostic-data-outlives-primary-record.md) | Diagnostic data must outlive 
its primary record when audit/replay matters | Architecture / Persistence | | [L-020](L-020-dense-typed-records-need-pointer-boundaries.md) | Dense typed orchestration records need pointer boundaries | Architecture / Code style | +| [L-021](L-021-schema-migration-identity-is-append-only.md) | Schema migration identity is append-only | Persistence | ## How to use diff --git a/internal/AGENTS.md b/internal/AGENTS.md index d34be4fc5..24a88b807 100644 --- a/internal/AGENTS.md +++ b/internal/AGENTS.md @@ -51,6 +51,12 @@ Generic Go concurrency patterns (goroutine ownership, channels vs mutexes, `sele - Append-only event store (`runtime.db`) is the canonical operational ledger; session DBs are projections, not authority. - Live broadcasters publish only after durable append; reconnect/replay uses `after_seq`. +### Persistence + +- **SQLite migration registries are append-only.** `internal/store/globaldb.globalSchemaMigrations` and equivalent registries persist `version`, `name`, and `checksum` in `schema_migrations`; never insert, reorder, rename, renumber, or change an existing migration identity after it may have been applied. +- **Migration drift fixes require observed-history tests.** Cover fresh DB, upgrade/reopen, and the real recorded migration prefix that failed. Integrity mismatch is a safety signal to preserve, not an error to suppress. +- **New schema work appends at the registry tail.** If a migration appears to need an earlier slot, stop and write an ADR-backed repair plan instead of silently shifting recorded history. + ## Security Invariants - **`claim_token` redaction is non-negotiable.** Raw `claim_token` (`agh_claim_*`), MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings MUST NEVER appear in logs, status APIs, settings views, error payloads, channel messages, SSE, web UI, or memory. Use hash forms (`claim_token_hash`) over the wire. Network layer rejects raw `claim_token` in metadata. 
diff --git a/internal/CLAUDE.md b/internal/CLAUDE.md index 3f6e05e22..30ae6317c 100644 --- a/internal/CLAUDE.md +++ b/internal/CLAUDE.md @@ -51,6 +51,12 @@ Generic Go concurrency patterns (goroutine ownership, channels vs mutexes, `sele - Append-only event store (`runtime.db`) is the canonical operational ledger; session DBs are projections, not authority. - Live broadcasters publish only after durable append; reconnect/replay uses `after_seq`. +### Persistence + +- **SQLite migration registries are append-only.** `internal/store/globaldb.globalSchemaMigrations` and equivalent registries persist `version`, `name`, and `checksum` in `schema_migrations`; never insert, reorder, rename, renumber, or change an existing migration identity after it may have been applied. +- **Migration drift fixes require observed-history tests.** Cover fresh DB, upgrade/reopen, and the real recorded migration prefix that failed. Integrity mismatch is a safety signal to preserve, not an error to suppress. +- **New schema work appends at the registry tail.** If a migration appears to need an earlier slot, stop and write an ADR-backed repair plan instead of silently shifting recorded history. + ## Security Invariants - **`claim_token` redaction is non-negotiable.** Raw `claim_token` (`agh_claim_*`), MCP auth tokens, OAuth codes, PKCE verifiers, and secret bindings MUST NEVER appear in logs, status APIs, settings views, error payloads, channel messages, SSE, web UI, or memory. Use hash forms (`claim_token_hash`) over the wire. Network layer rejects raw `claim_token` in metadata. 
diff --git a/internal/store/globaldb/global_db.go b/internal/store/globaldb/global_db.go index 6ccf6478c..25f78b77b 100644 --- a/internal/store/globaldb/global_db.go +++ b/internal/store/globaldb/global_db.go @@ -841,34 +841,34 @@ var globalSchemaMigrations = []store.Migration{ }, { Version: 17, - Name: "rebuild_network_conversation_containers", - Up: migrateNetworkConversationContainers, - Checksum: "2026-05-05-rebuild-network-conversation-containers", - }, - { - Version: 18, Name: "add_task_orchestration_profile_schema", Up: migrateTaskOrchestrationProfileSchema, Checksum: "2026-05-05-add-task-orchestration-profile-schema", }, { - Version: 19, + Version: 18, Name: "add_task_review_gate_schema", Up: migrateTaskReviewGateSchema, Checksum: "2026-05-05-add-task-review-gate-schema", }, { - Version: 20, + Version: 19, Name: "add_notification_cursors", Up: migrateNotificationCursors, Checksum: "2026-05-05-add-notification-cursors", }, { - Version: 21, + Version: 20, Name: "add_bridge_task_subscriptions", Up: migrateBridgeTaskSubscriptions, Checksum: "2026-05-05-add-bridge-task-subscriptions", }, + { + Version: 21, + Name: "rebuild_network_conversation_containers", + Up: migrateNetworkConversationContainers, + Checksum: "2026-05-05-rebuild-network-conversation-containers", + }, { Version: 22, Name: "memv2_memory_events", diff --git a/internal/store/globaldb/global_db_heartbeat_test.go b/internal/store/globaldb/global_db_heartbeat_test.go index c858688c9..0c13a6c2b 100644 --- a/internal/store/globaldb/global_db_heartbeat_test.go +++ b/internal/store/globaldb/global_db_heartbeat_test.go @@ -134,36 +134,7 @@ func TestGlobalDBHeartbeatMigration(t *testing.T) { if got, want := len(records), len(globalSchemaMigrations); got != want { t.Fatalf("len(records) = %d, want %d", got, want) } - if records[11].Version != 12 || records[11].Name != "add_agent_soul_snapshots" { - t.Fatalf("records[11] = %#v, want Soul v12 before Heartbeat", records[11]) - } - if records[12].Version != 13 
|| records[12].Name != "add_agent_heartbeat_storage" { - t.Fatalf("records[12] = %#v, want Heartbeat storage v13", records[12]) - } - if records[13].Version != 14 || records[13].Name != "add_event_summary_lineage" { - t.Fatalf("records[13] = %#v, want event summary lineage v14", records[13]) - } - if records[14].Version != 15 || records[14].Name != "rebuild_event_summaries_for_global_payloads" { - t.Fatalf("records[14] = %#v, want event summary global payloads v15", records[14]) - } - if records[15].Version != 16 || records[15].Name != "rename_actor_ref_columns_to_actor_id" { - t.Fatalf("records[15] = %#v, want actor_id rename v16", records[15]) - } - if records[16].Version != 17 || records[16].Name != "rebuild_network_conversation_containers" { - t.Fatalf("records[16] = %#v, want network conversation containers v17", records[16]) - } - if records[17].Version != 18 || records[17].Name != "add_task_orchestration_profile_schema" { - t.Fatalf("records[17] = %#v, want task orchestration profile schema v18", records[17]) - } - if records[18].Version != 19 || records[18].Name != "add_task_review_gate_schema" { - t.Fatalf("records[18] = %#v, want task review gate schema v19", records[18]) - } - if records[19].Version != 20 || records[19].Name != "add_notification_cursors" { - t.Fatalf("records[19] = %#v, want notification cursors v20", records[19]) - } - if records[20].Version != 21 || records[20].Name != "add_bridge_task_subscriptions" { - t.Fatalf("records[20] = %#v, want bridge task subscriptions v21", records[20]) - } + assertAppliedGlobalMigrationOrder(t, records) }) t.Run("Should return wrapped errors and not mark failed Heartbeat migrations successful", func(t *testing.T) { diff --git a/internal/store/globaldb/global_db_network_conversations_test.go b/internal/store/globaldb/global_db_network_conversations_test.go index 2ff3dd2d2..864c96eab 100644 --- a/internal/store/globaldb/global_db_network_conversations_test.go +++ 
b/internal/store/globaldb/global_db_network_conversations_test.go @@ -11,7 +11,7 @@ import ( "github.com/pedronauck/agh/internal/testutil" ) -const networkConversationMigrationVersion = 17 +const networkConversationMigrationVersion = 21 func TestOpenGlobalDBCreatesNetworkConversationSchema(t *testing.T) { t.Parallel() @@ -153,6 +153,84 @@ func TestNetworkConversationMigrationRebuildsLegacyTimeline(t *testing.T) { func TestNetworkConversationMigrationReopenAfterRestart(t *testing.T) { t.Parallel() + t.Run( + "Should upgrade observed task and bridge migration history by appending network migration", + func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t) + path := filepath.Join(t.TempDir(), GlobalDatabaseName) + seedLegacyNetworkConversationDatabase(t, path) + + beforeDB, err := store.OpenSQLiteDatabase(ctx, path, nil) + if err != nil { + t.Fatalf("OpenSQLiteDatabase(before) error = %v", err) + } + beforeRecords, err := store.AppliedMigrations(ctx, beforeDB) + if err != nil { + t.Fatalf("AppliedMigrations(before) error = %v", err) + } + if err := beforeDB.Close(); err != nil { + t.Fatalf("beforeDB.Close() error = %v", err) + } + assertAppliedGlobalMigrationPrefix(t, beforeRecords, networkConversationMigrationVersion-1) + + first, err := OpenGlobalDB(ctx, path) + if err != nil { + t.Fatalf("OpenGlobalDB(first) error = %v", err) + } + firstRecords, err := store.AppliedMigrations(ctx, first.db) + if err != nil { + t.Fatalf("AppliedMigrations(first) error = %v", err) + } + assertAppliedGlobalMigrationOrder(t, firstRecords) + for index, before := range beforeRecords { + if !firstRecords[index].AppliedAt.Equal(before.AppliedAt) { + t.Fatalf( + "migration %d applied_at = %s, want unchanged %s", + before.Version, + firstRecords[index].AppliedAt, + before.AppliedAt, + ) + } + } + assertTaskOrchestrationProfileSchema(t, first.db) + assertReviewGateSchema(t, first.db) + assertNotificationCursorSchema(t, first.db) + assertBridgeTaskSubscriptionSchema(t, first.db) + 
assertTableLacksColumns(t, first.db, "network_timeline_log", "interaction_id") + assertTablesPresent(t, first.db, "network_threads", "network_direct_rooms", "network_work", "memory_events") + if err := first.Close(ctx); err != nil { + t.Fatalf("Close(first) error = %v", err) + } + + second, err := OpenGlobalDB(ctx, path) + if err != nil { + t.Fatalf("OpenGlobalDB(second) error = %v", err) + } + t.Cleanup(func() { + if closeErr := second.Close(ctx); closeErr != nil { + t.Fatalf("Close(second) error = %v", closeErr) + } + }) + secondRecords, err := store.AppliedMigrations(ctx, second.db) + if err != nil { + t.Fatalf("AppliedMigrations(second) error = %v", err) + } + assertAppliedGlobalMigrationOrder(t, secondRecords) + for index, firstRecord := range firstRecords { + if !secondRecords[index].AppliedAt.Equal(firstRecord.AppliedAt) { + t.Fatalf( + "second migration %d applied_at = %s, want unchanged %s", + firstRecord.Version, + secondRecords[index].AppliedAt, + firstRecord.AppliedAt, + ) + } + } + }, + ) + t.Run("Should record migration version and keep schema stable after reopen", func(t *testing.T) { t.Parallel() @@ -190,7 +268,7 @@ func TestNetworkConversationMigrationReopenAfterRestart(t *testing.T) { t.Fatalf("len(secondRecords) = %d, want %d", got, want) } if !secondRecords[len(secondRecords)-1].AppliedAt.Equal(firstRecords[len(firstRecords)-1].AppliedAt) { - t.Fatalf("migration v17 applied_at changed after reopen") + t.Fatalf("migration v%d applied_at changed after reopen", networkConversationMigrationVersion) } assertTableLacksColumns(t, second.db, "network_timeline_log", "interaction_id") assertTablesPresent(t, second.db, "network_threads", "network_direct_rooms", "network_work") diff --git a/internal/store/globaldb/global_db_soul_test.go b/internal/store/globaldb/global_db_soul_test.go index b5fb84249..2384c5662 100644 --- a/internal/store/globaldb/global_db_soul_test.go +++ b/internal/store/globaldb/global_db_soul_test.go @@ -72,30 +72,7 @@ func 
TestGlobalDBSoulMigration(t *testing.T) { if heartbeatRecord.Version != 13 || heartbeatRecord.Name != "add_agent_heartbeat_storage" { t.Fatalf("records[12] = %#v, want add_agent_heartbeat_storage v13", heartbeatRecord) } - if records[13].Version != 14 || records[13].Name != "add_event_summary_lineage" { - t.Fatalf("records[13] = %#v, want add_event_summary_lineage v14", records[13]) - } - if records[14].Version != 15 || records[14].Name != "rebuild_event_summaries_for_global_payloads" { - t.Fatalf("records[14] = %#v, want rebuild_event_summaries_for_global_payloads v15", records[14]) - } - if records[15].Version != 16 || records[15].Name != "rename_actor_ref_columns_to_actor_id" { - t.Fatalf("records[15] = %#v, want rename_actor_ref_columns_to_actor_id v16", records[15]) - } - if records[16].Version != 17 || records[16].Name != "rebuild_network_conversation_containers" { - t.Fatalf("records[16] = %#v, want rebuild_network_conversation_containers v17", records[16]) - } - if records[17].Version != 18 || records[17].Name != "add_task_orchestration_profile_schema" { - t.Fatalf("records[17] = %#v, want add_task_orchestration_profile_schema v18", records[17]) - } - if records[18].Version != 19 || records[18].Name != "add_task_review_gate_schema" { - t.Fatalf("records[18] = %#v, want add_task_review_gate_schema v19", records[18]) - } - if records[19].Version != 20 || records[19].Name != "add_notification_cursors" { - t.Fatalf("records[19] = %#v, want add_notification_cursors v20", records[19]) - } - if records[20].Version != 21 || records[20].Name != "add_bridge_task_subscriptions" { - t.Fatalf("records[20] = %#v, want add_bridge_task_subscriptions v21", records[20]) - } + assertAppliedGlobalMigrationOrder(t, records) for _, table := range []string{"soul_snapshots", "soul_revisions"} { exists, err := tableExists(ctx, globalDB.db, table) if err != nil { diff --git a/internal/store/globaldb/global_db_test.go b/internal/store/globaldb/global_db_test.go index 
8330eaed4..9f290921f 100644 --- a/internal/store/globaldb/global_db_test.go +++ b/internal/store/globaldb/global_db_test.go @@ -176,75 +176,7 @@ func TestOpenGlobalDBRecordsSchemaMigrationAndRepeatedBootIsIdempotent(t *testin if got, want := len(firstRecords), len(globalSchemaMigrations); got != want { t.Fatalf("len(firstRecords) = %d, want %d", got, want) } - if firstRecords[0].Version != 1 || firstRecords[0].Name != "create_global_schema" { - t.Fatalf("firstRecords[0] = %#v, want create_global_schema v1", firstRecords[0]) - } - if firstRecords[1].Version != 2 || firstRecords[1].Name != "add_session_failure_diagnostics" { - t.Fatalf("firstRecords[1] = %#v, want add_session_failure_diagnostics v2", firstRecords[1]) - } - if firstRecords[2].Version != 3 || firstRecords[2].Name != "add_automation_scheduler_state" { - t.Fatalf("firstRecords[2] = %#v, want add_automation_scheduler_state v3", firstRecords[2]) - } - if firstRecords[3].Version != 4 || firstRecords[3].Name != "add_mcp_auth_tokens" { - t.Fatalf("firstRecords[3] = %#v, want add_mcp_auth_tokens v4", firstRecords[3]) - } - if firstRecords[4].Version != 5 || firstRecords[4].Name != "add_tool_process_records" { - t.Fatalf("firstRecords[4] = %#v, want add_tool_process_records v5", firstRecords[4]) - } - if firstRecords[5].Version != 6 || firstRecords[5].Name != "add_memory_operation_scope" { - t.Fatalf("firstRecords[5] = %#v, want add_memory_operation_scope v6", firstRecords[5]) - } - if firstRecords[6].Version != 7 || firstRecords[6].Name != "add_task_run_claim_lease_schema" { - t.Fatalf("firstRecords[6] = %#v, want add_task_run_claim_lease_schema v7", firstRecords[6]) - } - if firstRecords[7].Version != 8 || firstRecords[7].Name != "add_session_lineage_metadata" { - t.Fatalf("firstRecords[7] = %#v, want add_session_lineage_metadata v8", firstRecords[7]) - } - if firstRecords[8].Version != 9 || firstRecords[8].Name != "rename_environment_columns_to_sandbox" { - t.Fatalf("firstRecords[8] = %#v, want 
rename_environment_columns_to_sandbox v9", firstRecords[8]) - } - if firstRecords[9].Version != 10 || firstRecords[9].Name != "add_vault_secrets" { - t.Fatalf("firstRecords[9] = %#v, want add_vault_secrets v10", firstRecords[9]) - } - if firstRecords[10].Version != 11 || firstRecords[10].Name != "unify_secret_refs" { - t.Fatalf("firstRecords[10] = %#v, want unify_secret_refs v11", firstRecords[10]) - } - if firstRecords[11].Version != 12 || firstRecords[11].Name != "add_agent_soul_snapshots" { - t.Fatalf("firstRecords[11] = %#v, want add_agent_soul_snapshots v12", firstRecords[11]) - } - if firstRecords[12].Version != 13 || firstRecords[12].Name != "add_agent_heartbeat_storage" { - t.Fatalf("firstRecords[12] = %#v, want add_agent_heartbeat_storage v13", firstRecords[12]) - } - if firstRecords[13].Version != 14 || firstRecords[13].Name != "add_event_summary_lineage" { - t.Fatalf("firstRecords[13] = %#v, want add_event_summary_lineage v14", firstRecords[13]) - } - if firstRecords[14].Version != 15 || firstRecords[14].Name != "rebuild_event_summaries_for_global_payloads" { - t.Fatalf( - "firstRecords[14] = %#v, want rebuild_event_summaries_for_global_payloads v15", - firstRecords[14], - ) - } - if firstRecords[15].Version != 16 || firstRecords[15].Name != "rename_actor_ref_columns_to_actor_id" { - t.Fatalf("firstRecords[15] = %#v, want rename_actor_ref_columns_to_actor_id v16", firstRecords[15]) - } - if firstRecords[16].Version != 17 || firstRecords[16].Name != "rebuild_network_conversation_containers" { - t.Fatalf("firstRecords[16] = %#v, want rebuild_network_conversation_containers v17", firstRecords[16]) - } - if firstRecords[17].Version != 18 || firstRecords[17].Name != "add_task_orchestration_profile_schema" { - t.Fatalf("firstRecords[17] = %#v, want add_task_orchestration_profile_schema v18", firstRecords[17]) - } - if firstRecords[18].Version != 19 || firstRecords[18].Name != "add_task_review_gate_schema" { - t.Fatalf("firstRecords[18] = %#v, want 
add_task_review_gate_schema v19", firstRecords[18]) - } - if firstRecords[19].Version != 20 || firstRecords[19].Name != "add_notification_cursors" { - t.Fatalf("firstRecords[19] = %#v, want add_notification_cursors v20", firstRecords[19]) - } - if firstRecords[20].Version != 21 || firstRecords[20].Name != "add_bridge_task_subscriptions" { - t.Fatalf("firstRecords[20] = %#v, want add_bridge_task_subscriptions v21", firstRecords[20]) - } - if firstRecords[21].Version != 22 || firstRecords[21].Name != "memv2_memory_events" { - t.Fatalf("firstRecords[21] = %#v, want memv2_memory_events v22", firstRecords[21]) - } + assertAppliedGlobalMigrationOrder(t, firstRecords) if err := first.Close(ctx); err != nil { t.Fatalf("Close(first) error = %v", err) } @@ -310,6 +242,149 @@ func TestOpenGlobalDBFailsOnSchemaMigrationIntegrityMismatch(t *testing.T) { } } +func TestGlobalSchemaMigrationsAreAppendOnlyContract(t *testing.T) { + t.Parallel() + + t.Run("Should keep known migration identities stable", func(t *testing.T) { + t.Parallel() + + assertGlobalSchemaMigrationDefinitions(t, globalSchemaMigrations) + }) +} + +type expectedGlobalMigrationIdentity struct { + version int + name string + checksum string +} + +func expectedGlobalMigrationIdentities() []expectedGlobalMigrationIdentity { + return []expectedGlobalMigrationIdentity{ + { + version: 1, + name: "create_global_schema", + checksum: "70e2c16c9d32e692891ab71d075ca823782626e7c9f6ffbbc88c5d662704e089", + }, + {version: 2, name: "add_session_failure_diagnostics", checksum: "2026-04-24-add-session-failure-diagnostics"}, + {version: 3, name: "add_automation_scheduler_state", checksum: "2026-04-24-add-automation-scheduler-state"}, + {version: 4, name: "add_mcp_auth_tokens", checksum: "2026-04-25-add-mcp-auth-tokens"}, + {version: 5, name: "add_tool_process_records", checksum: "2026-04-24-add-tool-process-records"}, + {version: 6, name: "add_memory_operation_scope", checksum: "2026-04-25-add-memory-operation-scope"}, + {version: 
7, name: "add_task_run_claim_lease_schema", checksum: "2026-04-26-add-task-run-claim-lease-schema"}, + {version: 8, name: "add_session_lineage_metadata", checksum: "2026-04-26-add-session-lineage-metadata"}, + { + version: 9, + name: "rename_environment_columns_to_sandbox", + checksum: "2026-04-28-rename-environment-columns-to-sandbox", + }, + {version: 10, name: "add_vault_secrets", checksum: "2026-05-01-add-vault-secrets"}, + {version: 11, name: "unify_secret_refs", checksum: "2026-05-01-unify-secret-refs"}, + {version: 12, name: "add_agent_soul_snapshots", checksum: "2026-05-02-add-agent-soul-snapshots"}, + {version: 13, name: "add_agent_heartbeat_storage", checksum: "2026-05-02-add-agent-heartbeat-storage"}, + {version: 14, name: "add_event_summary_lineage", checksum: "2026-05-04-add-event-summary-lineage"}, + { + version: 15, + name: "rebuild_event_summaries_for_global_payloads", + checksum: "2026-05-04-rebuild-event-summaries-for-global-payloads", + }, + { + version: 16, + name: "rename_actor_ref_columns_to_actor_id", + checksum: "2026-05-04-rename-actor-ref-columns-to-actor-id", + }, + { + version: 17, + name: "add_task_orchestration_profile_schema", + checksum: "2026-05-05-add-task-orchestration-profile-schema", + }, + {version: 18, name: "add_task_review_gate_schema", checksum: "2026-05-05-add-task-review-gate-schema"}, + {version: 19, name: "add_notification_cursors", checksum: "2026-05-05-add-notification-cursors"}, + {version: 20, name: "add_bridge_task_subscriptions", checksum: "2026-05-05-add-bridge-task-subscriptions"}, + { + version: 21, + name: "rebuild_network_conversation_containers", + checksum: "2026-05-05-rebuild-network-conversation-containers", + }, + {version: 22, name: "memv2_memory_events", checksum: "2026-05-05-memv2-memory-events"}, + } +} + +func assertGlobalSchemaMigrationDefinitions(t *testing.T, migrations []store.Migration) { + t.Helper() + + want := expectedGlobalMigrationIdentities() + if got := len(migrations); got != len(want) 
{ + t.Fatalf("globalSchemaMigrations length = %d, want %d", got, len(want)) + } + for index, expected := range want { + got := migrations[index] + if got.Version != expected.version || got.Name != expected.name || got.Checksum != expected.checksum { + t.Fatalf( + "globalSchemaMigrations[%d] = version %d name %q checksum %q, want version %d name %q checksum %q", + index, + got.Version, + got.Name, + got.Checksum, + expected.version, + expected.name, + expected.checksum, + ) + } + } +} + +func assertAppliedGlobalMigrationOrder(t *testing.T, records []store.MigrationRecord) { + t.Helper() + + want := expectedGlobalMigrationIdentities() + if got := len(records); got != len(want) { + t.Fatalf("schema_migrations length = %d, want %d", got, len(want)) + } + for index, expected := range want { + got := records[index] + if got.Version != expected.version || got.Name != expected.name || got.Checksum != expected.checksum { + t.Fatalf( + "schema_migrations[%d] = version %d name %q checksum %q, want version %d name %q checksum %q", + index, + got.Version, + got.Name, + got.Checksum, + expected.version, + expected.name, + expected.checksum, + ) + } + } +} + +func assertAppliedGlobalMigrationPrefix(t *testing.T, records []store.MigrationRecord, length int) { + t.Helper() + + want := expectedGlobalMigrationIdentities() + if length < 0 || length > len(want) { + t.Fatalf("migration prefix length = %d, want 0..%d", length, len(want)) + } + if got := len(records); got != length { + t.Fatalf("schema_migrations prefix length = %d, want %d", got, length) + } + for index := range records { + expected := want[index] + got := records[index] + if got.Version != expected.version || got.Name != expected.name || got.Checksum != expected.checksum { + t.Fatalf( + "schema_migrations[%d] = version %d name %q checksum %q, want version %d name %q checksum %q", + index, + got.Version, + got.Name, + got.Checksum, + expected.version, + expected.name, + expected.checksum, + ) + } + } +} + func 
TestOpenGlobalDBCreatesExtensionsTableWithExpectedColumns(t *testing.T) { t.Parallel() From 0ff846d4eff08e8e782365d51ff25423f9f1ae7d Mon Sep 17 00:00:00 2001 From: Pedro Nauck Date: Thu, 7 May 2026 03:00:13 -0300 Subject: [PATCH 02/13] refactor: hard cut provider model config --- config.toml | 12 +- internal/api/contract/contract.go | 35 +- internal/api/contract/contract_test.go | 63 ++- internal/api/contract/settings.go | 29 +- internal/api/core/conversions.go | 118 ++++- internal/api/core/conversions_parsers_test.go | 24 +- internal/api/core/handlers.go | 16 +- internal/api/core/memory_workspace_test.go | 3 +- internal/api/core/session_workspace.go | 35 ++ .../core/session_workspace_internal_test.go | 37 ++ internal/api/core/settings.go | 70 ++- internal/api/core/settings_internal_test.go | 6 + internal/api/core/settings_test.go | 55 +- internal/api/core/workspaces.go | 5 +- .../transport_parity_integration_test.go | 2 +- internal/api/udsapi/handlers_test.go | 3 +- .../transport_parity_integration_test.go | 3 +- internal/cli/config.go | 31 +- internal/cli/config_test.go | 12 + internal/cli/install.go | 6 +- internal/cli/install_test.go | 10 +- internal/cli/session.go | 38 +- internal/config/autonomy.go | 4 +- internal/config/autonomy_test.go | 7 +- internal/config/bootstrap.go | 2 +- internal/config/bootstrap_test.go | 12 +- internal/config/config.go | 14 +- internal/config/config_test.go | 40 +- internal/config/merge.go | 129 ++++- internal/config/perf_bench_test.go | 6 +- internal/config/persistence_test.go | 22 +- internal/config/provider.go | 460 ++++++++++++++-- internal/config/provider_test.go | 427 ++++++++++++++- internal/config/tool_surface.go | 11 +- internal/extension/contract/host_api.go | 10 +- internal/extension/host_api.go | 10 +- internal/session/manager.go | 3 + internal/session/manager_start.go | 33 ++ internal/session/manager_test.go | 71 +++ internal/session/query.go | 1 + internal/session/query_test.go | 28 +- 
internal/session/runtime_overrides.go | 27 + internal/session/session.go | 4 + internal/settings/collections.go | 91 +++- internal/settings/models.go | 95 +++- internal/settings/service_test.go | 70 ++- internal/situation/service.go | 2 +- internal/store/types.go | 1 + internal/testutil/e2e/config_seed_test.go | 6 +- internal/workspace/clone.go | 88 ++- internal/workspace/resolver_test.go | 6 +- openapi/agh.json | 499 +++++++++++++++++- sdk/typescript/src/generated/contracts.ts | 2 + .../session-provider-override.spec.ts | 3 +- .../fixtures/__tests__/runtime-seed.test.ts | 2 +- web/src/generated/agh-openapi.d.ts | 159 +++++- .../use-settings-providers-page.test.tsx | 61 ++- .../routes/use-settings-providers-page.ts | 37 +- web/src/routes/_app.tsx | 7 + .../settings/__tests__/-providers.test.tsx | 32 +- web/src/routes/_app/settings/providers.tsx | 22 +- .../__tests__/session-create-dialog.test.tsx | 128 +++-- .../components/model-command-select.tsx | 169 ++++++ .../components/reasoning-command-select.tsx | 129 +++++ .../components/session-create-dialog.tsx | 114 ++-- .../use-session-create-dialog.test.tsx | 27 + .../hooks/use-session-create-dialog.ts | 81 ++- .../adapters/__tests__/settings-api.test.ts | 10 +- .../settings/components/provider-card.tsx | 37 +- web/src/systems/settings/mocks/fixtures.ts | 33 +- .../components/provider-command-list.tsx | 119 +++++ .../components/provider-command-select.tsx | 75 +++ web/src/systems/workspace/index.ts | 4 + web/src/systems/workspace/mocks/fixtures.ts | 5 - 74 files changed, 3667 insertions(+), 381 deletions(-) create mode 100644 internal/session/runtime_overrides.go create mode 100644 web/src/systems/session/components/model-command-select.tsx create mode 100644 web/src/systems/session/components/reasoning-command-select.tsx create mode 100644 web/src/systems/workspace/components/provider-command-list.tsx create mode 100644 web/src/systems/workspace/components/provider-command-select.tsx diff --git a/config.toml 
b/config.toml index cebe1a0fe..c5218d6b8 100644 --- a/config.toml +++ b/config.toml @@ -29,14 +29,14 @@ external_default = "disabled" approval_timeout_seconds = 120 trusted_sources = [] -[providers.claude] -default_model = "claude-sonnet-4-6" +[providers.claude.models] +default = "claude-sonnet-4-6" -[providers.codex] -default_model = "gpt-5.4" +[providers.codex.models] +default = "gpt-5.4" -[providers.gemini] -default_model = "gemini-3.1-pro-preview" +[providers.gemini.models] +default = "gemini-3.1-pro-preview" [observability] enabled = true diff --git a/internal/api/contract/contract.go b/internal/api/contract/contract.go index e9dd9d64d..f58c358e3 100644 --- a/internal/api/contract/contract.go +++ b/internal/api/contract/contract.go @@ -14,12 +14,14 @@ import ( // CreateSessionRequest is the shared session creation request payload. type CreateSessionRequest struct { - AgentName string `json:"agent_name,omitempty"` - Provider string `json:"provider,omitempty"` - Name string `json:"name,omitempty"` - Workspace string `json:"workspace,omitempty"` - WorkspacePath string `json:"workspace_path,omitempty"` - Channel string `json:"channel,omitempty"` + AgentName string `json:"agent_name,omitempty"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + ReasoningEffort string `json:"reasoning_effort,omitempty"` + Name string `json:"name,omitempty"` + Workspace string `json:"workspace,omitempty"` + WorkspacePath string `json:"workspace_path,omitempty"` + Channel string `json:"channel,omitempty"` } // ApproveSessionRequest is the interactive permission approval payload. @@ -31,15 +33,17 @@ type ApproveSessionRequest struct { // SessionPayload is the shared session response payload. 
type SessionPayload struct { - ID string `json:"id"` - Name string `json:"name,omitempty"` - AgentName string `json:"agent_name"` - Provider string `json:"provider"` - WorkspaceID string `json:"workspace_id,omitempty"` - WorkspacePath string `json:"workspace_path,omitempty"` - Channel string `json:"channel,omitempty"` - Type session.Type `json:"type,omitempty"` - State session.State `json:"state"` + ID string `json:"id"` + Name string `json:"name,omitempty"` + AgentName string `json:"agent_name"` + Provider string `json:"provider"` + Model string `json:"model,omitempty"` + ReasoningEffort string `json:"reasoning_effort,omitempty"` + WorkspaceID string `json:"workspace_id,omitempty"` + WorkspacePath string `json:"workspace_path,omitempty"` + Channel string `json:"channel,omitempty"` + Type session.Type `json:"type,omitempty"` + State session.State `json:"state"` // StopReason is the session-level stop classification, distinct from AgentEventPayload.StopReason. StopReason store.StopReason `json:"stop_reason,omitempty"` // StopDetail is the session-level stop context paired with StopReason. 
@@ -914,7 +918,6 @@ type SessionProviderOptionPayload struct { DisplayName string `json:"display_name,omitempty"` Harness string `json:"harness,omitempty"` RuntimeProvider string `json:"runtime_provider,omitempty"` - DefaultModel string `json:"default_model,omitempty"` AuthMode string `json:"auth_mode,omitempty"` EnvPolicy string `json:"env_policy,omitempty"` HomePolicy string `json:"home_policy,omitempty"` diff --git a/internal/api/contract/contract_test.go b/internal/api/contract/contract_test.go index 4ef461b57..e5b0ea4f1 100644 --- a/internal/api/contract/contract_test.go +++ b/internal/api/contract/contract_test.go @@ -23,14 +23,16 @@ func TestSessionPayloadJSONShape(t *testing.T) { now := time.Date(2026, 4, 7, 10, 30, 0, 0, time.UTC) ttl := now.Add(time.Hour) payload := core.SessionPayloadFromInfo(&session.Info{ - ID: "sess-1", - Name: "demo", - AgentName: "coder", - Provider: "fake", - WorkspaceID: "ws_alpha", - Workspace: "/workspace", - State: session.StateActive, - ACPSessionID: "acp-123", + ID: "sess-1", + Name: "demo", + AgentName: "coder", + Provider: "fake", + Model: "gpt-test", + ReasoningEffort: "high", + WorkspaceID: "ws_alpha", + Workspace: "/workspace", + State: session.StateActive, + ACPSessionID: "acp-123", Lineage: &store.SessionLineage{ RootSessionID: "sess-1", SpawnDepth: 0, @@ -59,6 +61,8 @@ func TestSessionPayloadJSONShape(t *testing.T) { if got["agent_name"] != "coder" || got["provider"] != "fake" || + got["model"] != "gpt-test" || + got["reasoning_effort"] != "high" || got["workspace_id"] != "ws_alpha" || got["workspace_path"] != "/workspace" { t.Fatalf("session JSON = %#v", got) @@ -216,6 +220,49 @@ func TestCreateSessionRequestJSONShape(t *testing.T) { t.Fatalf("request = %#v", req) } }) + + t.Run("Should round-trip model and reasoning_effort overrides", func(t *testing.T) { + t.Parallel() + + req := contract.CreateSessionRequest{ + AgentName: "coder", + Provider: "codex", + Model: "gpt-5.4", + ReasoningEffort: "high", + Workspace: 
"alpha", + } + raw, err := json.Marshal(req) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + var decoded contract.CreateSessionRequest + if err := json.Unmarshal(raw, &decoded); err != nil { + t.Fatalf("json.Unmarshal() error = %v", err) + } + if decoded.Model != "gpt-5.4" || decoded.ReasoningEffort != "high" { + t.Fatalf("decoded = %#v", decoded) + } + var shape map[string]any + if err := json.Unmarshal(raw, &shape); err != nil { + t.Fatalf("json.Unmarshal(map) error = %v", err) + } + if shape["model"] != "gpt-5.4" || shape["reasoning_effort"] != "high" { + t.Fatalf("shape = %#v", shape) + } + }) + + t.Run("Should omit model and reasoning_effort cleanly when absent", func(t *testing.T) { + t.Parallel() + + req := contract.CreateSessionRequest{AgentName: "coder", Workspace: "alpha"} + raw, err := json.Marshal(req) + if err != nil { + t.Fatalf("json.Marshal() error = %v", err) + } + if strings.Contains(string(raw), "model") || strings.Contains(string(raw), "reasoning_effort") { + t.Fatalf("raw = %s", string(raw)) + } + }) } func TestMemoryV2PublicContractJSONShape(t *testing.T) { diff --git a/internal/api/contract/settings.go b/internal/api/contract/settings.go index ff6aef4c6..eee014726 100644 --- a/internal/api/contract/settings.go +++ b/internal/api/contract/settings.go @@ -539,7 +539,7 @@ type SettingsSourceMetadataPayload struct { type SettingsProviderSettingsPayload struct { Command string `json:"command,omitempty"` DisplayName string `json:"display_name,omitempty"` - DefaultModel string `json:"default_model,omitempty"` + Models *SettingsProviderModelsPayload `json:"models,omitempty"` Harness string `json:"harness,omitempty"` RuntimeProvider string `json:"runtime_provider,omitempty"` Transport string `json:"transport,omitempty"` @@ -552,6 +552,33 @@ type SettingsProviderSettingsPayload struct { CredentialSlots []SettingsProviderCredentialSlotPayload `json:"credential_slots,omitempty"` } +type SettingsProviderModelsPayload struct { + 
Default string `json:"default,omitempty"` + Curated []SettingsProviderModelPayload `json:"curated,omitempty"` + Discovery *SettingsProviderModelsDiscoveryPayload `json:"discovery,omitempty"` +} + +type SettingsProviderModelsDiscoveryPayload struct { + Enabled *bool `json:"enabled,omitempty"` + Command string `json:"command,omitempty"` + Endpoint string `json:"endpoint,omitempty"` + Timeout string `json:"timeout,omitempty"` +} + +type SettingsProviderModelPayload struct { + ID string `json:"id"` + DisplayName string `json:"display_name,omitempty"` + ContextWindow *int64 `json:"context_window,omitempty"` + MaxInputTokens *int64 `json:"max_input_tokens,omitempty"` + MaxOutputTokens *int64 `json:"max_output_tokens,omitempty"` + SupportsTools *bool `json:"supports_tools,omitempty"` + SupportsReasoning *bool `json:"supports_reasoning,omitempty"` + ReasoningEfforts []string `json:"reasoning_efforts,omitempty"` + DefaultReasoningEffort string `json:"default_reasoning_effort,omitempty"` + CostInputPerMillion *float64 `json:"cost_input_per_million,omitempty"` + CostOutputPerMillion *float64 `json:"cost_output_per_million,omitempty"` +} + type SettingsProviderCredentialSlotPayload struct { Name string `json:"name"` TargetEnv string `json:"target_env"` diff --git a/internal/api/core/conversions.go b/internal/api/core/conversions.go index c58523c55..13e3d3d80 100644 --- a/internal/api/core/conversions.go +++ b/internal/api/core/conversions.go @@ -43,22 +43,24 @@ func SessionPayloadFromInfo(info *session.Info) contract.SessionPayload { ref := workref.NewPath(info.WorkspaceID, info.Workspace) payload = contract.SessionPayload{ - ID: info.ID, - Name: info.Name, - AgentName: info.AgentName, - Provider: info.Provider, - WorkspaceID: ref.WorkspaceID, - WorkspacePath: ref.WorkspacePath, - Channel: info.Channel, - Type: info.Type, - State: info.State, - StopReason: info.StopReason, - StopDetail: info.StopDetail, - Failure: SessionFailurePayloadFromStore(info.Failure), - ACPSessionID: 
info.ACPSessionID, - Lineage: contract.SessionLineagePayloadFromStore(info.Lineage), - CreatedAt: info.CreatedAt, - UpdatedAt: info.UpdatedAt, + ID: info.ID, + Name: info.Name, + AgentName: info.AgentName, + Provider: info.Provider, + Model: strings.TrimSpace(info.Model), + ReasoningEffort: strings.TrimSpace(info.ReasoningEffort), + WorkspaceID: ref.WorkspaceID, + WorkspacePath: ref.WorkspacePath, + Channel: info.Channel, + Type: info.Type, + State: info.State, + StopReason: info.StopReason, + StopDetail: info.StopDetail, + Failure: SessionFailurePayloadFromStore(info.Failure), + ACPSessionID: info.ACPSessionID, + Lineage: contract.SessionLineagePayloadFromStore(info.Lineage), + CreatedAt: info.CreatedAt, + UpdatedAt: info.UpdatedAt, } if caps := ACPCapsPayloadFromInfo(info.ACPCaps); caps != nil { payload.ACPCaps = caps @@ -1031,7 +1033,6 @@ func sessionProviderOptionPayloadFromConfig( DisplayName: strings.TrimSpace(resolved.DisplayName), Harness: string(resolved.EffectiveHarness()), RuntimeProvider: strings.TrimSpace(resolved.RuntimeProviderName(providerName)), - DefaultModel: strings.TrimSpace(resolved.DefaultModel), AuthMode: string(resolved.EffectiveAuthMode()), EnvPolicy: string(resolved.EffectiveEnvPolicy()), HomePolicy: string(resolved.EffectiveHomePolicy()), @@ -1922,13 +1923,16 @@ func settingsProviderItemPayloads(values []settingspkg.ProviderItem) []contract. 
return nil } payloads := make([]contract.SettingsProviderItemPayload, 0, len(values)) - for _, value := range values { - payloads = append(payloads, settingsProviderItemPayload(value)) + for idx := range values { + payloads = append(payloads, settingsProviderItemPayload(&values[idx])) } return payloads } -func settingsProviderItemPayload(value settingspkg.ProviderItem) contract.SettingsProviderItemPayload { +func settingsProviderItemPayload(value *settingspkg.ProviderItem) contract.SettingsProviderItemPayload { + if value == nil { + return contract.SettingsProviderItemPayload{} + } payload := contract.SettingsProviderItemPayload{ Name: strings.TrimSpace(value.Name), Settings: settingsProviderSettingsPayload(value.Settings), @@ -1951,7 +1955,7 @@ func settingsProviderSettingsPayload(value settingspkg.ProviderSettings) contrac return contract.SettingsProviderSettingsPayload{ Command: strings.TrimSpace(value.Command), DisplayName: strings.TrimSpace(value.DisplayName), - DefaultModel: strings.TrimSpace(value.DefaultModel), + Models: settingsProviderModelsPayload(value.Models), Harness: string(value.Harness), RuntimeProvider: strings.TrimSpace(value.RuntimeProvider), Transport: strings.TrimSpace(value.Transport), @@ -1965,6 +1969,70 @@ func settingsProviderSettingsPayload(value settingspkg.ProviderSettings) contrac } } +func settingsProviderModelsPayload( + value aghconfig.ProviderModelsConfig, +) *contract.SettingsProviderModelsPayload { + if providerModelsConfigIsEmpty(value) { + return nil + } + return &contract.SettingsProviderModelsPayload{ + Default: strings.TrimSpace(value.Default), + Curated: settingsProviderModelPayloads(value.Curated), + Discovery: settingsProviderModelsDiscoveryPayload(value.Discovery), + } +} + +func settingsProviderModelsDiscoveryPayload( + value aghconfig.ProviderModelsDiscoveryConfig, +) *contract.SettingsProviderModelsDiscoveryPayload { + if value.Enabled == nil && + strings.TrimSpace(value.Command) == "" && + 
strings.TrimSpace(value.Endpoint) == "" && + strings.TrimSpace(value.Timeout) == "" { + return nil + } + return &contract.SettingsProviderModelsDiscoveryPayload{ + Enabled: cloneBoolPtr(value.Enabled), + Command: strings.TrimSpace(value.Command), + Endpoint: strings.TrimSpace(value.Endpoint), + Timeout: strings.TrimSpace(value.Timeout), + } +} + +func settingsProviderModelPayloads( + values []aghconfig.ProviderModelConfig, +) []contract.SettingsProviderModelPayload { + if values == nil { + return nil + } + payloads := make([]contract.SettingsProviderModelPayload, 0, len(values)) + for _, value := range values { + payloads = append(payloads, contract.SettingsProviderModelPayload{ + ID: strings.TrimSpace(value.ID), + DisplayName: strings.TrimSpace(value.DisplayName), + ContextWindow: cloneInt64Ptr(value.ContextWindow), + MaxInputTokens: cloneInt64Ptr(value.MaxInputTokens), + MaxOutputTokens: cloneInt64Ptr(value.MaxOutputTokens), + SupportsTools: cloneBoolPtr(value.SupportsTools), + SupportsReasoning: cloneBoolPtr(value.SupportsReasoning), + ReasoningEfforts: cloneStrings(value.ReasoningEfforts), + DefaultReasoningEffort: strings.TrimSpace(value.DefaultReasoningEffort), + CostInputPerMillion: cloneFloat64Ptr(value.CostInputPerMillion), + CostOutputPerMillion: cloneFloat64Ptr(value.CostOutputPerMillion), + }) + } + return payloads +} + +func providerModelsConfigIsEmpty(value aghconfig.ProviderModelsConfig) bool { + return strings.TrimSpace(value.Default) == "" && + value.Curated == nil && + value.Discovery.Enabled == nil && + strings.TrimSpace(value.Discovery.Command) == "" && + strings.TrimSpace(value.Discovery.Endpoint) == "" && + strings.TrimSpace(value.Discovery.Timeout) == "" +} + func settingsProviderCredentialSlotPayloads( values []aghconfig.ProviderCredentialSlot, ) []contract.SettingsProviderCredentialSlotPayload { @@ -2248,6 +2316,14 @@ func cloneStrings(src []string) []string { return append([]string(nil), src...) 
} +func cloneBoolPtr(src *bool) *bool { + if src == nil { + return nil + } + value := *src + return &value +} + func resourceKindsToStrings(values []resources.ResourceKind) []string { if len(values) == 0 { return nil diff --git a/internal/api/core/conversions_parsers_test.go b/internal/api/core/conversions_parsers_test.go index a89dc02e6..8f37adbf8 100644 --- a/internal/api/core/conversions_parsers_test.go +++ b/internal/api/core/conversions_parsers_test.go @@ -27,14 +27,16 @@ func TestSessionPayloadFromInfo(t *testing.T) { now := time.Date(2026, 4, 3, 12, 0, 0, 0, time.UTC) ttl := now.Add(time.Hour) payload := core.SessionPayloadFromInfo(&session.Info{ - ID: "sess-1", - Name: "demo", - AgentName: "coder", - Provider: "fake", - WorkspaceID: "ws_alpha", - Workspace: "/workspace", - Channel: "builders", - Type: session.SessionTypeDream, + ID: "sess-1", + Name: "demo", + AgentName: "coder", + Provider: "fake", + Model: "gpt-test", + ReasoningEffort: "high", + WorkspaceID: "ws_alpha", + Workspace: "/workspace", + Channel: "builders", + Type: session.SessionTypeDream, Lineage: &store.SessionLineage{ ParentSessionID: "sess-root", RootSessionID: "sess-root", @@ -92,6 +94,12 @@ func TestSessionPayloadFromInfo(t *testing.T) { if payload.Provider != "fake" { t.Fatalf("payload.Provider = %q, want %q", payload.Provider, "fake") } + if payload.Model != "gpt-test" { + t.Fatalf("payload.Model = %q, want %q", payload.Model, "gpt-test") + } + if payload.ReasoningEffort != "high" { + t.Fatalf("payload.ReasoningEffort = %q, want %q", payload.ReasoningEffort, "high") + } if payload.State != session.StateActive || payload.ACPSessionID != "acp-123" { t.Fatalf("payload session fields = %#v", payload) } diff --git a/internal/api/core/handlers.go b/internal/api/core/handlers.go index e0133930d..aae0cbf1c 100644 --- a/internal/api/core/handlers.go +++ b/internal/api/core/handlers.go @@ -326,13 +326,15 @@ func (h *BaseHandlers) CreateSession(c *gin.Context) { } sess, err := 
h.Sessions.Create(c.Request.Context(), session.CreateOpts{ - AgentName: req.AgentName, - Provider: strings.TrimSpace(req.Provider), - Name: req.Name, - Workspace: strings.TrimSpace(req.Workspace), - WorkspacePath: strings.TrimSpace(req.WorkspacePath), - Channel: channel, - Type: session.SessionTypeUser, + AgentName: req.AgentName, + Provider: strings.TrimSpace(req.Provider), + Model: strings.TrimSpace(req.Model), + ReasoningEffort: strings.TrimSpace(req.ReasoningEffort), + Name: req.Name, + Workspace: strings.TrimSpace(req.Workspace), + WorkspacePath: strings.TrimSpace(req.WorkspacePath), + Channel: channel, + Type: session.SessionTypeUser, }) if err != nil { h.respondError(c, StatusForSessionError(err), err) diff --git a/internal/api/core/memory_workspace_test.go b/internal/api/core/memory_workspace_test.go index 559f2d795..c8677c542 100644 --- a/internal/api/core/memory_workspace_test.go +++ b/internal/api/core/memory_workspace_test.go @@ -8,6 +8,7 @@ import ( "net/url" "os" "path/filepath" + "reflect" "strings" "testing" "time" @@ -884,7 +885,7 @@ func TestWorkspaceHandlersDelegateToService(t *testing.T) { t.Fatalf("len(providers) = %d, want %d (%#v)", got, want, getPayload.Providers) } for i, want := range expectedProviders { - if got := getPayload.Providers[i]; got != want { + if got := getPayload.Providers[i]; !reflect.DeepEqual(got, want) { t.Fatalf("providers[%d] = %#v, want %#v", i, got, want) } } diff --git a/internal/api/core/session_workspace.go b/internal/api/core/session_workspace.go index 8ff8f08a1..be4b84df9 100644 --- a/internal/api/core/session_workspace.go +++ b/internal/api/core/session_workspace.go @@ -36,6 +36,27 @@ func validateCreateSessionRequest(prefix string, workspaceRef string, workspaceP } } +// validateCreateSessionRuntimeOverrides enforces the model + reasoning_effort +// invariants for create-session payloads. Provider must be set when either +// override is present, and reasoning_effort must match the supported enum. 
+func validateCreateSessionRuntimeOverrides(prefix string, provider string, model string, reasoningEffort string) error { + trimmedProvider := strings.TrimSpace(provider) + trimmedModel := strings.TrimSpace(model) + trimmedEffort := strings.TrimSpace(reasoningEffort) + if trimmedModel != "" && trimmedProvider == "" { + return prefixedRuntimeOverrideError(prefix, "provider is required when model is set") + } + if trimmedEffort != "" { + if trimmedProvider == "" { + return prefixedRuntimeOverrideError(prefix, "provider is required when reasoning_effort is set") + } + if err := session.ValidateReasoningEffort(trimmedEffort); err != nil { + return prefixedRuntimeOverrideErr(prefix, err) + } + } + return nil +} + // LookupWorkspaceID resolves a workspace reference into a stable workspace ID. func lookupWorkspaceID(ctx context.Context, prefix string, workspaces WorkspaceGetter, ref string) (string, error) { if workspaces == nil { @@ -132,6 +153,8 @@ func statusForSessionError(err error) int { return http.StatusBadRequest case errors.Is(err, aghconfig.ErrProviderUnavailable): return http.StatusBadRequest + case errors.Is(err, session.ErrInvalidRuntimeOverride): + return http.StatusBadRequest case errors.Is(err, session.ErrSessionNotActive): return http.StatusBadRequest case errors.Is(err, session.ErrMaxSessionsReached), @@ -153,3 +176,15 @@ func prefixedError(prefix string, message string) error { } return fmt.Errorf("%s: %s", label, message) } + +func prefixedRuntimeOverrideError(prefix string, message string) error { + return fmt.Errorf("%w: %w", session.ErrInvalidRuntimeOverride, prefixedError(prefix, message)) +} + +func prefixedRuntimeOverrideErr(prefix string, err error) error { + label := strings.TrimSpace(prefix) + if label == "" { + return err + } + return fmt.Errorf("%s: %w", label, err) +} diff --git a/internal/api/core/session_workspace_internal_test.go b/internal/api/core/session_workspace_internal_test.go index 5e7eef22f..34af53402 100644 --- 
a/internal/api/core/session_workspace_internal_test.go +++ b/internal/api/core/session_workspace_internal_test.go @@ -40,6 +40,40 @@ func TestSessionWorkspaceHelpers(t *testing.T) { } }) + t.Run("validate create session runtime overrides", func(t *testing.T) { + t.Parallel() + + if err := validateCreateSessionRuntimeOverrides("core-test", "", "gpt-5.4", ""); !errors.Is( + err, + session.ErrInvalidRuntimeOverride, + ) { + t.Fatalf("validateCreateSessionRuntimeOverrides(model) error = %v, want ErrInvalidRuntimeOverride", err) + } + if err := validateCreateSessionRuntimeOverrides("core-test", "", "", "high"); !errors.Is( + err, + session.ErrInvalidRuntimeOverride, + ) { + t.Fatalf( + "validateCreateSessionRuntimeOverrides(reasoning provider) error = %v, want ErrInvalidRuntimeOverride", + err, + ) + } + if err := validateCreateSessionRuntimeOverrides( + "core-test", + "codex", + "", + "unsupported", + ); !errors.Is(err, session.ErrInvalidRuntimeOverride) { + t.Fatalf( + "validateCreateSessionRuntimeOverrides(reasoning enum) error = %v, want ErrInvalidRuntimeOverride", + err, + ) + } + if err := validateCreateSessionRuntimeOverrides("core-test", "codex", "gpt-5.4", "high"); err != nil { + t.Fatalf("validateCreateSessionRuntimeOverrides(valid) error = %v", err) + } + }) + t.Run("lookup workspace id", func(t *testing.T) { t.Parallel() @@ -136,6 +170,9 @@ func TestSessionWorkspaceStatusMappings(t *testing.T) { if got := statusForSessionError(aghconfig.ErrProviderUnavailable); got != http.StatusBadRequest { t.Fatalf("statusForSessionError(provider unavailable) = %d, want %d", got, http.StatusBadRequest) } + if got := statusForSessionError(session.ErrInvalidRuntimeOverride); got != http.StatusBadRequest { + t.Fatalf("statusForSessionError(invalid runtime override) = %d, want %d", got, http.StatusBadRequest) + } if got := statusForSessionError(session.ErrSessionNotActive); got != http.StatusBadRequest { t.Fatalf("statusForSessionError(not active) = %d, want %d", got, 
http.StatusBadRequest) } diff --git a/internal/api/core/settings.go b/internal/api/core/settings.go index 9944268f4..193f2e811 100644 --- a/internal/api/core/settings.go +++ b/internal/api/core/settings.go @@ -469,7 +469,7 @@ func (h *BaseHandlers) getSettingsCollectionItem(c *gin.Context, collection sett h.respondError(c, StatusForSettingsError(notFound), notFound) return } - c.JSON(http.StatusOK, contract.SettingsProviderResponse{Provider: settingsProviderItemPayload(item)}) + c.JSON(http.StatusOK, contract.SettingsProviderResponse{Provider: settingsProviderItemPayload(&item)}) case settingspkg.CollectionSandboxes: item, found := findSettingsSandbox(envelope.Sandboxes, name) if !found { @@ -797,7 +797,8 @@ func parsePutSettingsProviderRequest(c *gin.Context) (settingspkg.CollectionItem settings := settingspkg.ProviderSettings{ Command: strings.TrimSpace(body.Settings.Command), DisplayName: strings.TrimSpace(body.Settings.DisplayName), - DefaultModel: strings.TrimSpace(body.Settings.DefaultModel), + Models: providerModelsFromPayload(body.Settings.Models), + ModelsSet: body.Settings.Models != nil, Harness: aghconfig.ProviderHarness(strings.TrimSpace(body.Settings.Harness)), RuntimeProvider: strings.TrimSpace(body.Settings.RuntimeProvider), Transport: strings.TrimSpace(body.Settings.Transport), @@ -820,7 +821,7 @@ func parsePutSettingsProviderRequest(c *gin.Context) (settingspkg.CollectionItem func providerSettingsPayloadEmpty(payload contract.SettingsProviderSettingsPayload) bool { return strings.TrimSpace(payload.Command) == "" && strings.TrimSpace(payload.DisplayName) == "" && - strings.TrimSpace(payload.DefaultModel) == "" && + payload.Models == nil && strings.TrimSpace(payload.Harness) == "" && strings.TrimSpace(payload.RuntimeProvider) == "" && strings.TrimSpace(payload.Transport) == "" && @@ -852,6 +853,64 @@ func providerCredentialSlotsFromPayload( return slots } +func providerModelsFromPayload(payload *contract.SettingsProviderModelsPayload) 
aghconfig.ProviderModelsConfig { + if payload == nil { + return aghconfig.ProviderModelsConfig{} + } + return aghconfig.ProviderModelsConfig{ + Default: strings.TrimSpace(payload.Default), + Curated: providerModelConfigsFromPayload(payload.Curated), + Discovery: providerModelsDiscoveryFromPayload(payload.Discovery), + } +} + +func providerModelsDiscoveryFromPayload( + payload *contract.SettingsProviderModelsDiscoveryPayload, +) aghconfig.ProviderModelsDiscoveryConfig { + if payload == nil { + return aghconfig.ProviderModelsDiscoveryConfig{} + } + return aghconfig.ProviderModelsDiscoveryConfig{ + Enabled: cloneBoolPtr(payload.Enabled), + Command: strings.TrimSpace(payload.Command), + Endpoint: strings.TrimSpace(payload.Endpoint), + Timeout: strings.TrimSpace(payload.Timeout), + } +} + +func providerModelConfigsFromPayload( + payloads []contract.SettingsProviderModelPayload, +) []aghconfig.ProviderModelConfig { + if payloads == nil { + return nil + } + models := make([]aghconfig.ProviderModelConfig, 0, len(payloads)) + for _, payload := range payloads { + models = append(models, aghconfig.ProviderModelConfig{ + ID: strings.TrimSpace(payload.ID), + DisplayName: strings.TrimSpace(payload.DisplayName), + ContextWindow: cloneInt64Ptr(payload.ContextWindow), + MaxInputTokens: cloneInt64Ptr(payload.MaxInputTokens), + MaxOutputTokens: cloneInt64Ptr(payload.MaxOutputTokens), + SupportsTools: cloneBoolPtr(payload.SupportsTools), + SupportsReasoning: cloneBoolPtr(payload.SupportsReasoning), + ReasoningEfforts: trimStringSliceInternal(payload.ReasoningEfforts), + DefaultReasoningEffort: strings.TrimSpace(payload.DefaultReasoningEffort), + CostInputPerMillion: cloneFloat64Ptr(payload.CostInputPerMillion), + CostOutputPerMillion: cloneFloat64Ptr(payload.CostOutputPerMillion), + }) + } + return models +} + +func cloneFloat64Ptr(value *float64) *float64 { + if value == nil { + return nil + } + cloned := *value + return &cloned +} + func providerSecretWritesFromPayload( payloads 
[]contract.SettingsProviderSecretWritePayload, ) []settingspkg.ProviderSecretWrite { @@ -1658,9 +1717,10 @@ func (h *BaseHandlers) drainSettingsLogTail( } func findSettingsProvider(values []settingspkg.ProviderItem, name string) (settingspkg.ProviderItem, bool) { - for _, value := range values { + for idx := range values { + value := &values[idx] if strings.TrimSpace(value.Name) == name { - return value, true + return *value, true } } return settingspkg.ProviderItem{}, false diff --git a/internal/api/core/settings_internal_test.go b/internal/api/core/settings_internal_test.go index fc23aef88..bc93a238a 100644 --- a/internal/api/core/settings_internal_test.go +++ b/internal/api/core/settings_internal_test.go @@ -56,6 +56,12 @@ func TestSettingsHelperFunctionsAndNilErrorWrappers(t *testing.T) { t.Fatal("findSettingsSandbox() = false, want true") } + if providerSettingsPayloadEmpty(contract.SettingsProviderSettingsPayload{ + Models: &contract.SettingsProviderModelsPayload{}, + }) { + t.Fatal("providerSettingsPayloadEmpty(empty models payload) = true, want false") + } + fireLimit := automationFireLimitFromPayload(automationmodel.FireLimitConfig{Max: 5, Window: "1m"}) if fireLimit.Max != 5 || fireLimit.Window != "1m" { t.Fatalf("automationFireLimitFromPayload() = %#v", fireLimit) diff --git a/internal/api/core/settings_test.go b/internal/api/core/settings_test.go index 67be94ab6..bd3c46a1b 100644 --- a/internal/api/core/settings_test.go +++ b/internal/api/core/settings_test.go @@ -623,8 +623,10 @@ func TestSettingsSectionAndCollectionConversions(t *testing.T) { { Name: "openai", Settings: settingspkg.ProviderSettings{ - Command: "codex", - DefaultModel: "gpt-5.4", + Command: "codex", + Models: aghconfig.ProviderModelsConfig{ + Default: "gpt-5.4", + }, CredentialSlots: []aghconfig.ProviderCredentialSlot{ { Name: "api_key", @@ -663,8 +665,10 @@ func TestSettingsSectionAndCollectionConversions(t *testing.T) { Scope: settingspkg.ScopeGlobal, }, Settings: 
settingspkg.ProviderSettings{ - Command: "codex", - DefaultModel: "gpt-5.4", + Command: "codex", + Models: aghconfig.ProviderModelsConfig{ + Default: "gpt-5.4", + }, }, }, }, @@ -1258,8 +1262,20 @@ func TestSettingsCollectionHandlersDelegateValidPayloads(t *testing.T) { path: "/api/settings/providers/openai", body: contract.PutSettingsProviderRequest{ Settings: contract.SettingsProviderSettingsPayload{ - Command: "codex", - DefaultModel: "gpt-5.4", + Command: "codex", + Models: &contract.SettingsProviderModelsPayload{ + Default: "gpt-5.4", + Curated: []contract.SettingsProviderModelPayload{ + { + ID: "gpt-5.4", + DisplayName: "GPT-5.4", + SupportsReasoning: boolPointer(true), + ReasoningEfforts: []string{"low", "high"}, + DefaultReasoningEffort: "high", + }, + {ID: "gpt-5.4-mini", DisplayName: "GPT-5.4 Mini"}, + }, + }, CredentialSlots: []contract.SettingsProviderCredentialSlotPayload{ { Name: "api_key", @@ -1274,9 +1290,24 @@ func TestSettingsCollectionHandlersDelegateValidPayloads(t *testing.T) { assert: func(t *testing.T, service *stubSettingsService) { t.Helper() if service.LastPutCollectionRequest.Provider == nil || - service.LastPutCollectionRequest.Provider.DefaultModel != "gpt-5.4" { + service.LastPutCollectionRequest.Provider.Models.Default != "gpt-5.4" { t.Fatalf("LastPutCollectionRequest.Provider = %#v", service.LastPutCollectionRequest.Provider) } + if got := service.LastPutCollectionRequest.Provider.Models.Curated; len(got) != 2 || + got[0].ID != "gpt-5.4" || + got[1].ID != "gpt-5.4-mini" { + t.Fatalf("Provider.Models.Curated = %#v", got) + } + model := service.LastPutCollectionRequest.Provider.Models.Curated[0] + if model.SupportsReasoning == nil || !*model.SupportsReasoning { + t.Fatalf( + "Provider.Models.Curated[0].SupportsReasoning = %#v, want true", + model.SupportsReasoning, + ) + } + if got, want := model.DefaultReasoningEffort, "high"; got != want { + t.Fatalf("Provider.Models.Curated[0].DefaultReasoningEffort = %q, want %q", got, want) + } }, 
assertResponse: assertAppliedSettingsMutation, }, @@ -1347,8 +1378,10 @@ func TestSettingsCollectionHandlersDelegateValidPayloads(t *testing.T) { { Name: "openai", Settings: settingspkg.ProviderSettings{ - Command: "codex", - DefaultModel: "gpt-5.4", + Command: "codex", + Models: aghconfig.ProviderModelsConfig{ + Default: "gpt-5.4", + }, CredentialSlots: []aghconfig.ProviderCredentialSlot{ { Name: "api_key", @@ -2179,6 +2212,10 @@ func decodeJSON(t *testing.T, body []byte, dest any) { } } +func boolPointer(value bool) *bool { + return &value +} + func appendLine(t *testing.T, path string, line string) { t.Helper() diff --git a/internal/api/core/workspaces.go b/internal/api/core/workspaces.go index bc7b67c20..909c53cfc 100644 --- a/internal/api/core/workspaces.go +++ b/internal/api/core/workspaces.go @@ -257,7 +257,10 @@ func (h *BaseHandlers) ResolveWorkspace(c *gin.Context) { } func (h *BaseHandlers) validateCreateSessionRequest(req contract.CreateSessionRequest) error { - return validateCreateSessionRequest(h.transportName(), req.Workspace, req.WorkspacePath) + if err := validateCreateSessionRequest(h.transportName(), req.Workspace, req.WorkspacePath); err != nil { + return err + } + return validateCreateSessionRuntimeOverrides(h.transportName(), req.Provider, req.Model, req.ReasoningEffort) } func (h *BaseHandlers) lookupWorkspaceID(ctx context.Context, ref string) (string, error) { diff --git a/internal/api/httpapi/transport_parity_integration_test.go b/internal/api/httpapi/transport_parity_integration_test.go index a1825ec8f..3d34ef485 100644 --- a/internal/api/httpapi/transport_parity_integration_test.go +++ b/internal/api/httpapi/transport_parity_integration_test.go @@ -657,7 +657,7 @@ func writeTransportProviderOverrideConfig( } } tree.SetPath(append(providerPath, "command"), strings.TrimSpace(command)) - tree.SetPath(append(providerPath, "default_model"), "transport-override-model") + tree.SetPath(append(providerPath, "models", "default"), 
"transport-override-model") credentialSlot, err := tomltree.TreeFromMap(map[string]any{ "name": "api_key", "target_env": "TRANSPORT_OVERRIDE_API_KEY", diff --git a/internal/api/udsapi/handlers_test.go b/internal/api/udsapi/handlers_test.go index b31110303..cd5bdc76f 100644 --- a/internal/api/udsapi/handlers_test.go +++ b/internal/api/udsapi/handlers_test.go @@ -8,6 +8,7 @@ import ( "net/http/httptest" "os" "path/filepath" + "reflect" "slices" "sort" "strings" @@ -1096,7 +1097,7 @@ func TestGetWorkspaceHandlerReturnsDetail(t *testing.T) { t.Fatalf("len(providers) = %d, want %d (%#v)", len(response.Providers), len(expectedProviders), response) } for i, want := range expectedProviders { - if got := response.Providers[i]; got != want { + if got := response.Providers[i]; !reflect.DeepEqual(got, want) { t.Fatalf("providers[%d] = %#v, want %#v", i, got, want) } } diff --git a/internal/api/udsapi/transport_parity_integration_test.go b/internal/api/udsapi/transport_parity_integration_test.go index 6a1e23d5e..1b174cdfc 100644 --- a/internal/api/udsapi/transport_parity_integration_test.go +++ b/internal/api/udsapi/transport_parity_integration_test.go @@ -1068,7 +1068,8 @@ func writeTransportProviderOverrideConfig( builder.WriteString(`command = "`) builder.WriteString(escapeTransportConfigString(command)) builder.WriteString("\"\n") - builder.WriteString(`default_model = "transport-override-model"` + "\n") + builder.WriteString("[providers." + providerName + ".models]\n") + builder.WriteString(`default = "transport-override-model"` + "\n") builder.WriteString("[[providers." 
+ providerName + ".credential_slots]]\n") builder.WriteString(`name = "api_key"` + "\n") builder.WriteString(`target_env = "TRANSPORT_OVERRIDE_API_KEY"` + "\n") diff --git a/internal/cli/config.go b/internal/cli/config.go index 4aa0f8828..baec56168 100644 --- a/internal/cli/config.go +++ b/internal/cli/config.go @@ -25,6 +25,8 @@ const ( configEnvKey = "env" configSecretEnvKey = "secret_env" configProvidersKey = "providers" + configModelsKey = "models" + configDiscoveryKey = "discovery" configSessionMCPKey = "session_mcp" ) @@ -251,6 +253,10 @@ var ( "extensions.resources.operator_write_rate_limit.requests": configSetInt, "extensions.resources.operator_write_rate_limit.window": configSetDuration, "extensions.resources.operator_write_rate_limit.queue": configSetInt, + "model_catalog.sources.models_dev.enabled": configSetBool, + "model_catalog.sources.models_dev.endpoint": configSetString, + "model_catalog.sources.models_dev.ttl": configSetDuration, + "model_catalog.sources.models_dev.timeout": configSetDuration, "automation.enabled": configSetBool, "automation.timezone": configSetString, "automation.max_concurrent_jobs": configSetInt, @@ -1357,6 +1363,13 @@ func classifyConfigMutationPath(path []string) (configSetValueKind, bool, error) if len(path) == 3 && path[0] == configProvidersKey && path[2] == configSessionMCPKey { return configSetBool, false, nil } + if len(path) == 5 && + path[0] == configProvidersKey && + path[2] == configModelsKey && + path[3] == configDiscoveryKey && + path[4] == "enabled" { + return configSetBool, false, nil + } if isProviderMutationPath(path) { return configSetString, false, nil } @@ -1440,15 +1453,31 @@ func isProviderMutationPath(path []string) bool { if len(path) == 3 && path[0] == configProvidersKey { switch path[2] { case "command", - "default_model", "auth_mode", "env_policy", "home_policy", + "runtime_provider", + "transport", + "base_url", "auth_status_command", "auth_login_command": return true } } + if len(path) == 4 && 
path[0] == configProvidersKey && path[2] == configModelsKey { + if path[3] == "default" { + return true + } + } + if len(path) == 5 && + path[0] == configProvidersKey && + path[2] == configModelsKey && + path[3] == configDiscoveryKey { + switch path[4] { + case "command", "endpoint", "timeout": + return true + } + } return false } diff --git a/internal/cli/config_test.go b/internal/cli/config_test.go index 540dcc0ba..b42bc4be3 100644 --- a/internal/cli/config_test.go +++ b/internal/cli/config_test.go @@ -814,6 +814,18 @@ func TestConfigRenderingAndMutationHelpers(t *testing.T) { wantKind: configSetString, wantAllowed: true, }, + { + name: "Should allow provider model default", + path: "providers.codex.models.default", + wantKind: configSetString, + wantAllowed: true, + }, + { + name: "Should allow provider model discovery enabled", + path: "providers.codex.models.discovery.enabled", + wantKind: configSetBool, + wantAllowed: true, + }, { name: "Should redact sandbox env values", path: "sandboxes.dev.env.API_TOKEN", diff --git a/internal/cli/install.go b/internal/cli/install.go index a7f2dd3a9..65712ca8a 100644 --- a/internal/cli/install.go +++ b/internal/cli/install.go @@ -104,7 +104,7 @@ func newInstallCommand(deps commandDeps) *cobra.Command { record := installRecord{ AgentName: aghconfig.DefaultAgentName, Provider: cfg.Defaults.Provider, - Model: cfg.Providers[cfg.Defaults.Provider].DefaultModel, + Model: cfg.Providers[cfg.Defaults.Provider].Models.Default, Permissions: string(cfg.Permissions.Mode), ConfigFile: homePaths.ConfigFile, AgentFile: agentPath, @@ -206,12 +206,12 @@ func buildInstallWizardInput(cfg *aghconfig.Config) installWizardInput { for _, provider := range providers { resolved, err := cfg.ResolveProvider(provider) if err == nil { - suggestedModels[provider] = strings.TrimSpace(resolved.DefaultModel) + suggestedModels[provider] = strings.TrimSpace(resolved.Models.Default) modelRequired[provider] = installProviderRequiresModel(resolved) continue } 
configured := cfg.Providers[provider] - suggestedModels[provider] = strings.TrimSpace(configured.DefaultModel) + suggestedModels[provider] = strings.TrimSpace(configured.Models.Default) modelRequired[provider] = installProviderRequiresModel(configured) } diff --git a/internal/cli/install_test.go b/internal/cli/install_test.go index 1dc2bf528..f82e1dc05 100644 --- a/internal/cli/install_test.go +++ b/internal/cli/install_test.go @@ -175,8 +175,8 @@ func TestInstallCommandMachineOutput(t *testing.T) { if cfg.Defaults.Provider != "blackbox" { t.Fatalf("cfg.Defaults.Provider = %q, want blackbox", cfg.Defaults.Provider) } - if got := cfg.Providers["blackbox"].DefaultModel; got != "" { - t.Fatalf("cfg.Providers[blackbox].DefaultModel = %q, want empty", got) + if got := cfg.Providers["blackbox"].Models.Default; got != "" { + t.Fatalf("cfg.Providers[blackbox].Models.Default = %q, want empty", got) } }) @@ -291,7 +291,11 @@ func TestBuildInstallWizardInputAndBundleFormats(t *testing.T) { cfg := aghconfig.DefaultWithHome(aghconfig.HomePaths{}) cfg.Defaults.Provider = "codex" - cfg.Providers["custom"] = aghconfig.ProviderConfig{DefaultModel: "custom-model"} + cfg.Providers["custom"] = aghconfig.ProviderConfig{ + Models: aghconfig.ProviderModelsConfig{ + Default: "custom-model", + }, + } input := buildInstallWizardInput(&cfg) if len(input.Providers) == 0 { diff --git a/internal/cli/session.go b/internal/cli/session.go index 5806924e1..c1e468757 100644 --- a/internal/cli/session.go +++ b/internal/cli/session.go @@ -39,12 +39,14 @@ func newSessionCommand(deps commandDeps) *cobra.Command { func newSessionCreateCommand(deps commandDeps) *cobra.Command { var ( - agentName string - cwd string - name string - channel string - provider string - workspaceRef string + agentName string + cwd string + name string + channel string + provider string + model string + reasoningEffort string + workspaceRef string ) cmd := &cobra.Command{ @@ -56,6 +58,9 @@ func newSessionCreateCommand(deps 
commandDeps) *cobra.Command { # Start a named session for a specific registered workspace and agent agh session new --workspace checkout-api --agent reviewer --name review-api + # Override provider, model, and reasoning effort for this session only + agh session new --provider codex --model gpt-5.4 --reasoning-effort high + # Auto-register an absolute workspace path before creating the session agh session new --cwd "$PWD" --agent reviewer`, RunE: func(cmd *cobra.Command, _ []string) error { @@ -70,12 +75,14 @@ func newSessionCreateCommand(deps commandDeps) *cobra.Command { } created, err := client.CreateSession(cmd.Context(), CreateSessionRequest{ - AgentName: agentName, - Provider: strings.TrimSpace(provider), - Name: name, - Workspace: workspace, - WorkspacePath: workspacePath, - Channel: strings.TrimSpace(channel), + AgentName: agentName, + Provider: strings.TrimSpace(provider), + Model: strings.TrimSpace(model), + ReasoningEffort: strings.TrimSpace(reasoningEffort), + Name: name, + Workspace: workspace, + WorkspacePath: workspacePath, + Channel: strings.TrimSpace(channel), }) if err != nil { return err @@ -90,6 +97,13 @@ func newSessionCreateCommand(deps commandDeps) *cobra.Command { cmd.Flags().StringVar(&name, "name", "", "Optional session label") cmd.Flags().StringVar(&channel, "channel", "", "Optional network channel opt-in for the session") cmd.Flags().StringVar(&provider, "provider", "", "Optional provider override for this session") + cmd.Flags().StringVar(&model, "model", "", "Optional model override for this session") + cmd.Flags().StringVar( + &reasoningEffort, + "reasoning-effort", + "", + "Optional reasoning effort hint (minimal|low|medium|high|xhigh) for providers that support it", + ) return cmd } diff --git a/internal/config/autonomy.go b/internal/config/autonomy.go index 144ca8125..8ba8ec549 100644 --- a/internal/config/autonomy.go +++ b/internal/config/autonomy.go @@ -118,7 +118,7 @@ func (c CoordinatorConfig) Validate(path string, resolver 
providerResolver) erro return fmt.Errorf("%s.provider: %w", path, err) } if strings.TrimSpace(c.Model) == "" && - strings.TrimSpace(provider.DefaultModel) == "" && + strings.TrimSpace(provider.Models.Default) == "" && provider.RequiresRuntimeModel() { return fmt.Errorf("%s.model is required when provider %q has no default model", path, providerName) } @@ -151,7 +151,7 @@ func (c *Config) ResolveCoordinatorConfig(fallback AgentDef) (CoordinatorConfig, return CoordinatorConfig{}, fmt.Errorf("autonomy.coordinator.provider: %w", err) } if model == "" { - model = strings.TrimSpace(provider.DefaultModel) + model = strings.TrimSpace(provider.Models.Default) } if model == "" && provider.RequiresRuntimeModel() { return CoordinatorConfig{}, fmt.Errorf( diff --git a/internal/config/autonomy_test.go b/internal/config/autonomy_test.go index 645df938b..096ac4bab 100644 --- a/internal/config/autonomy_test.go +++ b/internal/config/autonomy_test.go @@ -192,7 +192,7 @@ max_active_per_workspace = 2 } } -func TestLoadAllowsDirectACPAutonomyProviderWithoutDefaultModel(t *testing.T) { +func TestLoadAllowsDirectACPAutonomyProviderWithoutModelDefault(t *testing.T) { t.Run("Should accept provider-managed model for direct ACP provider", func(t *testing.T) { workspaceRoot, homePaths := prepareAutonomyConfigTestEnv(t) writeFile(t, homePaths.ConfigFile, ` @@ -325,8 +325,9 @@ func TestLoadAutonomyOverlayPreservesOtherConfigSections(t *testing.T) { writeFile(t, homePaths.ConfigFile, ` [providers.claude] - default_model = "global-model" auth_mode = "bound_secret" + [providers.claude.models] + default = "global-model" [[providers.claude.credential_slots]] name = "api_key" target_env = "GLOBAL_KEY" @@ -423,7 +424,7 @@ max_children = 2 if err != nil { t.Fatalf("ResolveProvider(claude) error = %v", err) } - if claude.DefaultModel != "global-model" { + if claude.Models.Default != "global-model" { t.Fatalf("ResolveProvider(claude) = %#v, want merged provider fields", claude) } if slots := 
claude.EffectiveCredentialSlots(); len(slots) != 1 || diff --git a/internal/config/bootstrap.go b/internal/config/bootstrap.go index 97fdc2f3e..66fe7128f 100644 --- a/internal/config/bootstrap.go +++ b/internal/config/bootstrap.go @@ -80,7 +80,7 @@ func SaveBootstrapConfig(homePaths HomePaths, provider string, model string) (Co if selectedModel == "" { return nil } - return editor.SetValue([]string{"providers", selectedProvider, "default_model"}, selectedModel) + return editor.SetValue([]string{"providers", selectedProvider, "models", "default"}, selectedModel) }) } diff --git a/internal/config/bootstrap_test.go b/internal/config/bootstrap_test.go index ccfc848db..7200e1762 100644 --- a/internal/config/bootstrap_test.go +++ b/internal/config/bootstrap_test.go @@ -85,10 +85,10 @@ func TestSaveBootstrapConfigWritesManagedDefaults(t *testing.T) { slots, ) } - if reloaded.Providers["claude"].DefaultModel != "claude-sonnet-4-6" { + if reloaded.Providers["claude"].Models.Default != "claude-sonnet-4-6" { t.Fatalf( - "LoadGlobalConfig() Providers[claude].DefaultModel = %q, want %q", - reloaded.Providers["claude"].DefaultModel, + "LoadGlobalConfig() Providers[claude].Models.Default = %q, want %q", + reloaded.Providers["claude"].Models.Default, "claude-sonnet-4-6", ) } @@ -107,7 +107,7 @@ func TestSaveBootstrapConfigWritesManagedDefaults(t *testing.T) { `agent = "general"`, `provider = "claude"`, `mode = "approve-all"`, - `default_model = "claude-sonnet-4-6"`, + `default = "claude-sonnet-4-6"`, `port = 3030`, `secret_ref = "env:ANTHROPIC_KEY"`, } { @@ -132,8 +132,8 @@ func TestSaveBootstrapConfigAllowsProviderManagedModel(t *testing.T) { if cfg.Defaults.Provider != "blackbox" { t.Fatalf("SaveBootstrapConfig() Defaults.Provider = %q, want blackbox", cfg.Defaults.Provider) } - if got := cfg.Providers["blackbox"].DefaultModel; got != "" { - t.Fatalf("SaveBootstrapConfig() Providers[blackbox].DefaultModel = %q, want empty", got) + if got := 
cfg.Providers["blackbox"].Models.Default; got != "" { + t.Fatalf("SaveBootstrapConfig() Providers[blackbox].Models.Default = %q, want empty", got) } contents, err := os.ReadFile(homePaths.ConfigFile) diff --git a/internal/config/config.go b/internal/config/config.go index c26d14b97..5d5491982 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -28,6 +28,7 @@ const ( ConfigName = "config.toml" // marketplaceSchemeHTTP is the accepted plaintext marketplace URL scheme. marketplaceSchemeHTTP = "http" + urlSchemeHTTPS = "https" // skillsMarketplaceRegistryClawhub is the currently supported skills marketplace registry. skillsMarketplaceRegistryClawhub = "clawhub" ) @@ -442,6 +443,7 @@ type Config struct { Permissions PermissionsConfig `toml:"permissions"` MCPServers []MCPServer `toml:"mcp_servers,omitempty"` Providers map[string]ProviderConfig `toml:"providers"` + ModelCatalog ModelCatalogConfig `toml:"model_catalog"` Sandboxes map[string]SandboxProfile `toml:"sandboxes"` Observability ObservabilityConfig `toml:"observability"` Log LogConfig `toml:"log"` @@ -630,8 +632,9 @@ func DefaultWithHome(homePaths HomePaths) Config { Permissions: PermissionsConfig{ Mode: PermissionModeApproveAll, }, - Providers: map[string]ProviderConfig{}, - Sandboxes: map[string]SandboxProfile{}, + Providers: map[string]ProviderConfig{}, + ModelCatalog: DefaultModelCatalogConfig(), + Sandboxes: map[string]SandboxProfile{}, Observability: ObservabilityConfig{ Enabled: true, RetentionDays: 7, @@ -886,6 +889,9 @@ func (c *Config) validateFeatures(lookup envLookup) error { if err := c.Tools.Validate(c.MCPServers, c.Providers); err != nil { return err } + if err := c.ModelCatalog.Validate(); err != nil { + return err + } if err := c.Automation.validateWithEnv(lookup); err != nil { return fmt.Errorf("validate automation config: %w", err) } @@ -1555,7 +1561,7 @@ func (c MarketplaceConfig) Validate() error { if err != nil { return fmt.Errorf("skills.marketplace.base_url is invalid: 
%w", err) } - if parsed.Scheme != marketplaceSchemeHTTP && parsed.Scheme != "https" { + if parsed.Scheme != marketplaceSchemeHTTP && parsed.Scheme != urlSchemeHTTPS { return fmt.Errorf("skills.marketplace.base_url must use http or https: %q", c.BaseURL) } if strings.TrimSpace(parsed.Host) == "" { @@ -1588,7 +1594,7 @@ func (c ExtensionsMarketplaceConfig) Validate() error { if err != nil { return fmt.Errorf("extensions.marketplace.base_url is invalid: %w", err) } - if parsed.Scheme != "http" && parsed.Scheme != "https" { + if parsed.Scheme != "http" && parsed.Scheme != urlSchemeHTTPS { return fmt.Errorf("extensions.marketplace.base_url must use http or https: %q", c.BaseURL) } if strings.TrimSpace(parsed.Host) == "" { diff --git a/internal/config/config_test.go b/internal/config/config_test.go index 9768ad9c3..f443c1498 100644 --- a/internal/config/config_test.go +++ b/internal/config/config_test.go @@ -88,8 +88,9 @@ approval_timeout_seconds = 90 trusted_sources = ["mcp:linear", "extension:linear"] [providers.claude] - default_model = "claude-opus" auth_mode = "bound_secret" + [providers.claude.models] + default = "claude-opus" [[providers.claude.credential_slots]] name = "api_key" target_env = "ANTHROPIC_KEY" @@ -344,7 +345,7 @@ max_queue_depth = 250 if err != nil { t.Fatalf("ResolveProvider() error = %v", err) } - if claude.Command == "" || claude.DefaultModel != "claude-opus" { + if claude.Command == "" || claude.Models.Default != "claude-opus" { t.Fatalf("ResolveProvider() = %#v", claude) } if slots := claude.EffectiveCredentialSlots(); len(slots) != 1 || @@ -831,8 +832,12 @@ host = "localhost" port = 2123 [providers.claude] - default_model = "global-model" auth_mode = "bound_secret" + [providers.claude.models] + default = "global-model" + [[providers.claude.models.curated]] + id = "global-model" + display_name = "Global Model" [[providers.claude.credential_slots]] name = "api_key" target_env = "GLOBAL_KEY" @@ -859,7 +864,13 @@ base_url = 
"https://global.example.test/api/v1" port = 4242 [providers.claude] -default_model = "workspace-model" +[providers.claude.models] +default = "workspace-model" +[[providers.claude.models.curated]] +id = "workspace-model" +display_name = "Workspace Model" +reasoning_efforts = ["low", "high"] +default_reasoning_effort = "high" [session.limits] timeout = "45m" @@ -892,8 +903,20 @@ base_url = "https://workspace.example.test/api/v1" if err != nil { t.Fatalf("ResolveProvider() error = %v", err) } - if claude.DefaultModel != "workspace-model" { - t.Fatalf("ResolveProvider() DefaultModel = %q, want %q", claude.DefaultModel, "workspace-model") + if claude.Models.Default != "workspace-model" { + t.Fatalf("ResolveProvider() Models.Default = %q, want %q", claude.Models.Default, "workspace-model") + } + if len(claude.Models.Curated) != 1 { + t.Fatalf("ResolveProvider() Models.Curated = %#v, want one workspace model", claude.Models.Curated) + } + if got, want := claude.Models.Curated[0].ID, "workspace-model"; got != want { + t.Fatalf("ResolveProvider() Models.Curated[0].ID = %q, want %q", got, want) + } + if got, want := claude.Models.Curated[0].DisplayName, "Workspace Model"; got != want { + t.Fatalf("ResolveProvider() Models.Curated[0].DisplayName = %q, want %q", got, want) + } + if got, want := claude.Models.Curated[0].DefaultReasoningEffort, "high"; got != want { + t.Fatalf("ResolveProvider() Models.Curated[0].DefaultReasoningEffort = %q, want %q", got, want) } if slots := claude.EffectiveCredentialSlots(); len(slots) != 1 || slots[0].TargetEnv != "GLOBAL_KEY" || @@ -1355,8 +1378,9 @@ segment_bytes = 128 max_bytes_per_session = 2048 [providers.claude] -default_model = "global-model" auth_mode = "bound_secret" +[providers.claude.models] +default = "global-model" `) writeFile(t, filepath.Join(workspaceRoot, DirName, ConfigName), ` [observability.transcripts] @@ -1389,7 +1413,7 @@ segment_bytes = 256 if err != nil { t.Fatalf("ResolveProvider() error = %v", err) } - if 
claude.DefaultModel != "global-model" { + if claude.Models.Default != "global-model" { t.Fatalf("ResolveProvider() = %#v", claude) } if slots := claude.EffectiveCredentialSlots(); len(slots) != 1 || diff --git a/internal/config/merge.go b/internal/config/merge.go index 211bca2a6..cb34545f5 100644 --- a/internal/config/merge.go +++ b/internal/config/merge.go @@ -13,6 +13,8 @@ import ( "github.com/pedronauck/agh/internal/resources" ) +const providersConfigKey = "providers" + type configOverlay struct { Daemon daemonOverlay `toml:"daemon"` HTTP httpOverlay `toml:"http"` @@ -23,6 +25,7 @@ type configOverlay struct { Permissions permissionsOverlay `toml:"permissions"` MCPServers []mcpServerOverlay `toml:"mcp_servers"` Providers map[string]providerOverlay `toml:"providers"` + ModelCatalog modelCatalogOverlay `toml:"model_catalog"` Sandboxes map[string]sandboxOverlay `toml:"sandboxes"` Observability observabilityOverlay `toml:"observability"` Log logOverlay `toml:"log"` @@ -123,7 +126,7 @@ type permissionsOverlay struct { type providerOverlay struct { Command *string `toml:"command"` DisplayName *string `toml:"display_name"` - DefaultModel *string `toml:"default_model"` + Models *providerModelsOverlay `toml:"models"` Harness *ProviderHarness `toml:"harness"` RuntimeProvider *string `toml:"runtime_provider"` Transport *string `toml:"transport"` @@ -139,6 +142,34 @@ type providerOverlay struct { MCPServers []mcpServerOverlay `toml:"mcp_servers"` } +type providerModelsOverlay struct { + Default *string `toml:"default"` + Curated []ProviderModelConfig `toml:"curated"` + Discovery providerModelsDiscoveryOverlay `toml:"discovery"` +} + +type providerModelsDiscoveryOverlay struct { + Enabled *bool `toml:"enabled"` + Command *string `toml:"command"` + Endpoint *string `toml:"endpoint"` + Timeout *string `toml:"timeout"` +} + +type modelCatalogOverlay struct { + Sources modelCatalogSourcesOverlay `toml:"sources"` +} + +type modelCatalogSourcesOverlay struct { + ModelsDev 
modelsDevSourceOverlay `toml:"models_dev"` +} + +type modelsDevSourceOverlay struct { + Enabled *bool `toml:"enabled"` + Endpoint *string `toml:"endpoint"` + TTL *string `toml:"ttl"` + Timeout *string `toml:"timeout"` +} + type providerCredentialOverlay struct { Name *string `toml:"name"` TargetEnv *string `toml:"target_env"` @@ -534,12 +565,41 @@ func loadConfigOverlayBytes(contents []byte, source string) (configOverlay, erro } if undecoded := meta.Undecoded(); len(undecoded) > 0 { + if err := rejectRemovedProviderModelKeys(source, undecoded); err != nil { + return overlay, err + } return overlay, fmt.Errorf("unknown config keys in %q: %s", source, joinTOMLKeys(undecoded)) } return overlay, nil } +func rejectRemovedProviderModelKeys(source string, keys []burnttoml.Key) error { + for _, key := range sortedTOMLKeys(keys) { + if len(key) != 3 || key[0] != providersConfigKey { + continue + } + replacement := "" + switch key[2] { + case "default_model": + replacement = fmt.Sprintf("providers.%s.models.default", key[1]) + case "supported_models": + replacement = fmt.Sprintf("providers.%s.models.curated", key[1]) + case "supports_reasoning_effort": + replacement = fmt.Sprintf("providers.%s.models.curated[].reasoning_efforts", key[1]) + } + if replacement != "" { + return fmt.Errorf( + "removed config key %q in %q: use %q", + key.String(), + source, + replacement, + ) + } + } + return nil +} + func (o *configOverlay) Apply(dst *Config) error { o.Daemon.Apply(&dst.Daemon) o.HTTP.Apply(&dst.HTTP) @@ -552,6 +612,7 @@ func (o *configOverlay) Apply(dst *Config) error { dst.MCPServers = applyMCPServerOverlays(dst.MCPServers, o.MCPServers) } applyProviderOverlays(dst, o.Providers) + o.ModelCatalog.Apply(&dst.ModelCatalog) applySandboxOverlays(dst, o.Sandboxes) o.Observability.Apply(&dst.Observability) o.Log.Apply(&dst.Log) @@ -705,8 +766,8 @@ func (o providerOverlay) Apply(dst *ProviderConfig) { if o.DisplayName != nil { dst.DisplayName = *o.DisplayName } - if o.DefaultModel != 
nil { - dst.DefaultModel = *o.DefaultModel + if o.Models != nil { + o.Models.Apply(&dst.Models) } if o.Harness != nil { dst.Harness = *o.Harness @@ -749,6 +810,54 @@ func (o providerOverlay) Apply(dst *ProviderConfig) { } } +func (o providerModelsOverlay) Apply(dst *ProviderModelsConfig) { + if o.Default != nil { + dst.Default = *o.Default + } + if o.Curated != nil { + dst.Curated = cloneProviderModelConfigs(o.Curated) + } + o.Discovery.Apply(&dst.Discovery) +} + +func (o providerModelsDiscoveryOverlay) Apply(dst *ProviderModelsDiscoveryConfig) { + if o.Enabled != nil { + dst.Enabled = boolRef(*o.Enabled) + } + if o.Command != nil { + dst.Command = *o.Command + } + if o.Endpoint != nil { + dst.Endpoint = *o.Endpoint + } + if o.Timeout != nil { + dst.Timeout = *o.Timeout + } +} + +func (o modelCatalogOverlay) Apply(dst *ModelCatalogConfig) { + o.Sources.Apply(&dst.Sources) +} + +func (o modelCatalogSourcesOverlay) Apply(dst *ModelCatalogSourcesConfig) { + o.ModelsDev.Apply(&dst.ModelsDev) +} + +func (o modelsDevSourceOverlay) Apply(dst *ModelsDevSourceConfig) { + if o.Enabled != nil { + dst.Enabled = boolRef(*o.Enabled) + } + if o.Endpoint != nil { + dst.Endpoint = *o.Endpoint + } + if o.TTL != nil { + dst.TTL = *o.TTL + } + if o.Timeout != nil { + dst.Timeout = *o.Timeout + } +} + func applyProviderCredentialOverlays(overlays []providerCredentialOverlay) []ProviderCredentialSlot { slots := make([]ProviderCredentialSlot, 0, len(overlays)) for _, overlay := range overlays { @@ -1594,11 +1703,19 @@ func joinTOMLKeys(keys []burnttoml.Key) string { return "" } - values := make([]string, 0, len(keys)) - for _, key := range keys { + sorted := sortedTOMLKeys(keys) + values := make([]string, 0, len(sorted)) + for _, key := range sorted { values = append(values, key.String()) } - sort.Strings(values) return strings.Join(values, ", ") } + +func sortedTOMLKeys(keys []burnttoml.Key) []burnttoml.Key { + sorted := append([]burnttoml.Key(nil), keys...) 
+ sort.Slice(sorted, func(i, j int) bool { + return sorted[i].String() < sorted[j].String() + }) + return sorted +} diff --git a/internal/config/perf_bench_test.go b/internal/config/perf_bench_test.go index bdad69c7a..7d8c23131 100644 --- a/internal/config/perf_bench_test.go +++ b/internal/config/perf_bench_test.go @@ -37,9 +37,9 @@ func BenchmarkResolveAgentMergedMCPServers(b *testing.B) { MCPServers: benchmarkMCPServers("global", 24, 0), Providers: map[string]ProviderConfig{ "claude": { - Command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - DefaultModel: "claude-sonnet-4-6", - MCPServers: benchmarkMCPServers("provider", 24, 8), + Command: "npx -y @agentclientprotocol/claude-agent-acp@latest", + Models: ProviderModelsConfig{Default: "claude-sonnet-4-6"}, + MCPServers: benchmarkMCPServers("provider", 24, 8), }, }, } diff --git a/internal/config/persistence_test.go b/internal/config/persistence_test.go index a97f4f424..5a3329fba 100644 --- a/internal/config/persistence_test.go +++ b/internal/config/persistence_test.go @@ -451,7 +451,7 @@ func TestOverlayEditorSetTableMutations(t *testing.T) { editor, err := newOverlayEditor(ConfigName, []byte(` # provider block [providers.openai] - default_model = "gpt-4o" + models = { default = "gpt-4o" } command = "openai" [defaults] @@ -462,8 +462,8 @@ agent = "general" } err = editor.SetTable([]string{"providers", "openai"}, map[string]any{ - "default_model": "gpt-5", - "command": "openai-next", + "models": map[string]any{"default": "gpt-5"}, + "command": "openai-next", }) if err != nil { t.Fatalf("editor.SetTable() error = %v", err) @@ -477,7 +477,7 @@ agent = "general" for _, want := range []string{ "[providers.openai]", - `default_model = "gpt-5"`, + `default = "gpt-5"`, `command = "openai-next"`, "[defaults]", `agent = "general"`, @@ -486,7 +486,7 @@ agent = "general" t.Fatalf("rendered config missing %q\n%s", want, text) } } - if strings.Contains(text, `default_model = "gpt-4o"`) { + if strings.Contains(text, 
`default = "gpt-4o"`) { t.Fatalf("rendered config still contains old model\n%s", text) } }) @@ -496,17 +496,17 @@ agent = "general" editor, err := newOverlayEditor(ConfigName, []byte(` [providers.openai] -default_model = "gpt-4o" +command = "openai" -[providers.openai.options] -temperature = 0.2 +[providers.openai.models] +default = "gpt-4o" `)) if err != nil { t.Fatalf("newOverlayEditor() error = %v", err) } err = editor.SetTable([]string{"providers", "openai"}, map[string]any{ - "default_model": "gpt-5", + "models": map[string]any{"default": "gpt-5"}, }) if err == nil { t.Fatal("editor.SetTable() error = nil, want nested-subtable rejection") @@ -676,7 +676,7 @@ agent = "general" provider = "openai" [providers.openai] - default_model = "gpt-4o" + models = { default = "gpt-4o" } command = "openai" `)) if err != nil { @@ -728,7 +728,7 @@ provider = "openai" for _, unwanted := range []string{ `provider = "openai"`, "[providers.openai]", - `default_model = "gpt-4o"`, + `default = "gpt-4o"`, `command = "openai"`, } { if strings.Contains(text, unwanted) { diff --git a/internal/config/provider.go b/internal/config/provider.go index 1049a8334..a87227c38 100644 --- a/internal/config/provider.go +++ b/internal/config/provider.go @@ -4,8 +4,10 @@ import ( "errors" "fmt" "maps" + "net/url" "regexp" "strings" + "time" "github.com/pedronauck/agh/internal/vault" ) @@ -63,11 +65,59 @@ type ProviderCredentialSlot struct { Required bool `toml:"required"` } +// ProviderModelsConfig describes provider-scoped model defaults and metadata. +type ProviderModelsConfig struct { + Default string `toml:"default,omitempty"` + Curated []ProviderModelConfig `toml:"curated,omitempty"` + Discovery ProviderModelsDiscoveryConfig `toml:"discovery,omitempty"` +} + +// ProviderModelsDiscoveryConfig describes optional side-effect-free model discovery. 
+type ProviderModelsDiscoveryConfig struct { + Enabled *bool `toml:"enabled,omitempty"` + Command string `toml:"command,omitempty"` + Endpoint string `toml:"endpoint,omitempty"` + Timeout string `toml:"timeout,omitempty"` +} + +// ProviderModelConfig describes one curated provider model entry. +type ProviderModelConfig struct { + ID string `toml:"id"` + DisplayName string `toml:"display_name,omitempty"` + ContextWindow *int64 `toml:"context_window,omitempty"` + MaxInputTokens *int64 `toml:"max_input_tokens,omitempty"` + MaxOutputTokens *int64 `toml:"max_output_tokens,omitempty"` + SupportsTools *bool `toml:"supports_tools,omitempty"` + SupportsReasoning *bool `toml:"supports_reasoning,omitempty"` + ReasoningEfforts []string `toml:"reasoning_efforts,omitempty"` + DefaultReasoningEffort string `toml:"default_reasoning_effort,omitempty"` + CostInputPerMillion *float64 `toml:"cost_input_per_million,omitempty"` + CostOutputPerMillion *float64 `toml:"cost_output_per_million,omitempty"` +} + +// ModelCatalogConfig controls daemon-owned model catalog sources. +type ModelCatalogConfig struct { + Sources ModelCatalogSourcesConfig `toml:"sources,omitempty"` +} + +// ModelCatalogSourcesConfig groups built-in model catalog sources. +type ModelCatalogSourcesConfig struct { + ModelsDev ModelsDevSourceConfig `toml:"models_dev,omitempty"` +} + +// ModelsDevSourceConfig controls the models.dev catalog source. +type ModelsDevSourceConfig struct { + Enabled *bool `toml:"enabled,omitempty"` + Endpoint string `toml:"endpoint,omitempty"` + TTL string `toml:"ttl,omitempty"` + Timeout string `toml:"timeout,omitempty"` +} + // ProviderConfig describes how to launch a provider in ACP mode. 
type ProviderConfig struct { Command string `toml:"command"` DisplayName string `toml:"display_name,omitempty"` - DefaultModel string `toml:"default_model"` + Models ProviderModelsConfig `toml:"models,omitempty"` Harness ProviderHarness `toml:"harness,omitempty"` RuntimeProvider string `toml:"runtime_provider,omitempty"` Transport string `toml:"transport,omitempty"` @@ -164,6 +214,9 @@ const ( piACPCommand = "npx -y pi-acp@latest" piACPAuthLoginCommand = piACPCommand + " --terminal-login" providerAPIKeyCredential = "api_key" + defaultModelsDevEndpoint = "https://models.dev/api.json" + defaultModelsDevTTL = "24h" + defaultModelsDevTimeout = "10s" ) var builtinProviderAliases = map[string]string{ @@ -208,22 +261,56 @@ var builtinProviderAliases = map[string]string{ var builtinProviders = map[string]ProviderConfig{ "claude": { - Command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - DisplayName: "Claude Code", - Harness: ProviderHarnessACP, - DefaultModel: "claude-sonnet-4-6", + Command: "npx -y @agentclientprotocol/claude-agent-acp@latest", + DisplayName: "Claude Code", + Harness: ProviderHarnessACP, + Models: ProviderModelsConfig{ + Default: "claude-sonnet-4-6", + Curated: []ProviderModelConfig{ + {ID: "claude-opus-4-7", DisplayName: "Claude Opus 4.7"}, + {ID: "claude-sonnet-4-6", DisplayName: "Claude Sonnet 4.6"}, + {ID: "claude-haiku-4-5", DisplayName: "Claude Haiku 4.5"}, + }, + }, }, "codex": { - Command: "npx -y @zed-industries/codex-acp@latest", - DisplayName: "Codex", - Harness: ProviderHarnessACP, - DefaultModel: "gpt-5.4", + Command: "npx -y @zed-industries/codex-acp@latest", + DisplayName: "Codex", + Harness: ProviderHarnessACP, + Models: ProviderModelsConfig{ + Default: "gpt-5.4", + Curated: []ProviderModelConfig{ + { + ID: "gpt-5.4", + DisplayName: "GPT-5.4", + SupportsTools: boolRef(true), + SupportsReasoning: boolRef(true), + ReasoningEfforts: []string{"minimal", "low", "medium", "high", "xhigh"}, + DefaultReasoningEffort: "medium", + }, + { 
+ ID: "gpt-5.4-mini", + DisplayName: "GPT-5.4 Mini", + SupportsTools: boolRef(true), + SupportsReasoning: boolRef(true), + ReasoningEfforts: []string{"minimal", "low", "medium", "high", "xhigh"}, + DefaultReasoningEffort: "medium", + }, + {ID: "gpt-5.3", DisplayName: "GPT-5.3"}, + {ID: "gpt-5.3-mini", DisplayName: "GPT-5.3 Mini"}, + }, + }, }, "gemini": { - Command: "gemini --acp", - DisplayName: "Gemini CLI", - Harness: ProviderHarnessACP, - DefaultModel: "gemini-3.1-pro-preview", + Command: "gemini --acp", + DisplayName: "Gemini CLI", + Harness: ProviderHarnessACP, + Models: ProviderModelsConfig{ + Default: "gemini-3.1-pro-preview", + Curated: []ProviderModelConfig{ + {ID: "gemini-3.1-pro-preview", DisplayName: "Gemini 3.1 Pro Preview"}, + }, + }, }, "opencode": { Command: "npx -y opencode-ai@latest acp", @@ -277,10 +364,15 @@ var builtinProviders = map[string]ProviderConfig{ Harness: ProviderHarnessACP, }, "qwen-code": { - Command: "npx -y @qwen-code/qwen-code@latest --acp --experimental-skills", - DisplayName: "Qwen Code", - Harness: ProviderHarnessACP, - DefaultModel: "qwen3.6-plus", + Command: "npx -y @qwen-code/qwen-code@latest --acp --experimental-skills", + DisplayName: "Qwen Code", + Harness: ProviderHarnessACP, + Models: ProviderModelsConfig{ + Default: "qwen3.6-plus", + Curated: []ProviderModelConfig{ + {ID: "qwen3.6-plus", DisplayName: "Qwen3.6 Plus"}, + }, + }, }, "copilot": { Command: "copilot --acp --stdio", @@ -302,72 +394,117 @@ var builtinProviders = map[string]ProviderConfig{ DisplayName: "Pi", Harness: ProviderHarnessPiACP, RuntimeProvider: "anthropic", - DefaultModel: "claude-opus-4-7", AuthLoginCmd: piACPAuthLoginCommand, + Models: ProviderModelsConfig{ + Default: "claude-opus-4-7", + Curated: []ProviderModelConfig{ + {ID: "claude-opus-4-7", DisplayName: "Claude Opus 4.7"}, + }, + }, }, "openrouter": { Command: piACPCommand, DisplayName: "OpenRouter", Harness: ProviderHarnessPiACP, RuntimeProvider: "openrouter", - DefaultModel: 
"openai/gpt-5.4", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("OPENROUTER_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "openai/gpt-5.4", + Curated: []ProviderModelConfig{ + {ID: "openai/gpt-5.4", DisplayName: "OpenAI GPT-5.4"}, + }, + }, }, "zai": { Command: piACPCommand, DisplayName: "z.ai", Harness: ProviderHarnessPiACP, RuntimeProvider: "zai", - DefaultModel: "glm-4.6", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("ZAI_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "glm-4.6", + Curated: []ProviderModelConfig{ + {ID: "glm-4.6", DisplayName: "GLM-4.6"}, + }, + }, }, "moonshot": { Command: piACPCommand, DisplayName: "Moonshot / Kimi", Harness: ProviderHarnessPiACP, RuntimeProvider: "kimi-coding", - DefaultModel: "kimi-k2-thinking", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("KIMI_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "kimi-k2-thinking", + Curated: []ProviderModelConfig{ + {ID: "kimi-k2-thinking", DisplayName: "Kimi K2 Thinking"}, + }, + }, }, "vercel-ai-gateway": { Command: piACPCommand, DisplayName: "Vercel AI Gateway", Harness: ProviderHarnessPiACP, RuntimeProvider: "vercel-ai-gateway", - DefaultModel: "anthropic/claude-opus-4-7", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("AI_GATEWAY_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "anthropic/claude-opus-4-7", + Curated: []ProviderModelConfig{ + {ID: "anthropic/claude-opus-4-7", DisplayName: "Anthropic Claude Opus 4.7"}, + }, + }, }, "xai": { Command: piACPCommand, DisplayName: "xAI", Harness: ProviderHarnessPiACP, RuntimeProvider: "xai", - DefaultModel: "grok-4-fast-non-reasoning", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("XAI_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "grok-4-fast-non-reasoning", + Curated: []ProviderModelConfig{ + {ID: "grok-4-fast-non-reasoning", DisplayName: "Grok 4 Fast Non-Reasoning"}, + }, + }, }, "minimax": { Command: piACPCommand, 
DisplayName: "MiniMax", Harness: ProviderHarnessPiACP, RuntimeProvider: "minimax", - DefaultModel: "MiniMax-M2.1", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("MINIMAX_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "MiniMax-M2.1", + Curated: []ProviderModelConfig{ + {ID: "MiniMax-M2.1", DisplayName: "MiniMax M2.1"}, + }, + }, }, "mistral": { Command: piACPCommand, DisplayName: "Mistral", Harness: ProviderHarnessPiACP, RuntimeProvider: "mistral", - DefaultModel: "devstral-medium-latest", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("MISTRAL_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "devstral-medium-latest", + Curated: []ProviderModelConfig{ + {ID: "devstral-medium-latest", DisplayName: "Devstral Medium Latest"}, + }, + }, }, "groq": { Command: piACPCommand, DisplayName: "Groq", Harness: ProviderHarnessPiACP, RuntimeProvider: "groq", - DefaultModel: "openai/gpt-oss-120b", CredentialSlots: []ProviderCredentialSlot{apiKeyCredentialSlot("GROQ_API_KEY")}, + Models: ProviderModelsConfig{ + Default: "openai/gpt-oss-120b", + Curated: []ProviderModelConfig{ + {ID: "openai/gpt-oss-120b", DisplayName: "OpenAI GPT-OSS 120B"}, + }, + }, }, } @@ -477,7 +614,7 @@ func (c *Config) ResolveAgent(agent AgentDef) (ResolvedAgent, error) { model := strings.TrimSpace(agent.Model) if model == "" { - model = strings.TrimSpace(provider.DefaultModel) + model = strings.TrimSpace(provider.Models.Default) } if model == "" && provider.RequiresRuntimeModel() { return ResolvedAgent{}, fmt.Errorf( @@ -599,8 +736,8 @@ func mergeProvider(base ProviderConfig, override ProviderConfig) ProviderConfig if strings.TrimSpace(override.DisplayName) != "" { merged.DisplayName = override.DisplayName } - if strings.TrimSpace(override.DefaultModel) != "" { - merged.DefaultModel = override.DefaultModel + if !providerModelsConfigIsZero(override.Models) { + merged.Models = mergeProviderModels(merged.Models, override.Models) } if override.Harness != "" { 
merged.Harness = override.Harness @@ -643,6 +780,53 @@ func mergeProvider(base ProviderConfig, override ProviderConfig) ProviderConfig return merged } +func mergeProviderModels(base ProviderModelsConfig, override ProviderModelsConfig) ProviderModelsConfig { + merged := cloneProviderModelsConfig(base) + if strings.TrimSpace(override.Default) != "" { + merged.Default = override.Default + } + if override.Curated != nil { + merged.Curated = cloneProviderModelConfigs(override.Curated) + } + if !providerModelsDiscoveryConfigIsZero(override.Discovery) { + merged.Discovery = mergeProviderModelsDiscovery(merged.Discovery, override.Discovery) + } + return merged +} + +func mergeProviderModelsDiscovery( + base ProviderModelsDiscoveryConfig, + override ProviderModelsDiscoveryConfig, +) ProviderModelsDiscoveryConfig { + merged := cloneProviderModelsDiscoveryConfig(base) + if override.Enabled != nil { + merged.Enabled = boolRef(*override.Enabled) + } + if strings.TrimSpace(override.Command) != "" { + merged.Command = override.Command + } + if strings.TrimSpace(override.Endpoint) != "" { + merged.Endpoint = override.Endpoint + } + if strings.TrimSpace(override.Timeout) != "" { + merged.Timeout = override.Timeout + } + return merged +} + +func providerModelsConfigIsZero(value ProviderModelsConfig) bool { + return strings.TrimSpace(value.Default) == "" && + value.Curated == nil && + providerModelsDiscoveryConfigIsZero(value.Discovery) +} + +func providerModelsDiscoveryConfigIsZero(value ProviderModelsDiscoveryConfig) bool { + return value.Enabled == nil && + strings.TrimSpace(value.Command) == "" && + strings.TrimSpace(value.Endpoint) == "" && + strings.TrimSpace(value.Timeout) == "" +} + func newUnknownProviderError(providerName string) error { return fmt.Errorf("%w: unknown provider %q", ErrProviderUnavailable, providerName) } @@ -840,6 +1024,159 @@ func (p ProviderConfig) SessionMCPEnabled() bool { return *p.SessionMCP } +// Validate reports whether the provider model block is 
usable. +func (m ProviderModelsConfig) Validate(path string) error { + if strings.TrimSpace(m.Default) == "" && m.Default != "" { + return fmt.Errorf("%s.default is required", path) + } + seen := make(map[string]struct{}, len(m.Curated)) + for idx, model := range m.Curated { + modelPath := fmt.Sprintf("%s.curated[%d]", path, idx) + id := strings.TrimSpace(model.ID) + if id == "" { + return fmt.Errorf("%s.id is required", modelPath) + } + if _, ok := seen[id]; ok { + return fmt.Errorf("%s.id duplicates %q", modelPath, id) + } + seen[id] = struct{}{} + efforts := make(map[string]struct{}, len(model.ReasoningEfforts)) + for effortIdx, effort := range model.ReasoningEfforts { + trimmed := strings.TrimSpace(effort) + if trimmed == "" { + return fmt.Errorf("%s.reasoning_efforts[%d] is required", modelPath, effortIdx) + } + if _, ok := efforts[trimmed]; ok { + return fmt.Errorf("%s.reasoning_efforts[%d] duplicates %q", modelPath, effortIdx, trimmed) + } + efforts[trimmed] = struct{}{} + } + defaultEffort := strings.TrimSpace(model.DefaultReasoningEffort) + if defaultEffort != "" && len(efforts) > 0 { + if _, ok := efforts[defaultEffort]; !ok { + return fmt.Errorf("%s.default_reasoning_effort must be listed in reasoning_efforts", modelPath) + } + } + } + return m.Discovery.Validate(path + ".discovery") +} + +// Validate reports whether the discovery source config is usable. 
+func (d ProviderModelsDiscoveryConfig) Validate(path string) error { + command := strings.TrimSpace(d.Command) + endpoint := strings.TrimSpace(d.Endpoint) + if command != "" && unsafeDiscoveryCommand(command) { + return fmt.Errorf("%s.command must be a single-line command", path) + } + if endpoint != "" { + if err := validateAbsoluteHTTPURL(path+".endpoint", endpoint); err != nil { + return err + } + } + if command != "" && endpoint != "" { + return fmt.Errorf("%s.command and %s.endpoint are mutually exclusive", path, path) + } + if strings.TrimSpace(d.Timeout) != "" { + if err := validatePositiveDuration(path+".timeout", d.Timeout); err != nil { + return err + } + } + if d.Enabled != nil && *d.Enabled && command == "" && endpoint == "" { + return fmt.Errorf("%s requires command or endpoint when enabled", path) + } + return nil +} + +// DefaultModelCatalogConfig returns the default model catalog source config. +func DefaultModelCatalogConfig() ModelCatalogConfig { + return ModelCatalogConfig{ + Sources: ModelCatalogSourcesConfig{ + ModelsDev: ModelsDevSourceConfig{ + Enabled: boolRef(true), + Endpoint: defaultModelsDevEndpoint, + TTL: defaultModelsDevTTL, + Timeout: defaultModelsDevTimeout, + }, + }, + } +} + +// Validate reports whether model catalog config is usable. +func (c ModelCatalogConfig) Validate() error { + return c.Sources.ModelsDev.Validate("model_catalog.sources.models_dev") +} + +// EffectiveEnabled reports whether the models.dev source should run. +func (c ModelsDevSourceConfig) EffectiveEnabled() bool { + if c.Enabled == nil { + return true + } + return *c.Enabled +} + +// EffectiveEndpoint returns the configured endpoint or the default models.dev endpoint. +func (c ModelsDevSourceConfig) EffectiveEndpoint() string { + if endpoint := strings.TrimSpace(c.Endpoint); endpoint != "" { + return endpoint + } + return defaultModelsDevEndpoint +} + +// EffectiveTTL returns the configured TTL or the default models.dev TTL. 
+func (c ModelsDevSourceConfig) EffectiveTTL() string { + if ttl := strings.TrimSpace(c.TTL); ttl != "" { + return ttl + } + return defaultModelsDevTTL +} + +// EffectiveTimeout returns the configured timeout or the default models.dev timeout. +func (c ModelsDevSourceConfig) EffectiveTimeout() string { + if timeout := strings.TrimSpace(c.Timeout); timeout != "" { + return timeout + } + return defaultModelsDevTimeout +} + +// Validate reports whether the models.dev source config is usable. +func (c ModelsDevSourceConfig) Validate(path string) error { + if err := validateAbsoluteHTTPURL(path+".endpoint", c.EffectiveEndpoint()); err != nil { + return err + } + if err := validatePositiveDuration(path+".ttl", c.EffectiveTTL()); err != nil { + return err + } + return validatePositiveDuration(path+".timeout", c.EffectiveTimeout()) +} + +func validatePositiveDuration(path string, raw string) error { + duration, err := time.ParseDuration(strings.TrimSpace(raw)) + if err != nil { + return fmt.Errorf("%s must be a positive duration", path) + } + if duration <= 0 { + return fmt.Errorf("%s must be a positive duration", path) + } + return nil +} + +func validateAbsoluteHTTPURL(path string, raw string) error { + parsed, err := url.Parse(strings.TrimSpace(raw)) + if err != nil || parsed.Scheme == "" || parsed.Host == "" { + return fmt.Errorf("%s must be an absolute HTTP(S) URL", path) + } + switch parsed.Scheme { + case string(MCPServerTransportHTTP), urlSchemeHTTPS: + return nil + default: + return fmt.Errorf("%s must be an absolute HTTP(S) URL", path) + } +} + +func unsafeDiscoveryCommand(command string) bool { + return strings.ContainsAny(command, "\x00\r\n") +} + // Validate reports whether the harness is supported. 
func (h ProviderHarness) Validate(path string) error { switch h { @@ -903,7 +1240,7 @@ func validProviderSecretRef(ref string) bool { if vault.IsEnvRef(normalized) { return vault.ValidateRef(normalized) == nil } - if err := vault.ValidateSecretRefNamespace(normalized, "providers"); err != nil { + if err := vault.ValidateSecretRefNamespace(normalized, providersConfigKey); err != nil { return false } path := strings.TrimPrefix(normalized, "vault:providers/") @@ -997,6 +1334,9 @@ func validateResolvedProvider(name string, provider ProviderConfig) error { if strings.TrimSpace(provider.Command) == "" { return fmt.Errorf("provider %q command is required", name) } + if err := provider.Models.Validate(fmt.Sprintf("providers.%s.models", name)); err != nil { + return err + } if err := provider.EffectiveHarness().Validate(fmt.Sprintf("providers.%s.harness", name)); err != nil { return err } @@ -1147,7 +1487,7 @@ func cloneProvider(src ProviderConfig) ProviderConfig { return ProviderConfig{ Command: src.Command, DisplayName: src.DisplayName, - DefaultModel: src.DefaultModel, + Models: cloneProviderModelsConfig(src.Models), Harness: src.Harness, RuntimeProvider: src.RuntimeProvider, Transport: src.Transport, @@ -1175,6 +1515,64 @@ func cloneBoolRef(src *bool) *bool { return boolRef(*src) } +func cloneInt64Ref(src *int64) *int64 { + if src == nil { + return nil + } + value := *src + return &value +} + +func cloneFloat64Ref(src *float64) *float64 { + if src == nil { + return nil + } + value := *src + return &value +} + +func cloneProviderModelsConfig(src ProviderModelsConfig) ProviderModelsConfig { + return ProviderModelsConfig{ + Default: src.Default, + Curated: cloneProviderModelConfigs(src.Curated), + Discovery: cloneProviderModelsDiscoveryConfig(src.Discovery), + } +} + +func cloneProviderModelsDiscoveryConfig( + src ProviderModelsDiscoveryConfig, +) ProviderModelsDiscoveryConfig { + return ProviderModelsDiscoveryConfig{ + Enabled: cloneBoolRef(src.Enabled), + Command: 
src.Command, + Endpoint: src.Endpoint, + Timeout: src.Timeout, + } +} + +func cloneProviderModelConfigs(src []ProviderModelConfig) []ProviderModelConfig { + if src == nil { + return nil + } + cloned := make([]ProviderModelConfig, len(src)) + for idx, model := range src { + cloned[idx] = ProviderModelConfig{ + ID: model.ID, + DisplayName: model.DisplayName, + ContextWindow: cloneInt64Ref(model.ContextWindow), + MaxInputTokens: cloneInt64Ref(model.MaxInputTokens), + MaxOutputTokens: cloneInt64Ref(model.MaxOutputTokens), + SupportsTools: cloneBoolRef(model.SupportsTools), + SupportsReasoning: cloneBoolRef(model.SupportsReasoning), + ReasoningEfforts: cloneStrings(model.ReasoningEfforts), + DefaultReasoningEffort: model.DefaultReasoningEffort, + CostInputPerMillion: cloneFloat64Ref(model.CostInputPerMillion), + CostOutputPerMillion: cloneFloat64Ref(model.CostOutputPerMillion), + } + } + return cloned +} + func cloneProviderCredentialSlots(src []ProviderCredentialSlot) []ProviderCredentialSlot { if len(src) == 0 { return nil diff --git a/internal/config/provider_test.go b/internal/config/provider_test.go index f77919136..09d774339 100644 --- a/internal/config/provider_test.go +++ b/internal/config/provider_test.go @@ -218,11 +218,19 @@ func TestBuiltinProvidersContainExpectedCommands(t *testing.T) { firstNonEmpty(tc.runtimeProvider, tc.name), ) } - if got.DefaultModel != tc.defaultModel { + if got.Models.Default != tc.defaultModel { t.Fatalf( - "BuiltinProviders()[%q].DefaultModel = %q, want %q", + "BuiltinProviders()[%q].Models.Default = %q, want %q", tc.name, - got.DefaultModel, + got.Models.Default, + tc.defaultModel, + ) + } + if tc.defaultModel != "" && !providerCuratedModelsContain(got.Models.Curated, tc.defaultModel) { + t.Fatalf( + "BuiltinProviders()[%q].Models.Curated = %#v, want default model %q", + tc.name, + got.Models.Curated, tc.defaultModel, ) } @@ -266,6 +274,15 @@ func TestBuiltinProvidersContainExpectedCommands(t *testing.T) { } } +func 
providerCuratedModelsContain(models []ProviderModelConfig, id string) bool { + for _, model := range models { + if model.ID == id { + return true + } + } + return false +} + func TestRepoRootConfigProviderDefaultsMatchBuiltinRegistry(t *testing.T) { t.Parallel() @@ -277,15 +294,15 @@ func TestRepoRootConfigProviderDefaultsMatchBuiltinRegistry(t *testing.T) { builtins := BuiltinProviders() for name, provider := range overlay.Providers { - if provider.DefaultModel == "" { + if provider.Models.Default == "" { continue } builtin, ok := builtins[name] if !ok { t.Fatalf("repo config provider %q is not in the builtin registry", name) } - if got, want := provider.DefaultModel, builtin.DefaultModel; got != want { - t.Fatalf("repo config provider %q default_model = %q, want builtin %q", name, got, want) + if got, want := provider.Models.Default, builtin.Models.Default; got != want { + t.Fatalf("repo config provider %q models.default = %q, want builtin %q", name, got, want) } } } @@ -387,7 +404,7 @@ func TestProviderConfigOverrideMergesWithBuiltins(t *testing.T) { cfg := Config{ Providers: map[string]ProviderConfig{ "claude": { - DefaultModel: "claude-opus-override", + Models: ProviderModelsConfig{Default: "claude-opus-override"}, }, }, } @@ -399,8 +416,8 @@ func TestProviderConfigOverrideMergesWithBuiltins(t *testing.T) { if provider.Command == "" { t.Fatal("ResolveProvider() Command = empty, want builtin command") } - if provider.DefaultModel != "claude-opus-override" { - t.Fatalf("ResolveProvider() DefaultModel = %q, want %q", provider.DefaultModel, "claude-opus-override") + if provider.Models.Default != "claude-opus-override" { + t.Fatalf("ResolveProvider() Models.Default = %q, want %q", provider.Models.Default, "claude-opus-override") } if provider.EffectiveAuthMode() != ProviderAuthModeNativeCLI { t.Fatalf("ResolveProvider() AuthMode = %q, want native_cli", provider.EffectiveAuthMode()) @@ -1046,6 +1063,378 @@ func TestResolveProviderRejectsUnknownProvider(t *testing.T) 
{ } } +func TestResolveProviderMergesRuntimeOverrideHints(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } + cfg := DefaultWithHome(homePaths) + cfg.Providers["codex"] = ProviderConfig{ + Models: ProviderModelsConfig{ + Default: "gpt-manual", + Curated: []ProviderModelConfig{ + {ID: "gpt-custom", DisplayName: "Custom GPT"}, + {ID: "gpt-mini", DisplayName: "Mini GPT"}, + }, + }, + } + + provider, err := cfg.ResolveProvider("codex") + if err != nil { + t.Fatalf("ResolveProvider(codex) error = %v", err) + } + if got, want := provider.Models.Default, "gpt-manual"; got != want { + t.Fatalf("ResolveProvider(codex) Models.Default = %q, want %q", got, want) + } + wantModels := []ProviderModelConfig{ + {ID: "gpt-custom", DisplayName: "Custom GPT"}, + {ID: "gpt-mini", DisplayName: "Mini GPT"}, + } + if !reflect.DeepEqual(provider.Models.Curated, wantModels) { + t.Fatalf("ResolveProvider(codex) Models.Curated = %#v, want %#v", provider.Models.Curated, wantModels) + } +} + +func TestResolveProviderPreservesExplicitEmptyCuratedModels(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } + cfg := DefaultWithHome(homePaths) + cfg.Providers["codex"] = ProviderConfig{ + Models: ProviderModelsConfig{ + Curated: []ProviderModelConfig{}, + }, + } + + provider, err := cfg.ResolveProvider("codex") + if err != nil { + t.Fatalf("ResolveProvider(codex) error = %v", err) + } + if got := len(provider.Models.Curated); got != 0 { + t.Fatalf("ResolveProvider(codex) Models.Curated len = %d, want 0", got) + } +} + +func TestLoadProviderRuntimeOverrideHintsFromTOML(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = 
%v", err) + } + if err := EnsureHomeLayout(homePaths); err != nil { + t.Fatalf("EnsureHomeLayout() error = %v", err) + } + writeFile(t, homePaths.ConfigFile, ` +[providers.codex.models] +default = "gpt-manual" + +[[providers.codex.models.curated]] +id = "gpt-custom" +display_name = "Custom GPT" + +[[providers.codex.models.curated]] +id = "gpt-mini" +display_name = "Mini GPT" +`) + + cfg, err := LoadForHome(homePaths, withoutDotEnv()) + if err != nil { + t.Fatalf("LoadForHome() error = %v", err) + } + provider, err := cfg.ResolveProvider("codex") + if err != nil { + t.Fatalf("ResolveProvider(codex) error = %v", err) + } + if got, want := provider.Models.Default, "gpt-manual"; got != want { + t.Fatalf("ResolveProvider(codex) Models.Default = %q, want %q", got, want) + } + wantModels := []ProviderModelConfig{ + {ID: "gpt-custom", DisplayName: "Custom GPT"}, + {ID: "gpt-mini", DisplayName: "Mini GPT"}, + } + if !reflect.DeepEqual(provider.Models.Curated, wantModels) { + t.Fatalf("ResolveProvider(codex) Models.Curated = %#v, want %#v", provider.Models.Curated, wantModels) + } +} + +func TestLoadRejectsBlankProviderCuratedModelID(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } + if err := EnsureHomeLayout(homePaths); err != nil { + t.Fatalf("EnsureHomeLayout() error = %v", err) + } + writeFile(t, homePaths.ConfigFile, ` +[[providers.codex.models.curated]] +id = "gpt-custom" + +[[providers.codex.models.curated]] +id = " " +`) + + _, err = LoadForHome(homePaths, withoutDotEnv()) + if err == nil { + t.Fatal("LoadForHome() error = nil, want blank curated id validation") + } + if !strings.Contains(err.Error(), `providers.codex.models.curated[1].id is required`) { + t.Fatalf("LoadForHome() error = %v, want curated id index detail", err) + } +} + +func TestLoadRejectsInvalidProviderModelsConfig(t *testing.T) { + t.Parallel() + + tests := 
[]struct { + name string + config string + wantErr string + }{ + { + name: "Should reject duplicate curated model IDs", + config: ` +[[providers.codex.models.curated]] +id = "gpt-5.4" + +[[providers.codex.models.curated]] +id = "gpt-5.4" +`, + wantErr: `providers.codex.models.curated[1].id duplicates "gpt-5.4"`, + }, + { + name: "Should reject default reasoning effort outside allowed efforts", + config: ` +[[providers.codex.models.curated]] +id = "gpt-5.4" +reasoning_efforts = ["low", "medium"] +default_reasoning_effort = "high" +`, + wantErr: `providers.codex.models.curated[0].default_reasoning_effort must be listed in reasoning_efforts`, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } + if err := EnsureHomeLayout(homePaths); err != nil { + t.Fatalf("EnsureHomeLayout() error = %v", err) + } + writeFile(t, homePaths.ConfigFile, tc.config) + + _, err = LoadForHome(homePaths, withoutDotEnv()) + if err == nil { + t.Fatal("LoadForHome() error = nil, want validation error") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("LoadForHome() error = %v, want %q", err, tc.wantErr) + } + }) + } +} + +func TestLoadRejectsRemovedProviderModelKeys(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + config string + removedPath string + replacement string + }{ + { + name: "Should reject old default_model key", + config: ` +[providers.codex] +default_model = "gpt-5.4" +`, + removedPath: `providers.codex.default_model`, + replacement: `providers.codex.models.default`, + }, + { + name: "Should reject old supported_models key", + config: ` +[providers.codex] +supported_models = ["gpt-5.4"] +`, + removedPath: `providers.codex.supported_models`, + replacement: `providers.codex.models.curated`, + }, + { + name: "Should reject old supports_reasoning_effort key", 
+ config: ` +[providers.codex] +supports_reasoning_effort = true +`, + removedPath: `providers.codex.supports_reasoning_effort`, + replacement: `providers.codex.models.curated[].reasoning_efforts`, + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) + if err != nil { + t.Fatalf("ResolveHomePathsFrom() error = %v", err) + } + if err := EnsureHomeLayout(homePaths); err != nil { + t.Fatalf("EnsureHomeLayout() error = %v", err) + } + writeFile(t, homePaths.ConfigFile, tc.config) + + _, err = LoadForHome(homePaths, withoutDotEnv()) + if err == nil { + t.Fatal("LoadForHome() error = nil, want removed key error") + } + message := err.Error() + if !strings.Contains(message, `removed config key "`+tc.removedPath+`"`) || + !strings.Contains(message, `use "`+tc.replacement+`"`) { + t.Fatalf( + "LoadForHome() error = %v, want removed path %q and replacement %q", + err, + tc.removedPath, + tc.replacement, + ) + } + }) + } +} + +func TestModelCatalogModelsDevConfigValidatesDefaultsAndOverrides(t *testing.T) { + t.Parallel() + + defaults := DefaultModelCatalogConfig().Sources.ModelsDev + if !defaults.EffectiveEnabled() { + t.Fatal("DefaultModelCatalogConfig().ModelsDev enabled = false, want true") + } + if got, want := defaults.EffectiveEndpoint(), defaultModelsDevEndpoint; got != want { + t.Fatalf("ModelsDev EffectiveEndpoint() = %q, want %q", got, want) + } + if got, want := defaults.EffectiveTTL(), defaultModelsDevTTL; got != want { + t.Fatalf("ModelsDev EffectiveTTL() = %q, want %q", got, want) + } + if got, want := defaults.EffectiveTimeout(), defaultModelsDevTimeout; got != want { + t.Fatalf("ModelsDev EffectiveTimeout() = %q, want %q", got, want) + } + + enabled := false + override := ModelsDevSourceConfig{ + Enabled: &enabled, + Endpoint: "https://models.example.test/api.json", + TTL: "2h", + Timeout: "5s", + } + if err := 
override.Validate("model_catalog.sources.models_dev"); err != nil { + t.Fatalf("ModelsDev Validate(valid override) error = %v", err) + } + if override.EffectiveEnabled() { + t.Fatal("ModelsDev EffectiveEnabled() = true, want explicit false") + } + if got, want := override.EffectiveEndpoint(), "https://models.example.test/api.json"; got != want { + t.Fatalf("ModelsDev EffectiveEndpoint() = %q, want %q", got, want) + } + + tests := []struct { + name string + value ModelsDevSourceConfig + wantErr string + }{ + { + name: "Should reject invalid endpoint", + value: ModelsDevSourceConfig{Endpoint: "file:///tmp/models.json"}, + wantErr: "model_catalog.sources.models_dev.endpoint must be an absolute HTTP(S) URL", + }, + { + name: "Should reject invalid TTL", + value: ModelsDevSourceConfig{TTL: "soon"}, + wantErr: "model_catalog.sources.models_dev.ttl must be a positive duration", + }, + { + name: "Should reject invalid timeout", + value: ModelsDevSourceConfig{Timeout: "0s"}, + wantErr: "model_catalog.sources.models_dev.timeout must be a positive duration", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + err := tc.value.Validate("model_catalog.sources.models_dev") + if err == nil { + t.Fatal("ModelsDev Validate() error = nil, want validation error") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("ModelsDev Validate() error = %v, want %q", err, tc.wantErr) + } + }) + } +} + +func TestProviderModelsDiscoveryConfigRejectsUnsafeConfiguration(t *testing.T) { + t.Parallel() + + enabled := true + tests := []struct { + name string + value ProviderModelsDiscoveryConfig + wantErr string + }{ + { + name: "Should reject multiline command", + value: ProviderModelsDiscoveryConfig{Command: "models\nlist"}, + wantErr: "providers.codex.models.discovery.command must be a single-line command", + }, + { + name: "Should reject ambiguous command and endpoint", + value: ProviderModelsDiscoveryConfig{ + Command: "models list", + 
Endpoint: "https://models.example.test", + }, + wantErr: "providers.codex.models.discovery.command and providers.codex.models.discovery.endpoint are mutually exclusive", + }, + { + name: "Should reject invalid endpoint", + value: ProviderModelsDiscoveryConfig{Endpoint: "ftp://models.example.test"}, + wantErr: "providers.codex.models.discovery.endpoint must be an absolute HTTP(S) URL", + }, + { + name: "Should reject enabled discovery without source", + value: ProviderModelsDiscoveryConfig{Enabled: &enabled}, + wantErr: "providers.codex.models.discovery requires command or endpoint when enabled", + }, + { + name: "Should reject invalid timeout", + value: ProviderModelsDiscoveryConfig{Command: "models list", Timeout: "-1s"}, + wantErr: "providers.codex.models.discovery.timeout must be a positive duration", + }, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + err := tc.value.Validate("providers.codex.models.discovery") + if err == nil { + t.Fatal("Discovery Validate() error = nil, want validation error") + } + if !strings.Contains(err.Error(), tc.wantErr) { + t.Fatalf("Discovery Validate() error = %v, want %q", err, tc.wantErr) + } + }) + } +} + func TestResolveAgentDefaultsToolsAndPermissions(t *testing.T) { homePaths, err := ResolveHomePathsFrom(filepath.Join(t.TempDir(), "home")) if err != nil { @@ -1149,8 +1538,8 @@ func TestResolveSessionAgent(t *testing.T) { cfg := DefaultWithHome(homePaths) cfg.Providers["claude"] = ProviderConfig{ - Command: "provider-claude-command", - DefaultModel: "provider-claude-model", + Command: "provider-claude-command", + Models: ProviderModelsConfig{Default: "provider-claude-model"}, } agent := AgentDef{ @@ -1184,8 +1573,8 @@ func TestResolveSessionAgent(t *testing.T) { cfg := DefaultWithHome(homePaths) cfg.Defaults.Provider = "claude" cfg.Providers["claude"] = ProviderConfig{ - Command: "provider-claude-command", - DefaultModel: "provider-claude-model", + Command: "provider-claude-command", 
+ Models: ProviderModelsConfig{Default: "provider-claude-model"}, } agent := AgentDef{ @@ -1220,15 +1609,15 @@ func TestResolveSessionAgent(t *testing.T) { {Name: "global", Command: "global-command"}, } cfg.Providers["claude"] = ProviderConfig{ - Command: "workspace-claude-command", - DefaultModel: "workspace-claude-model", + Command: "workspace-claude-command", + Models: ProviderModelsConfig{Default: "workspace-claude-model"}, MCPServers: []MCPServer{ {Name: "provider-claude", Command: "provider-claude-command"}, }, } cfg.Providers["codex"] = ProviderConfig{ - Command: "workspace-codex-command", - DefaultModel: "workspace-codex-model", + Command: "workspace-codex-command", + Models: ProviderModelsConfig{Default: "workspace-codex-model"}, MCPServers: []MCPServer{ {Name: "provider-codex", Command: "provider-codex-command"}, {Name: "shared-provider", Command: "shared-provider-codex", Args: []string{"--codex"}}, @@ -1320,8 +1709,8 @@ func TestResolveSessionAgent(t *testing.T) { cfg := DefaultWithHome(homePaths) cfg.Providers["codex"] = ProviderConfig{ - Command: "workspace-codex-command", - DefaultModel: "workspace-codex-model", + Command: "workspace-codex-command", + Models: ProviderModelsConfig{Default: "workspace-codex-model"}, } agent := AgentDef{ diff --git a/internal/config/tool_surface.go b/internal/config/tool_surface.go index 94b8f5156..26e8001fb 100644 --- a/internal/config/tool_surface.go +++ b/internal/config/tool_surface.go @@ -321,10 +321,9 @@ func ClassifyToolConfigPath(path []string) (PathPolicy, error) { policy.Kind = kind return policy, nil } - if len(clean) == 3 && clean[0] == "providers" { + if len(clean) == 3 && clean[0] == providersConfigKey { switch clean[2] { case "command", - "default_model", "auth_mode", "env_policy", "home_policy", @@ -337,6 +336,12 @@ func ClassifyToolConfigPath(path []string) (PathPolicy, error) { return policy, nil } } + if len(clean) == 4 && clean[0] == providersConfigKey && clean[2] == "models" { + if clean[3] == 
"default" { + policy.Kind = ConfigValueString + return policy, nil + } + } policy.Denial = ConfigPathForbidden return policy, nil } @@ -646,7 +651,7 @@ func configPathIsTrustRoot(path []string) bool { return true case "hooks": return true - case "providers": + case providersConfigKey: return providerConfigPathIsTrustRoot(path) case "memory": return memoryConfigPathIsTrustRoot(path) diff --git a/internal/extension/contract/host_api.go b/internal/extension/contract/host_api.go index ec0a8d869..3f8f886f5 100644 --- a/internal/extension/contract/host_api.go +++ b/internal/extension/contract/host_api.go @@ -128,10 +128,12 @@ type SessionsListParams struct { // SessionsCreateParams starts a new session. type SessionsCreateParams struct { - Agent string `json:"agent"` - Prompt string `json:"prompt,omitempty"` - Provider string `json:"provider,omitempty"` - Workspace string `json:"workspace,omitempty"` + Agent string `json:"agent"` + Prompt string `json:"prompt,omitempty"` + Provider string `json:"provider,omitempty"` + Model string `json:"model,omitempty"` + ReasoningEffort string `json:"reasoning_effort,omitempty"` + Workspace string `json:"workspace,omitempty"` } // SessionsPromptParams submits one prompt to an existing session. 
diff --git a/internal/extension/host_api.go b/internal/extension/host_api.go index 6a7c28eb1..260539016 100644 --- a/internal/extension/host_api.go +++ b/internal/extension/host_api.go @@ -883,10 +883,12 @@ func (h *HostAPIHandler) handleSessionsCreate(ctx context.Context, raw json.RawM } sess, err := h.sessions.Create(ctx, session.CreateOpts{ - AgentName: strings.TrimSpace(params.Agent), - Provider: strings.TrimSpace(params.Provider), - Workspace: strings.TrimSpace(params.Workspace), - Type: session.SessionTypeSystem, + AgentName: strings.TrimSpace(params.Agent), + Provider: strings.TrimSpace(params.Provider), + Model: strings.TrimSpace(params.Model), + ReasoningEffort: strings.TrimSpace(params.ReasoningEffort), + Workspace: strings.TrimSpace(params.Workspace), + Type: session.SessionTypeSystem, }) if err != nil { return nil, err diff --git a/internal/session/manager.go b/internal/session/manager.go index 407f274e3..5101f0568 100644 --- a/internal/session/manager.go +++ b/internal/session/manager.go @@ -35,6 +35,8 @@ var ( ErrPendingPermissionNotFound = errors.New("session: pending permission not found") // ErrPendingPermissionConflict reports that the approval request matched multiple pending permissions. ErrPendingPermissionConflict = errors.New("session: pending permission lookup is ambiguous") + // ErrInvalidRuntimeOverride reports that a session runtime override is invalid. + ErrInvalidRuntimeOverride = errors.New("session: invalid runtime override") ) // CreateOpts defines the inputs required to create a new session. 
@@ -42,6 +44,7 @@ type CreateOpts struct { AgentName string Provider string Model string + ReasoningEffort string SandboxRef string DisableSandbox bool Name string diff --git a/internal/session/manager_start.go b/internal/session/manager_start.go index 2d14b4568..3064408a2 100644 --- a/internal/session/manager_start.go +++ b/internal/session/manager_start.go @@ -28,6 +28,7 @@ type sessionStartSpec struct { agentName string provider string model string + reasoningEffort string sandboxDisabled bool workspace workspacepkg.ResolvedWorkspace channel string @@ -103,6 +104,7 @@ func (m *Manager) prepareCreateStart(ctx context.Context, opts CreateOpts) (sess agentName: strings.TrimSpace(agentName), provider: strings.TrimSpace(opts.Provider), model: strings.TrimSpace(opts.Model), + reasoningEffort: strings.TrimSpace(opts.ReasoningEffort), sandboxDisabled: sandboxDisabled, workspace: resolvedWorkspace, channel: strings.TrimSpace(opts.Channel), @@ -135,6 +137,8 @@ func (m *Manager) prepareResumeStart(ctx context.Context, meta store.SessionMeta sessionName: meta.Name, agentName: meta.AgentName, provider: strings.TrimSpace(meta.Provider), + model: strings.TrimSpace(meta.Model), + reasoningEffort: strings.TrimSpace(meta.ReasoningEffort), workspace: resolvedWorkspace, channel: strings.TrimSpace(meta.Channel), sessionType: normalizeSessionType(Type(meta.SessionType)), @@ -364,6 +368,9 @@ func (m *Manager) prepareSessionStartRuntime( if err != nil { return sessionStartRuntime{}, fmt.Errorf("session: resolve session agent %q: %w", spec.agentName, err) } + if err := spec.validateRuntimeOverrides(resolved); err != nil { + return sessionStartRuntime{}, err + } startMCPServers, err := m.sessionMCPServers(ctx, spec, resolved) if err != nil { @@ -377,6 +384,25 @@ func (m *Manager) prepareSessionStartRuntime( }, nil } +func (s *sessionStartSpec) validateRuntimeOverrides(_ aghconfig.ResolvedAgent) error { + providerOverride := strings.TrimSpace(s.provider) + modelOverride := 
strings.TrimSpace(s.model) + reasoningEffort := strings.TrimSpace(s.reasoningEffort) + if modelOverride != "" && providerOverride == "" { + return fmt.Errorf("%w: provider is required when model is set", ErrInvalidRuntimeOverride) + } + if reasoningEffort == "" { + return nil + } + if providerOverride == "" { + return fmt.Errorf("%w: provider is required when reasoning_effort is set", ErrInvalidRuntimeOverride) + } + if err := ValidateReasoningEffort(reasoningEffort); err != nil { + return err + } + return nil +} + func (m *Manager) sessionMCPServers( ctx context.Context, spec *sessionStartSpec, @@ -440,6 +466,7 @@ func (s *sessionStartSpec) newStartingSession( AgentName: resolved.Name, Provider: strings.TrimSpace(resolved.Provider), Model: strings.TrimSpace(resolved.Model), + ReasoningEffort: strings.TrimSpace(s.reasoningEffort), WorkspaceID: s.workspace.ID, Workspace: s.workspace.RootDir, Channel: s.channel, @@ -602,6 +629,12 @@ func sessionStartEnvForProvider( env = setSessionStartEnvValue(env, "AGH_AGENT_NAME", strings.TrimSpace(session.AgentName)) env = unsetSessionStartEnvKeys(env, "AGH_SESSION_CHANNEL", "AGH_PEER_ID") + if effort := strings.TrimSpace(session.ReasoningEffort); effort != "" { + env = setSessionStartEnvValue(env, "AGH_REASONING_EFFORT", effort) + } else { + env = unsetSessionStartEnvKeys(env, "AGH_REASONING_EFFORT") + } + channel := strings.TrimSpace(session.Channel) if channel == "" { return env diff --git a/internal/session/manager_test.go b/internal/session/manager_test.go index 2c556754b..7ef5952c2 100644 --- a/internal/session/manager_test.go +++ b/internal/session/manager_test.go @@ -102,6 +102,7 @@ func TestCreateAppliesRuntimeModelOverride(t *testing.T) { h := newHarness(t) session, err := h.manager.Create(testutil.Context(t), CreateOpts{ AgentName: "coder", + Provider: "codex", Model: "task-profile-model", Name: "profiled-worker", Workspace: h.workspaceID, @@ -122,6 +123,76 @@ func TestCreateAppliesRuntimeModelOverride(t *testing.T) { 
t.Fatalf("meta.Model = %q, want task-profile-model", meta.Model) } }) + + t.Run("Should reject model override without provider override", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + _, err := h.manager.Create(testutil.Context(t), CreateOpts{ + AgentName: "coder", + Model: "task-profile-model", + Workspace: h.workspaceID, + }) + if !errors.Is(err, ErrInvalidRuntimeOverride) { + t.Fatalf("Create() error = %v, want ErrInvalidRuntimeOverride", err) + } + }) + + t.Run("Should persist supported reasoning effort override", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + session, err := h.manager.Create(testutil.Context(t), CreateOpts{ + AgentName: "coder", + Provider: "codex", + ReasoningEffort: "high", + Name: "reasoned-worker", + Workspace: h.workspaceID, + }) + if err != nil { + t.Fatalf("Create() error = %v", err) + } + t.Cleanup(func() { + if err := h.manager.Stop(testutil.Context(t), session.ID); err != nil { + t.Fatalf("Stop() error = %v", err) + } + }) + + if got := session.Info().ReasoningEffort; got != "high" { + t.Fatalf("session.Info().ReasoningEffort = %q, want high", got) + } + if meta := readMeta(t, session.MetaPath()); meta.ReasoningEffort != "high" { + t.Fatalf("meta.ReasoningEffort = %q, want high", meta.ReasoningEffort) + } + }) + + t.Run("Should persist reasoning effort without provider-level support flag", func(t *testing.T) { + t.Parallel() + + h := newHarness(t) + session, err := h.manager.Create(testutil.Context(t), CreateOpts{ + AgentName: "coder", + Provider: "claude", + ReasoningEffort: "high", + Name: "reasoned-claude-worker", + Workspace: h.workspaceID, + }) + if err != nil { + t.Fatalf("Create() error = %v", err) + } + t.Cleanup(func() { + if err := h.manager.Stop(testutil.Context(t), session.ID); err != nil { + t.Fatalf("Stop() error = %v", err) + } + }) + + if got := session.Info().ReasoningEffort; got != "high" { + t.Fatalf("session.Info().ReasoningEffort = %q, want high", got) + } + if meta := readMeta(t, 
session.MetaPath()); meta.ReasoningEffort != "high" { + t.Fatalf("meta.ReasoningEffort = %q, want high", meta.ReasoningEffort) + } + }) } func TestCreateNotifiesSessionCreationBeforeImmediateExit(t *testing.T) { diff --git a/internal/session/query.go b/internal/session/query.go index 61c324258..c84b17b65 100644 --- a/internal/session/query.go +++ b/internal/session/query.go @@ -309,6 +309,7 @@ func sessionInfoFromMeta(meta store.SessionMeta) *Info { AgentName: meta.AgentName, Provider: meta.Provider, Model: strings.TrimSpace(meta.Model), + ReasoningEffort: strings.TrimSpace(meta.ReasoningEffort), WorkspaceID: meta.WorkspaceID, Channel: meta.Channel, Type: normalizeSessionType(Type(meta.SessionType)), diff --git a/internal/session/query_test.go b/internal/session/query_test.go index 69d381a31..26c022bdc 100644 --- a/internal/session/query_test.go +++ b/internal/session/query_test.go @@ -762,18 +762,19 @@ func TestReadMetaAndQueryHelpers(t *testing.T) { createdAt := time.Date(2026, 4, 3, 12, 0, 0, 0, time.UTC) updatedAt := createdAt.Add(time.Minute) info := sessionInfoFromMeta(store.SessionMeta{ - ID: "sess-1", - Name: "stored", - AgentName: "coder", - Provider: "codex", - Model: " gpt-4o ", - WorkspaceID: "ws-1", - State: string(StateStopped), - StopReason: &stopReason, - StopDetail: "deadline exceeded", - ACPSessionID: &acpID, - CreatedAt: createdAt, - UpdatedAt: updatedAt, + ID: "sess-1", + Name: "stored", + AgentName: "coder", + Provider: "codex", + Model: " gpt-4o ", + ReasoningEffort: " high ", + WorkspaceID: "ws-1", + State: string(StateStopped), + StopReason: &stopReason, + StopDetail: "deadline exceeded", + ACPSessionID: &acpID, + CreatedAt: createdAt, + UpdatedAt: updatedAt, }) if got := info.ACPSessionID; got != "acp-123" { t.Fatalf("sessionInfoFromMeta().ACPSessionID = %q, want %q", got, "acp-123") @@ -784,6 +785,9 @@ func TestReadMetaAndQueryHelpers(t *testing.T) { if got := info.Model; got != "gpt-4o" { t.Fatalf("sessionInfoFromMeta().Model = %q, want 
%q", got, "gpt-4o") } + if got := info.ReasoningEffort; got != "high" { + t.Fatalf("sessionInfoFromMeta().ReasoningEffort = %q, want %q", got, "high") + } if got := info.State; got != StateStopped { t.Fatalf("sessionInfoFromMeta().State = %q, want %q", got, StateStopped) } diff --git a/internal/session/runtime_overrides.go b/internal/session/runtime_overrides.go new file mode 100644 index 000000000..a2972e5cf --- /dev/null +++ b/internal/session/runtime_overrides.go @@ -0,0 +1,27 @@ +package session + +import ( + "fmt" + "slices" + "strings" +) + +// SupportedReasoningEfforts is the canonical ordered enum accepted by session creation. +var SupportedReasoningEfforts = []string{"minimal", "low", "medium", "high", "xhigh"} + +// IsSupportedReasoningEffort reports whether value is an accepted reasoning effort. +func IsSupportedReasoningEffort(value string) bool { + return slices.Contains(SupportedReasoningEfforts, strings.TrimSpace(value)) +} + +// ValidateReasoningEffort validates one reasoning effort override. 
+func ValidateReasoningEffort(value string) error { + trimmed := strings.TrimSpace(value) + if trimmed == "" || IsSupportedReasoningEffort(trimmed) { + return nil + } + return fmt.Errorf( + "%w: reasoning_effort must be one of minimal, low, medium, high, xhigh", + ErrInvalidRuntimeOverride, + ) +} diff --git a/internal/session/session.go b/internal/session/session.go index fa66a2213..9ef7e1f81 100644 --- a/internal/session/session.go +++ b/internal/session/session.go @@ -52,6 +52,7 @@ type Info struct { AgentName string Provider string Model string + ReasoningEffort string WorkspaceID string Workspace string Channel string @@ -81,6 +82,7 @@ type Session struct { AgentName string Provider string Model string + ReasoningEffort string WorkspaceID string Workspace string Channel string @@ -131,6 +133,7 @@ func (s *Session) Info() *Info { AgentName: s.AgentName, Provider: s.Provider, Model: s.Model, + ReasoningEffort: s.ReasoningEffort, WorkspaceID: s.WorkspaceID, Workspace: s.Workspace, Channel: s.Channel, @@ -833,6 +836,7 @@ func (s *Session) Meta() store.SessionMeta { AgentName: s.AgentName, Provider: s.Provider, Model: s.Model, + ReasoningEffort: s.ReasoningEffort, WorkspaceID: s.WorkspaceID, Channel: s.Channel, SessionType: string(normalizeSessionType(s.Type)), diff --git a/internal/settings/collections.go b/internal/settings/collections.go index 2a8c83a37..d6b4bcb6a 100644 --- a/internal/settings/collections.go +++ b/internal/settings/collections.go @@ -241,7 +241,7 @@ func (s *service) buildProviderItems(ctx context.Context, cfg *aghconfig.Config) } } - items = append(items, cloneProviderItem(item)) + items = append(items, cloneProviderItem(&item)) } return items, nil } @@ -250,7 +250,7 @@ func providerSettingsFromConfig(name string, provider aghconfig.ProviderConfig) return ProviderSettings{ Command: provider.Command, DisplayName: provider.DisplayName, - DefaultModel: provider.DefaultModel, + Models: cloneProviderModelsConfig(provider.Models), Harness: 
provider.EffectiveHarness(), RuntimeProvider: provider.RuntimeProviderName(name), Transport: strings.TrimSpace(provider.Transport), @@ -498,7 +498,11 @@ func (s *service) putProvider( } if _, err := aghconfig.EditConfigOverlay(s.homePaths, "", target, func(editor *aghconfig.OverlayEditor) error { - return editor.SetTable([]string{"providers", name}, values) + path := []string{"providers", name} + if err := editor.Delete(path); err != nil { + return err + } + return editor.SetTable(path, values) }); err != nil { return MutationResult{}, fmt.Errorf("settings: write provider %q: %w", name, err) } @@ -1098,8 +1102,8 @@ func providerSettingsMap(settings ProviderSettings) map[string]any { if strings.TrimSpace(settings.DisplayName) != "" { values["display_name"] = strings.TrimSpace(settings.DisplayName) } - if strings.TrimSpace(settings.DefaultModel) != "" { - values["default_model"] = strings.TrimSpace(settings.DefaultModel) + if models := providerModelsSettingsMap(settings.Models); len(models) > 0 { + values["models"] = models } if settings.Harness != "" { values["harness"] = string(settings.Harness) @@ -1134,6 +1138,83 @@ func providerSettingsMap(settings ProviderSettings) map[string]any { return values } +func providerModelsSettingsMap(models aghconfig.ProviderModelsConfig) map[string]any { + values := make(map[string]any) + if strings.TrimSpace(models.Default) != "" { + values["default"] = strings.TrimSpace(models.Default) + } + if len(models.Curated) > 0 { + values["curated"] = providerModelConfigMaps(models.Curated) + } + if discovery := providerModelsDiscoveryMap(models.Discovery); len(discovery) > 0 { + values["discovery"] = discovery + } + return values +} + +func providerModelConfigMaps(models []aghconfig.ProviderModelConfig) []map[string]any { + values := make([]map[string]any, 0, len(models)) + for _, model := range models { + entry := make(map[string]any) + if strings.TrimSpace(model.ID) != "" { + entry["id"] = strings.TrimSpace(model.ID) + } + if 
strings.TrimSpace(model.DisplayName) != "" { + entry["display_name"] = strings.TrimSpace(model.DisplayName) + } + if model.ContextWindow != nil { + entry["context_window"] = *model.ContextWindow + } + if model.MaxInputTokens != nil { + entry["max_input_tokens"] = *model.MaxInputTokens + } + if model.MaxOutputTokens != nil { + entry["max_output_tokens"] = *model.MaxOutputTokens + } + if model.SupportsTools != nil { + entry["supports_tools"] = *model.SupportsTools + } + if model.SupportsReasoning != nil { + entry["supports_reasoning"] = *model.SupportsReasoning + } + if len(model.ReasoningEfforts) > 0 { + entry["reasoning_efforts"] = cloneStringSlicePreserveNil(model.ReasoningEfforts) + } + if strings.TrimSpace(model.DefaultReasoningEffort) != "" { + entry["default_reasoning_effort"] = strings.TrimSpace(model.DefaultReasoningEffort) + } + if model.CostInputPerMillion != nil { + entry["cost_input_per_million"] = *model.CostInputPerMillion + } + if model.CostOutputPerMillion != nil { + entry["cost_output_per_million"] = *model.CostOutputPerMillion + } + values = append(values, entry) + } + return values +} + +func providerModelsDiscoveryMap(discovery aghconfig.ProviderModelsDiscoveryConfig) map[string]any { + values := make(map[string]any) + if discovery.Enabled != nil { + values["enabled"] = *discovery.Enabled + } + if strings.TrimSpace(discovery.Command) != "" { + values["command"] = strings.TrimSpace(discovery.Command) + } + if strings.TrimSpace(discovery.Endpoint) != "" { + values["endpoint"] = strings.TrimSpace(discovery.Endpoint) + } + if strings.TrimSpace(discovery.Timeout) != "" { + values["timeout"] = strings.TrimSpace(discovery.Timeout) + } + return values +} + +func boolPtr(value bool) *bool { + return &value +} + func providerCredentialSlotMaps(slots []aghconfig.ProviderCredentialSlot) []map[string]any { values := make([]map[string]any, 0, len(slots)) for _, slot := range slots { diff --git a/internal/settings/models.go b/internal/settings/models.go index 
c270482b7..49690a5cf 100644 --- a/internal/settings/models.go +++ b/internal/settings/models.go @@ -464,7 +464,8 @@ type SourceMetadata struct { type ProviderSettings struct { Command string DisplayName string - DefaultModel string + Models aghconfig.ProviderModelsConfig + ModelsSet bool Harness aghconfig.ProviderHarness RuntimeProvider string Transport string @@ -633,20 +634,100 @@ func cloneSourceMetadata(value SourceMetadata) SourceMetadata { } func cloneProviderSettings(value ProviderSettings) ProviderSettings { + value.Models = cloneProviderModelsConfig(value.Models) value.CredentialSlots = append([]aghconfig.ProviderCredentialSlot(nil), value.CredentialSlots...) return value } -func cloneProviderItem(value ProviderItem) ProviderItem { - value.Settings = cloneProviderSettings(value.Settings) - value.Credentials = append([]ProviderCredentialStatus(nil), value.Credentials...) - value.SourceMetadata = cloneSourceMetadata(value.SourceMetadata) +func cloneProviderModelsConfig(value aghconfig.ProviderModelsConfig) aghconfig.ProviderModelsConfig { + return aghconfig.ProviderModelsConfig{ + Default: value.Default, + Curated: cloneProviderModelConfigs(value.Curated), + Discovery: cloneProviderModelsDiscoveryConfig(value.Discovery), + } +} + +func cloneProviderModelsDiscoveryConfig( + value aghconfig.ProviderModelsDiscoveryConfig, +) aghconfig.ProviderModelsDiscoveryConfig { + return aghconfig.ProviderModelsDiscoveryConfig{ + Enabled: cloneBoolPtr(value.Enabled), + Command: value.Command, + Endpoint: value.Endpoint, + Timeout: value.Timeout, + } +} + +func cloneProviderModelConfigs(values []aghconfig.ProviderModelConfig) []aghconfig.ProviderModelConfig { + if values == nil { + return nil + } + cloned := make([]aghconfig.ProviderModelConfig, len(values)) + for idx, value := range values { + cloned[idx] = aghconfig.ProviderModelConfig{ + ID: value.ID, + DisplayName: value.DisplayName, + ContextWindow: cloneInt64Ptr(value.ContextWindow), + MaxInputTokens: 
cloneInt64Ptr(value.MaxInputTokens), + MaxOutputTokens: cloneInt64Ptr(value.MaxOutputTokens), + SupportsTools: cloneBoolPtr(value.SupportsTools), + SupportsReasoning: cloneBoolPtr(value.SupportsReasoning), + ReasoningEfforts: cloneStringSlicePreserveNil(value.ReasoningEfforts), + DefaultReasoningEffort: value.DefaultReasoningEffort, + CostInputPerMillion: cloneFloat64Ptr(value.CostInputPerMillion), + CostOutputPerMillion: cloneFloat64Ptr(value.CostOutputPerMillion), + } + } + return cloned +} + +func cloneInt64Ptr(value *int64) *int64 { + if value == nil { + return nil + } + cloned := *value + return &cloned +} + +func cloneFloat64Ptr(value *float64) *float64 { + if value == nil { + return nil + } + cloned := *value + return &cloned +} + +func cloneStringSlicePreserveNil(value []string) []string { + if value == nil { + return nil + } + cloned := make([]string, len(value)) + copy(cloned, value) + return cloned +} + +func cloneBoolPtr(value *bool) *bool { + if value == nil { + return nil + } + cloned := *value + return &cloned +} + +func cloneProviderItem(value *ProviderItem) ProviderItem { + if value == nil { + return ProviderItem{} + } + cloned := *value + cloned.Settings = cloneProviderSettings(value.Settings) + cloned.Credentials = append([]ProviderCredentialStatus(nil), value.Credentials...) 
+ cloned.SourceMetadata = cloneSourceMetadata(value.SourceMetadata) if value.Fallback != nil { fallback := *value.Fallback fallback.Settings = cloneProviderSettings(fallback.Settings) - value.Fallback = &fallback + cloned.Fallback = &fallback } - return value + return cloned } func cloneMCPServerItem(value MCPServerItem) MCPServerItem { diff --git a/internal/settings/service_test.go b/internal/settings/service_test.go index 4c4816f3d..a06643ed9 100644 --- a/internal/settings/service_test.go +++ b/internal/settings/service_test.go @@ -666,11 +666,19 @@ func TestListCollectionBuildsProvidersSandboxesAndHooks(t *testing.T) { writeFile(t, homePaths.ConfigFile, baseSettingsConfig()+` [providers.codex] -default_model = "gpt-5" +[providers.codex.models] +default = "gpt-5" +[[providers.codex.models.curated]] +id = "gpt-5" +display_name = "GPT-5" +[[providers.codex.models.curated]] +id = "gpt-5-mini" +display_name = "GPT-5 Mini" [providers.custom] command = "custom-acp --stdio" - default_model = "custom-model" + [providers.custom.models] + default = "custom-model" [[providers.custom.credential_slots]] name = "api_key" target_env = "CUSTOM_API_KEY" @@ -715,9 +723,18 @@ command = "/bin/ship" t.Fatalf("ListCollection(providers) error = %v", err) } codex := mustFindProviderItem(t, providers.Providers, "codex") - if got, want := codex.Settings.DefaultModel, "gpt-5"; got != want { + if got, want := codex.Settings.Models.Default, "gpt-5"; got != want { t.Fatalf("codex default model = %q, want %q", got, want) } + if got, want := len(codex.Settings.Models.Curated), 2; got != want { + t.Fatalf("codex curated model count = %d, want %d", got, want) + } + if got, want := codex.Settings.Models.Curated[0].ID, "gpt-5"; got != want { + t.Fatalf("codex curated[0].ID = %q, want %q", got, want) + } + if got, want := codex.Settings.Models.Curated[1].ID, "gpt-5-mini"; got != want { + t.Fatalf("codex curated[1].ID = %q, want %q", got, want) + } if !codex.Default { t.Fatal("codex default = false, 
want true") } @@ -782,8 +799,21 @@ func TestCollectionMutationsProviderSandboxAndHook(t *testing.T) { CollectionRequest: CollectionRequest{Collection: CollectionProviders}, Name: "custom", Provider: &ProviderSettings{ - Command: "custom-acp --stdio", - DefaultModel: "custom-model", + Command: "custom-acp --stdio", + Models: aghconfig.ProviderModelsConfig{ + Default: "custom-model", + Curated: []aghconfig.ProviderModelConfig{ + { + ID: "custom-model", + DisplayName: "Custom Model", + SupportsReasoning: boolPtr(true), + ReasoningEfforts: []string{"low", "high"}, + DefaultReasoningEffort: "high", + SupportsTools: boolPtr(true), + }, + {ID: "custom-fast", DisplayName: "Custom Fast"}, + }, + }, CredentialSlots: []aghconfig.ProviderCredentialSlot{ { Name: "api_key", @@ -806,9 +836,32 @@ func TestCollectionMutationsProviderSandboxAndHook(t *testing.T) { } configPayload := readFile(t, homePaths.ConfigFile) if !strings.Contains(configPayload, "[providers.custom]") || - !strings.Contains(configPayload, `default_model = "custom-model"`) { + !strings.Contains(configPayload, "[providers.custom.models]") || + !strings.Contains(configPayload, `default = "custom-model"`) || + !strings.Contains(configPayload, `[[providers.custom.models.curated]]`) || + !strings.Contains(configPayload, `id = "custom-model"`) || + !strings.Contains(configPayload, `reasoning_efforts = ["low", "high"]`) { t.Fatalf("config payload missing provider overlay:\n%s", configPayload) } + clearModelsResult, err := service.PutCollectionItem(ctx, CollectionItemPutRequest{ + CollectionRequest: CollectionRequest{Collection: CollectionProviders}, + Name: "custom", + Provider: &ProviderSettings{ + Command: "custom-acp --stdio", + ModelsSet: true, + }, + }) + if err != nil { + t.Fatalf("PutCollectionItem(clear provider models) error = %v", err) + } + if got, want := clearModelsResult.WriteTarget, WriteTargetGlobalConfig; got != want { + t.Fatalf("clear provider models write target = %q, want %q", got, want) + } + 
configPayload = readFile(t, homePaths.ConfigFile) + if strings.Contains(configPayload, `default = "custom-model"`) || + strings.Contains(configPayload, `[[providers.custom.models.curated]]`) { + t.Fatalf("config payload still contains provider model overlay after clear:\n%s", configPayload) + } if _, err := service.DeleteCollectionItem(ctx, CollectionItemDeleteRequest{ CollectionRequest: CollectionRequest{Collection: CollectionProviders}, Name: "custom", @@ -2014,9 +2067,10 @@ command = "/bin/echo" func mustFindProviderItem(t *testing.T, items []ProviderItem, name string) ProviderItem { t.Helper() - for _, item := range items { + for idx := range items { + item := &items[idx] if item.Name == name { - return item + return *item } } t.Fatalf("Provider item %q not found in %#v", name, items) diff --git a/internal/situation/service.go b/internal/situation/service.go index 4f7be71ec..3efb37d60 100644 --- a/internal/situation/service.go +++ b/internal/situation/service.go @@ -387,7 +387,7 @@ func (s *Service) resolveAgent( model := strings.TrimSpace(agent.Model) if provider != "" && model == "" { if providerConfig, err := workspaceSnapshot.Config.ResolveProvider(provider); err == nil { - model = strings.TrimSpace(providerConfig.DefaultModel) + model = strings.TrimSpace(providerConfig.Models.Default) } } return aghconfig.ResolvedAgent{ diff --git a/internal/store/types.go b/internal/store/types.go index 7cec62a49..fbd5b7be4 100644 --- a/internal/store/types.go +++ b/internal/store/types.go @@ -1372,6 +1372,7 @@ type SessionMeta struct { AgentName string `json:"agent_name"` Provider string `json:"provider,omitempty"` Model string `json:"model,omitempty"` + ReasoningEffort string `json:"reasoning_effort,omitempty"` WorkspaceID string `json:"workspace_id,omitempty"` Channel string `json:"channel,omitempty"` SessionType string `json:"session_type,omitempty"` diff --git a/internal/testutil/e2e/config_seed_test.go b/internal/testutil/e2e/config_seed_test.go index 
dd4f1aeea..9cc4d23de 100644 --- a/internal/testutil/e2e/config_seed_test.go +++ b/internal/testutil/e2e/config_seed_test.go @@ -19,8 +19,10 @@ func TestSeedConfigPreservesLiveProviderAndAgentValidation(t *testing.T) { DefaultAgent: "coder", Providers: map[string]aghconfig.ProviderConfig{ "fake": { - Command: "fake-agent --stdio", - DefaultModel: "fake-model", + Command: "fake-agent --stdio", + Models: aghconfig.ProviderModelsConfig{ + Default: "fake-model", + }, CredentialSlots: []aghconfig.ProviderCredentialSlot{ { Name: "api_key", diff --git a/internal/workspace/clone.go b/internal/workspace/clone.go index fad7120cb..68ecd6df2 100644 --- a/internal/workspace/clone.go +++ b/internal/workspace/clone.go @@ -65,6 +65,7 @@ func cloneConfig(src *aghconfig.Config) aghconfig.Config { Permissions: src.Permissions, MCPServers: cloneMCPServers(src.MCPServers), Providers: cloneProviders(src.Providers), + ModelCatalog: cloneModelCatalogConfig(src.ModelCatalog), Sandboxes: cloneSandboxProfiles(src.Sandboxes), Observability: src.Observability, Log: src.Log, @@ -157,17 +158,102 @@ func cloneProvider(src aghconfig.ProviderConfig) aghconfig.ProviderConfig { return aghconfig.ProviderConfig{ Command: src.Command, DisplayName: src.DisplayName, - DefaultModel: src.DefaultModel, + Models: cloneProviderModelsConfig(src.Models), Harness: src.Harness, RuntimeProvider: src.RuntimeProvider, Transport: src.Transport, BaseURL: src.BaseURL, + AuthMode: src.AuthMode, + EnvPolicy: src.EnvPolicy, + HomePolicy: src.HomePolicy, + AuthStatusCmd: src.AuthStatusCmd, + AuthLoginCmd: src.AuthLoginCmd, + SessionMCP: cloneBoolPtr(src.SessionMCP), Aliases: append([]string(nil), src.Aliases...), CredentialSlots: append([]aghconfig.ProviderCredentialSlot(nil), src.CredentialSlots...), MCPServers: cloneMCPServers(src.MCPServers), } } +func cloneBoolPtr(src *bool) *bool { + if src == nil { + return nil + } + value := *src + return &value +} + +func cloneInt64Ptr(src *int64) *int64 { + if src == nil { + return 
nil + } + value := *src + return &value +} + +func cloneFloat64Ptr(src *float64) *float64 { + if src == nil { + return nil + } + value := *src + return &value +} + +func cloneProviderModelsConfig(src aghconfig.ProviderModelsConfig) aghconfig.ProviderModelsConfig { + return aghconfig.ProviderModelsConfig{ + Default: src.Default, + Curated: cloneProviderModelConfigs(src.Curated), + Discovery: cloneProviderModelsDiscoveryConfig(src.Discovery), + } +} + +func cloneProviderModelsDiscoveryConfig( + src aghconfig.ProviderModelsDiscoveryConfig, +) aghconfig.ProviderModelsDiscoveryConfig { + return aghconfig.ProviderModelsDiscoveryConfig{ + Enabled: cloneBoolPtr(src.Enabled), + Command: src.Command, + Endpoint: src.Endpoint, + Timeout: src.Timeout, + } +} + +func cloneProviderModelConfigs(src []aghconfig.ProviderModelConfig) []aghconfig.ProviderModelConfig { + if src == nil { + return nil + } + cloned := make([]aghconfig.ProviderModelConfig, len(src)) + for idx, model := range src { + cloned[idx] = aghconfig.ProviderModelConfig{ + ID: model.ID, + DisplayName: model.DisplayName, + ContextWindow: cloneInt64Ptr(model.ContextWindow), + MaxInputTokens: cloneInt64Ptr(model.MaxInputTokens), + MaxOutputTokens: cloneInt64Ptr(model.MaxOutputTokens), + SupportsTools: cloneBoolPtr(model.SupportsTools), + SupportsReasoning: cloneBoolPtr(model.SupportsReasoning), + ReasoningEfforts: append([]string(nil), model.ReasoningEfforts...), + DefaultReasoningEffort: model.DefaultReasoningEffort, + CostInputPerMillion: cloneFloat64Ptr(model.CostInputPerMillion), + CostOutputPerMillion: cloneFloat64Ptr(model.CostOutputPerMillion), + } + } + return cloned +} + +func cloneModelCatalogConfig(src aghconfig.ModelCatalogConfig) aghconfig.ModelCatalogConfig { + return aghconfig.ModelCatalogConfig{ + Sources: aghconfig.ModelCatalogSourcesConfig{ + ModelsDev: aghconfig.ModelsDevSourceConfig{ + Enabled: cloneBoolPtr(src.Sources.ModelsDev.Enabled), + Endpoint: src.Sources.ModelsDev.Endpoint, + TTL: 
src.Sources.ModelsDev.TTL, + Timeout: src.Sources.ModelsDev.Timeout, + }, + }, + } +} + func cloneAgentDefs(src []aghconfig.AgentDef) []aghconfig.AgentDef { if len(src) == 0 { return nil diff --git a/internal/workspace/resolver_test.go b/internal/workspace/resolver_test.go index b9ca3730f..d03e4d788 100644 --- a/internal/workspace/resolver_test.go +++ b/internal/workspace/resolver_test.go @@ -1274,8 +1274,10 @@ func TestCloneConfigProducesDeepCopy(t *testing.T) { }, Providers: map[string]aghconfig.ProviderConfig{ "claude": { - Command: "claude", - DefaultModel: "sonnet", + Command: "claude", + Models: aghconfig.ProviderModelsConfig{ + Default: "sonnet", + }, CredentialSlots: []aghconfig.ProviderCredentialSlot{ { Name: "api_key", diff --git a/openapi/agh.json b/openapi/agh.json index 1f8fab750..76da08f09 100644 --- a/openapi/agh.json +++ b/openapi/agh.json @@ -4804,12 +4804,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -35390,12 +35396,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -36066,12 +36078,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -42203,12 +42221,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -42348,12 +42372,18 @@ "channel": { "type": "string" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": 
"string" }, + "reasoning_effort": { + "type": "string" + }, "workspace": { "type": "string" }, @@ -42686,12 +42716,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -43246,12 +43282,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -44537,12 +44579,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { @@ -53091,9 +53139,6 @@ }, "type": "array" }, - "default_model": { - "type": "string" - }, "display_name": { "type": "string" }, @@ -53106,6 +53151,93 @@ "home_policy": { "type": "string" }, + "models": { + "nullable": true, + "properties": { + "curated": { + "items": { + "properties": { + "context_window": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "cost_input_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "cost_output_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "default_reasoning_effort": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "max_input_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "max_output_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "reasoning_efforts": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supports_reasoning": { + "nullable": true, + "type": "boolean" + }, + "supports_tools": { + "nullable": true, + "type": "boolean" + } + }, + "required": [ + "id" + ], + "type": "object" + }, + 
"type": "array" + }, + "default": { + "type": "string" + }, + "discovery": { + "nullable": true, + "properties": { + "command": { + "type": "string" + }, + "enabled": { + "nullable": true, + "type": "boolean" + }, + "endpoint": { + "type": "string" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, "runtime_provider": { "type": "string" }, @@ -53200,9 +53332,6 @@ }, "type": "array" }, - "default_model": { - "type": "string" - }, "display_name": { "type": "string" }, @@ -53215,6 +53344,91 @@ "home_policy": { "type": "string" }, + "models": { + "nullable": true, + "properties": { + "curated": { + "items": { + "properties": { + "context_window": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "cost_input_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "cost_output_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "default_reasoning_effort": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "max_input_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "max_output_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "reasoning_efforts": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supports_reasoning": { + "nullable": true, + "type": "boolean" + }, + "supports_tools": { + "nullable": true, + "type": "boolean" + } + }, + "required": ["id"], + "type": "object" + }, + "type": "array" + }, + "default": { + "type": "string" + }, + "discovery": { + "nullable": true, + "properties": { + "command": { + "type": "string" + }, + "enabled": { + "nullable": true, + "type": "boolean" + }, + "endpoint": { + "type": "string" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, "runtime_provider": { "type": "string" }, @@ -53650,9 +53864,6 @@ }, "type": "array" }, - 
"default_model": { - "type": "string" - }, "display_name": { "type": "string" }, @@ -53665,6 +53876,91 @@ "home_policy": { "type": "string" }, + "models": { + "nullable": true, + "properties": { + "curated": { + "items": { + "properties": { + "context_window": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "cost_input_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "cost_output_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "default_reasoning_effort": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "max_input_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "max_output_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "reasoning_efforts": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supports_reasoning": { + "nullable": true, + "type": "boolean" + }, + "supports_tools": { + "nullable": true, + "type": "boolean" + } + }, + "required": ["id"], + "type": "object" + }, + "type": "array" + }, + "default": { + "type": "string" + }, + "discovery": { + "nullable": true, + "properties": { + "command": { + "type": "string" + }, + "enabled": { + "nullable": true, + "type": "boolean" + }, + "endpoint": { + "type": "string" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, "runtime_provider": { "type": "string" }, @@ -53759,9 +54055,6 @@ }, "type": "array" }, - "default_model": { - "type": "string" - }, "display_name": { "type": "string" }, @@ -53774,6 +54067,91 @@ "home_policy": { "type": "string" }, + "models": { + "nullable": true, + "properties": { + "curated": { + "items": { + "properties": { + "context_window": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "cost_input_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + 
"cost_output_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "default_reasoning_effort": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "max_input_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "max_output_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "reasoning_efforts": { + "items": { + "type": "string" + }, + "type": "array" + }, + "supports_reasoning": { + "nullable": true, + "type": "boolean" + }, + "supports_tools": { + "nullable": true, + "type": "boolean" + } + }, + "required": ["id"], + "type": "object" + }, + "type": "array" + }, + "default": { + "type": "string" + }, + "discovery": { + "nullable": true, + "properties": { + "command": { + "type": "string" + }, + "enabled": { + "nullable": true, + "type": "boolean" + }, + "endpoint": { + "type": "string" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, "runtime_provider": { "type": "string" }, @@ -54016,9 +54394,6 @@ }, "type": "array" }, - "default_model": { - "type": "string" - }, "display_name": { "type": "string" }, @@ -54031,6 +54406,91 @@ "home_policy": { "type": "string" }, + "models": { + "nullable": true, + "properties": { + "curated": { + "items": { + "properties": { + "context_window": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "cost_input_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "cost_output_per_million": { + "format": "double", + "nullable": true, + "type": "number" + }, + "default_reasoning_effort": { + "type": "string" + }, + "display_name": { + "type": "string" + }, + "id": { + "type": "string" + }, + "max_input_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "max_output_tokens": { + "format": "int64", + "nullable": true, + "type": "integer" + }, + "reasoning_efforts": { + "items": { 
+ "type": "string" + }, + "type": "array" + }, + "supports_reasoning": { + "nullable": true, + "type": "boolean" + }, + "supports_tools": { + "nullable": true, + "type": "boolean" + } + }, + "required": ["id"], + "type": "object" + }, + "type": "array" + }, + "default": { + "type": "string" + }, + "discovery": { + "nullable": true, + "properties": { + "command": { + "type": "string" + }, + "enabled": { + "nullable": true, + "type": "boolean" + }, + "endpoint": { + "type": "string" + }, + "timeout": { + "type": "string" + } + }, + "type": "object" + } + }, + "type": "object" + }, "runtime_provider": { "type": "string" }, @@ -79294,9 +79754,6 @@ "auth_mode": { "type": "string" }, - "default_model": { - "type": "string" - }, "display_name": { "type": "string" }, @@ -79634,12 +80091,18 @@ ], "type": "object" }, + "model": { + "type": "string" + }, "name": { "type": "string" }, "provider": { "type": "string" }, + "reasoning_effort": { + "type": "string" + }, "sandbox": { "nullable": true, "properties": { diff --git a/sdk/typescript/src/generated/contracts.ts b/sdk/typescript/src/generated/contracts.ts index 78c462c35..445b713dc 100644 --- a/sdk/typescript/src/generated/contracts.ts +++ b/sdk/typescript/src/generated/contracts.ts @@ -3548,6 +3548,8 @@ export interface SessionsCreateParams { agent: string; prompt?: string; provider?: string; + model?: string; + reasoning_effort?: string; workspace?: string; } diff --git a/web/e2e/__tests__/session-provider-override.spec.ts b/web/e2e/__tests__/session-provider-override.spec.ts index 0a0b30579..23f645765 100644 --- a/web/e2e/__tests__/session-provider-override.spec.ts +++ b/web/e2e/__tests__/session-provider-override.spec.ts @@ -283,7 +283,8 @@ async function writeWorkspaceConfig(input: { lines.push( `[providers.${overrideProvider}]`, `command = "${escapeTomlString(input.overrideCommand)}"`, - `default_model = "qa-browser-model"`, + `[providers.${overrideProvider}.models]`, + `default = "qa-browser-model"`, 
`[[providers.${overrideProvider}.credential_slots]]`, `name = "api_key"`, `target_env = "QA_BROWSER_API_KEY"`, diff --git a/web/e2e/fixtures/__tests__/runtime-seed.test.ts b/web/e2e/fixtures/__tests__/runtime-seed.test.ts index 654ed905c..41debb174 100644 --- a/web/e2e/fixtures/__tests__/runtime-seed.test.ts +++ b/web/e2e/fixtures/__tests__/runtime-seed.test.ts @@ -1118,7 +1118,7 @@ describe("browser runtime seed helpers", () => { name: "browser-provider", settings: { command: "browser-provider", - default_model: "gpt-5.4", + models: { default: "gpt-5.4", curated: [{ id: "gpt-5.4" }] }, }, }, ], diff --git a/web/src/generated/agh-openapi.d.ts b/web/src/generated/agh-openapi.d.ts index 6bf9f8747..6596a7888 100644 --- a/web/src/generated/agh-openapi.d.ts +++ b/web/src/generated/agh-openapi.d.ts @@ -5482,8 +5482,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -19446,8 +19448,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -19696,8 +19700,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -22572,8 +22578,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -22650,8 +22658,10 @@ export interface operations { "application/json": { agent_name?: string; channel?: string; + model?: string; name?: string; provider?: string; + 
reasoning_effort?: string; workspace?: string; workspace_path?: string; }; @@ -22758,8 +22768,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -22960,8 +22972,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -23597,8 +23611,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; @@ -27779,11 +27795,37 @@ export interface operations { secret_ref: string; target_env: string; }[]; - default_model?: string; display_name?: string; env_policy?: string; harness?: string; home_policy?: string; + models?: { + curated?: { + /** Format: int64 */ + context_window?: number | null; + /** Format: double */ + cost_input_per_million?: number | null; + /** Format: double */ + cost_output_per_million?: number | null; + default_reasoning_effort?: string; + display_name?: string; + id: string; + /** Format: int64 */ + max_input_tokens?: number | null; + /** Format: int64 */ + max_output_tokens?: number | null; + reasoning_efforts?: string[]; + supports_reasoning?: boolean | null; + supports_tools?: boolean | null; + }[]; + default?: string; + discovery?: { + command?: string; + enabled?: boolean | null; + endpoint?: string; + timeout?: string; + } | null; + } | null; runtime_provider?: string; transport?: string; }; @@ -27817,11 +27859,37 @@ export interface operations { secret_ref: string; target_env: string; }[]; - default_model?: string; display_name?: string; env_policy?: string; harness?: string; home_policy?: string; + models?: { + 
curated?: { + /** Format: int64 */ + context_window?: number | null; + /** Format: double */ + cost_input_per_million?: number | null; + /** Format: double */ + cost_output_per_million?: number | null; + default_reasoning_effort?: string; + display_name?: string; + id: string; + /** Format: int64 */ + max_input_tokens?: number | null; + /** Format: int64 */ + max_output_tokens?: number | null; + reasoning_efforts?: string[]; + supports_reasoning?: boolean | null; + supports_tools?: boolean | null; + }[]; + default?: string; + discovery?: { + command?: string; + enabled?: boolean | null; + endpoint?: string; + timeout?: string; + } | null; + } | null; runtime_provider?: string; transport?: string; }; @@ -27944,11 +28012,37 @@ export interface operations { secret_ref: string; target_env: string; }[]; - default_model?: string; display_name?: string; env_policy?: string; harness?: string; home_policy?: string; + models?: { + curated?: { + /** Format: int64 */ + context_window?: number | null; + /** Format: double */ + cost_input_per_million?: number | null; + /** Format: double */ + cost_output_per_million?: number | null; + default_reasoning_effort?: string; + display_name?: string; + id: string; + /** Format: int64 */ + max_input_tokens?: number | null; + /** Format: int64 */ + max_output_tokens?: number | null; + reasoning_efforts?: string[]; + supports_reasoning?: boolean | null; + supports_tools?: boolean | null; + }[]; + default?: string; + discovery?: { + command?: string; + enabled?: boolean | null; + endpoint?: string; + timeout?: string; + } | null; + } | null; runtime_provider?: string; transport?: string; }; @@ -27982,11 +28076,37 @@ export interface operations { secret_ref: string; target_env: string; }[]; - default_model?: string; display_name?: string; env_policy?: string; harness?: string; home_policy?: string; + models?: { + curated?: { + /** Format: int64 */ + context_window?: number | null; + /** Format: double */ + cost_input_per_million?: number | 
null; + /** Format: double */ + cost_output_per_million?: number | null; + default_reasoning_effort?: string; + display_name?: string; + id: string; + /** Format: int64 */ + max_input_tokens?: number | null; + /** Format: int64 */ + max_output_tokens?: number | null; + reasoning_efforts?: string[]; + supports_reasoning?: boolean | null; + supports_tools?: boolean | null; + }[]; + default?: string; + discovery?: { + command?: string; + enabled?: boolean | null; + endpoint?: string; + timeout?: string; + } | null; + } | null; runtime_provider?: string; transport?: string; }; @@ -28097,11 +28217,37 @@ export interface operations { secret_ref: string; target_env: string; }[]; - default_model?: string; display_name?: string; env_policy?: string; harness?: string; home_policy?: string; + models?: { + curated?: { + /** Format: int64 */ + context_window?: number | null; + /** Format: double */ + cost_input_per_million?: number | null; + /** Format: double */ + cost_output_per_million?: number | null; + default_reasoning_effort?: string; + display_name?: string; + id: string; + /** Format: int64 */ + max_input_tokens?: number | null; + /** Format: int64 */ + max_output_tokens?: number | null; + reasoning_efforts?: string[]; + supports_reasoning?: boolean | null; + supports_tools?: boolean | null; + }[]; + default?: string; + discovery?: { + command?: string; + enabled?: boolean | null; + endpoint?: string; + timeout?: string; + } | null; + } | null; runtime_provider?: string; transport?: string; }; @@ -41509,7 +41655,6 @@ export interface operations { }[]; providers?: { auth_mode?: string; - default_model?: string; display_name?: string; env_policy?: string; harness?: string; @@ -41610,8 +41755,10 @@ export interface operations { /** Format: date-time */ ttl_expires_at?: string | null; } | null; + model?: string; name?: string; provider: string; + reasoning_effort?: string; sandbox?: { backend?: string; instance_id?: string; diff --git 
a/web/src/hooks/routes/__tests__/use-settings-providers-page.test.tsx b/web/src/hooks/routes/__tests__/use-settings-providers-page.test.tsx index 4a97719be..56b3faf93 100644 --- a/web/src/hooks/routes/__tests__/use-settings-providers-page.test.tsx +++ b/web/src/hooks/routes/__tests__/use-settings-providers-page.test.tsx @@ -39,7 +39,10 @@ const claudeEntry: SettingsProviderCollection["providers"][number] = { command_available: true, settings: { command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - default_model: "claude-sonnet-4-6", + models: { + default: "claude-sonnet-4-6", + curated: [{ id: "claude-sonnet-4-6" }, { id: "claude-haiku-4-5" }], + }, auth_mode: "native_cli", env_policy: "filtered", home_policy: "operator", @@ -54,7 +57,7 @@ const claudeEntry: SettingsProviderCollection["providers"][number] = { fallback: { settings: { command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - default_model: "claude-sonnet-4-6", + models: { default: "claude-sonnet-4-6" }, }, source: { kind: "builtin-provider", scope: "global" }, }, @@ -66,7 +69,17 @@ const codexEntry: SettingsProviderCollection["providers"][number] = { command_available: true, settings: { command: "npx -y @zed-industries/codex-acp@latest", - default_model: "gpt-5.4", + models: { + default: "gpt-5.4", + curated: [ + { + id: "gpt-5.4", + supports_reasoning: true, + reasoning_efforts: ["low", "medium", "high"], + }, + { id: "gpt-5.4-mini" }, + ], + }, auth_mode: "bound_secret", env_policy: "filtered", home_policy: "operator", @@ -160,7 +173,14 @@ describe("useSettingsProvidersPage", () => { expect(result.current.editor).toMatchObject({ mode: "create", - draft: { name: "", command: "", default_model: "", target_env: "", auth_mode: "native_cli" }, + draft: { + name: "", + command: "", + model_default: "", + curated_models: "", + target_env: "", + auth_mode: "native_cli", + }, }); }); @@ -193,7 +213,8 @@ describe("useSettingsProvidersPage", () => { name: "claude", draft: 
expect.objectContaining({ command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - default_model: "claude-sonnet-4-6", + model_default: "claude-sonnet-4-6", + curated_models: "claude-sonnet-4-6\nclaude-haiku-4-5", target_env: "", auth_mode: "native_cli", env_policy: "filtered", @@ -223,7 +244,11 @@ describe("useSettingsProvidersPage", () => { result.current.openEdit(claudeEntry); }); act(() => { - result.current.updateDraft(draft => ({ ...draft, default_model: "claude-haiku" })); + result.current.updateDraft(draft => ({ + ...draft, + model_default: "claude-haiku", + curated_models: "claude-haiku\nclaude-sonnet-4-6", + })); }); act(() => { result.current.saveEditor(); @@ -236,7 +261,10 @@ describe("useSettingsProvidersPage", () => { expect(putSettingsProvider).toHaveBeenCalledWith("claude", { settings: { command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - default_model: "claude-haiku", + models: { + default: "claude-haiku", + curated: [{ id: "claude-haiku" }, { id: "claude-sonnet-4-6" }], + }, harness: "acp", auth_mode: "native_cli", env_policy: "filtered", @@ -262,7 +290,10 @@ describe("useSettingsProvidersPage", () => { name: "openrouter", settings: { command: "npx -y pi-acp@latest", - default_model: "openai/gpt-5.4", + models: { + default: "openai/gpt-5.4", + curated: [{ id: "openai/gpt-5.4", supports_reasoning: true }], + }, harness: "pi_acp", runtime_provider: "openrouter", auth_mode: "bound_secret", @@ -318,7 +349,7 @@ describe("useSettingsProvidersPage", () => { act(() => { result.current.updateDraft(draft => ({ ...draft, - default_model: "anthropic/claude-sonnet", + model_default: "anthropic/claude-sonnet", credential_slots: draft.credential_slots.map((slot, index) => index === 1 ? 
{ ...slot, secret_ref: "vault:providers/openrouter/organization" } : slot ), @@ -336,7 +367,10 @@ describe("useSettingsProvidersPage", () => { expect(putSettingsProvider).toHaveBeenCalledWith("openrouter", { settings: { command: "npx -y pi-acp@latest", - default_model: "anthropic/claude-sonnet", + models: { + default: "anthropic/claude-sonnet", + curated: [{ id: "openai/gpt-5.4" }], + }, harness: "pi_acp", runtime_provider: "openrouter", auth_mode: "bound_secret", @@ -391,7 +425,7 @@ describe("useSettingsProvidersPage", () => { ...draft, name: "openrouter", command: "npx -y pi-acp@latest", - default_model: "openai/gpt-5.4", + model_default: "openai/gpt-5.4", target_env: "OPENROUTER_API_KEY", harness: "pi_acp", runtime_provider: "openrouter", @@ -411,7 +445,10 @@ describe("useSettingsProvidersPage", () => { expect(putSettingsProvider).toHaveBeenCalledWith("openrouter", { settings: { command: "npx -y pi-acp@latest", - default_model: "openai/gpt-5.4", + models: { + default: "openai/gpt-5.4", + curated: [], + }, harness: "pi_acp", runtime_provider: "openrouter", auth_mode: "bound_secret", diff --git a/web/src/hooks/routes/use-settings-providers-page.ts b/web/src/hooks/routes/use-settings-providers-page.ts index 7e6422386..25242899f 100644 --- a/web/src/hooks/routes/use-settings-providers-page.ts +++ b/web/src/hooks/routes/use-settings-providers-page.ts @@ -14,12 +14,17 @@ import { type ProviderCredentialSlotDraft = NonNullable< NonNullable["credential_slots"] >[number]; +type ProviderModelsPayload = NonNullable< + NonNullable["models"] +>; +type ProviderModelPayload = NonNullable[number]; export type ProviderDraft = { name: string; command: string; display_name: string; - default_model: string; + model_default: string; + curated_models: string; target_env: string; harness: string; runtime_provider: string; @@ -49,7 +54,8 @@ function emptyDraft(): ProviderDraft { name: "", command: "", display_name: "", - default_model: "", + model_default: "", + curated_models: "", 
target_env: "", harness: "acp", runtime_provider: "", @@ -74,7 +80,8 @@ function toDraft(entry: SettingsProviderEntry): ProviderDraft { name: entry.name, command: entry.settings.command ?? "", display_name: entry.settings.display_name ?? "", - default_model: entry.settings.default_model ?? "", + model_default: entry.settings.models?.default ?? "", + curated_models: joinCuratedModels(entry.settings.models?.curated ?? []), target_env: credentialSlot?.target_env ?? "", harness: entry.settings.harness ?? "acp", runtime_provider: entry.settings.runtime_provider ?? "", @@ -96,7 +103,10 @@ function toRequest(draft: ProviderDraft): SettingsProviderRequest { const settings: SettingsProviderRequest["settings"] = {}; if (draft.command.trim()) settings.command = draft.command.trim(); if (draft.display_name.trim()) settings.display_name = draft.display_name.trim(); - if (draft.default_model.trim()) settings.default_model = draft.default_model.trim(); + settings.models = { + ...(draft.model_default.trim() ? { default: draft.model_default.trim() } : {}), + curated: parseCuratedModels(draft.curated_models), + }; if (draft.harness.trim()) settings.harness = draft.harness.trim(); if (draft.runtime_provider.trim()) settings.runtime_provider = draft.runtime_provider.trim(); if (draft.transport.trim()) settings.transport = draft.transport.trim(); @@ -137,6 +147,25 @@ function envSecretRef(apiKeyEnv?: string): string { return envName ? 
`env:${envName}` : ""; } +function joinCuratedModels(models: ProviderModelPayload[]): string { + return models + .map(model => model.id.trim()) + .filter(Boolean) + .join("\n"); +} + +function parseCuratedModels(raw: string): ProviderModelPayload[] { + const seen = new Set(); + const models: ProviderModelPayload[] = []; + for (const part of raw.split(/[\n,]/u)) { + const model = part.trim(); + if (!model || seen.has(model)) continue; + seen.add(model); + models.push({ id: model }); + } + return models; +} + function credentialSlotsForDraft( slots: ProviderCredentialSlotDraft[] ): ProviderCredentialSlotDraft[] { diff --git a/web/src/routes/_app.tsx b/web/src/routes/_app.tsx index 9de93590b..0886ad3b8 100644 --- a/web/src/routes/_app.tsx +++ b/web/src/routes/_app.tsx @@ -67,16 +67,23 @@ function AppLayout() { diff --git a/web/src/routes/_app/settings/__tests__/-providers.test.tsx b/web/src/routes/_app/settings/__tests__/-providers.test.tsx index de34d5f05..ec80ddc84 100644 --- a/web/src/routes/_app/settings/__tests__/-providers.test.tsx +++ b/web/src/routes/_app/settings/__tests__/-providers.test.tsx @@ -27,7 +27,10 @@ const claudeEntry: SettingsProviderEntry = { command_available: true, settings: { command: "npx -y @agentclientprotocol/claude-agent-acp@latest", - default_model: "claude-sonnet-4-6", + models: { + default: "claude-sonnet-4-6", + curated: [{ id: "claude-sonnet-4-6" }, { id: "claude-haiku-4-5" }], + }, auth_mode: "native_cli", env_policy: "filtered", home_policy: "operator", @@ -60,7 +63,17 @@ const builtinEntry: SettingsProviderEntry = { command_available: true, settings: { command: "npx -y @zed-industries/codex-acp@latest", - default_model: "gpt-5.4", + models: { + default: "gpt-5.4", + curated: [ + { + id: "gpt-5.4", + supports_reasoning: true, + reasoning_efforts: ["low", "medium", "high"], + }, + { id: "gpt-5.4-mini" }, + ], + }, auth_mode: "bound_secret", env_policy: "filtered", home_policy: "operator", @@ -244,6 +257,12 @@ 
describe("ProvidersSettingsPage", () => { expect(screen.getByTestId("settings-page-providers-card-claude-auth-mode")).toHaveTextContent( "native_cli" ); + expect( + screen.getByTestId("settings-page-providers-card-claude-curated-models") + ).toHaveTextContent("claude-sonnet-4-6"); + expect(screen.getByTestId("settings-page-providers-card-codex-reasoning")).toHaveTextContent( + "Per model" + ); expect(screen.getByTestId("settings-page-providers-card-claude-auth-status")).toHaveTextContent( "native_cli" ); @@ -326,7 +345,8 @@ describe("ProvidersSettingsPage", () => { name: "claude", command: "npx -y @agentclientprotocol/claude-agent-acp@latest", display_name: "Claude", - default_model: "claude-sonnet-4-6", + model_default: "claude-sonnet-4-6", + curated_models: "claude-sonnet-4-6\nclaude-haiku-4-5", target_env: "", harness: "acp", runtime_provider: "", @@ -354,6 +374,9 @@ describe("ProvidersSettingsPage", () => { expect(screen.getByTestId("settings-providers-editor-command-input")).toHaveValue( "npx -y @agentclientprotocol/claude-agent-acp@latest" ); + expect(screen.getByTestId("settings-providers-editor-curated-models-input")).toHaveValue( + "claude-sonnet-4-6\nclaude-haiku-4-5" + ); expect(screen.getByTestId("settings-providers-editor-source-effective")).toHaveTextContent( "CONFIG" ); @@ -368,7 +391,8 @@ describe("ProvidersSettingsPage", () => { name: "claude", command: "npx -y @agentclientprotocol/claude-agent-acp@latest", display_name: "", - default_model: "", + model_default: "", + curated_models: "", target_env: "ANTHROPIC_API_KEY", harness: "acp", runtime_provider: "", diff --git a/web/src/routes/_app/settings/providers.tsx b/web/src/routes/_app/settings/providers.tsx index 0acd2f1c6..05f7ac9a1 100644 --- a/web/src/routes/_app/settings/providers.tsx +++ b/web/src/routes/_app/settings/providers.tsx @@ -10,6 +10,7 @@ import { Input, NativeSelect, NativeSelectOption, + Textarea, } from "@agh/ui"; import { @@ -296,10 +297,27 @@ function ProviderEditor({ - 
onChange(current => ({ ...current, default_model: event.target.value })) + onChange(current => ({ ...current, model_default: event.target.value })) + } + /> + } + /> + + onChange(current => ({ ...current, curated_models: event.target.value })) } /> } diff --git a/web/src/systems/session/components/__tests__/session-create-dialog.test.tsx b/web/src/systems/session/components/__tests__/session-create-dialog.test.tsx index 89a6ae58b..a34628e71 100644 --- a/web/src/systems/session/components/__tests__/session-create-dialog.test.tsx +++ b/web/src/systems/session/components/__tests__/session-create-dialog.test.tsx @@ -28,9 +28,11 @@ const providerOptions: SessionProviderOption[] = [ display_name: "Claude Code", harness: "acp", runtime_provider: "claude", - default_model: "claude-sonnet-4-6", }, - { name: "codex" }, + { + name: "codex", + display_name: "Codex", + }, { name: "openrouter", display_name: "OpenRouter", @@ -48,18 +50,29 @@ function getDialogBackdrop(): HTMLElement { } function makeProps(overrides: Partial = {}): SessionCreateDialogProps { + const selectedProvider = overrides.selectedProvider ?? "claude"; + const fallbackProviderOption = + overrides.providerOptions?.find(option => option.name === selectedProvider) ?? 
+ providerOptions.find(option => option.name === selectedProvider); return { open: true, onOpenChange: vi.fn(), agents, workspace, selectedAgentName: "claude-agent", - selectedProvider: "claude", + selectedProvider, + selectedProviderOption: fallbackProviderOption, + selectedModel: "", + selectedReasoning: "", + modelOptions: [], + reasoningSupported: false, providerOptions, providersLoading: false, providersError: null, onAgentChange: vi.fn(), onProviderChange: vi.fn(), + onModelChange: vi.fn(), + onReasoningChange: vi.fn(), onSubmit: vi.fn(), isSubmitting: false, submitError: null, @@ -68,18 +81,19 @@ function makeProps(overrides: Partial = {}): SessionCr } describe("SessionCreateDialog", () => { - it("renders the provider picker with every workspace provider option", () => { - render(); + it("renders the provider picker with the selected provider in the trigger", () => { + render( + + + + ); - expect(screen.getByTestId("session-create-dialog").className).toContain("sm:max-w-lg"); + expect(screen.getByTestId("session-create-dialog").className).toContain("sm:max-w-xl"); expect(screen.getByTestId("session-create-dialog").className).not.toContain("sm:max-w-[30rem]"); - const picker = screen.getByTestId("session-create-provider-select") as HTMLSelectElement; - expect(picker).toBeEnabled(); - expect(picker.value).toBe("claude"); - const values = Array.from(picker.options).map(option => option.value); - expect(values).toEqual(["claude", "codex", "openrouter"]); - expect(picker).toHaveTextContent("Claude Code · claude-sonnet-4-6"); + const trigger = screen.getByTestId("session-create-provider-select"); + expect(trigger).toBeEnabled(); + expect(trigger).toHaveTextContent("Claude Code"); expect(screen.getByTestId("session-create-provider-runtime")).toHaveTextContent("acp"); }); @@ -108,16 +122,48 @@ describe("SessionCreateDialog", () => { expect(onAgentChange).toHaveBeenCalledWith("codex-agent"); }); - it("calls onProviderChange when the operator picks a different 
provider", () => { + it("calls onProviderChange when the operator picks a different provider", async () => { + const user = userEvent.setup(); const onProviderChange = vi.fn(); - render(); + render( + + + + ); - fireEvent.change(screen.getByTestId("session-create-provider-select"), { - target: { value: "codex" }, - }); + await user.click(screen.getByTestId("session-create-provider-select")); + await user.click(screen.getByTestId("provider-command-item-codex")); expect(onProviderChange).toHaveBeenCalledWith("codex"); }); + it("lets the operator select a model and reasoning effort", async () => { + const user = userEvent.setup(); + const onModelChange = vi.fn(); + const onReasoningChange = vi.fn(); + render( + + + + ); + + await user.click(screen.getByTestId("session-create-model-select")); + await user.click(screen.getByTestId("model-command-item-gpt-5.4-mini")); + expect(onModelChange).toHaveBeenCalledWith("gpt-5.4-mini"); + + await user.click(screen.getByTestId("session-create-reasoning-select")); + await user.click(screen.getByTestId("reasoning-command-item-high")); + expect(onReasoningChange).toHaveBeenCalledWith("high"); + }); + it("calls onSubmit only once when the form is submitted with a valid draft", () => { const onSubmit = vi.fn(); render(); @@ -127,7 +173,17 @@ describe("SessionCreateDialog", () => { }); it("disables submit when no providers are available and surfaces an empty-state note", () => { - render(); + render( + + + + ); expect(screen.getByTestId("session-create-dialog-submit")).toBeDisabled(); expect(screen.getByTestId("session-create-providers-empty")).toBeInTheDocument(); @@ -148,12 +204,15 @@ describe("SessionCreateDialog", () => { it("disables submit when the current selections are no longer available", () => { render( - + + + ); expect(screen.getByTestId("session-create-dialog-submit")).toBeDisabled(); @@ -161,12 +220,19 @@ describe("SessionCreateDialog", () => { it("shows provider-loading state and disables the picker while loading", () 
=> { render( - + + + ); - const picker = screen.getByTestId("session-create-provider-select") as HTMLSelectElement; + const picker = screen.getByTestId("session-create-provider-select"); expect(picker).toBeDisabled(); expect(picker).toHaveTextContent("Loading providers…"); }); @@ -179,6 +245,7 @@ describe("SessionCreateDialog", () => { workspace: undefined, selectedAgentName: "claude-agent", selectedProvider: "claude", + selectedProviderOption: undefined, })} /> @@ -190,11 +257,8 @@ describe("SessionCreateDialog", () => { ); expect(screen.queryByTestId("session-create-agent-default")).not.toBeInTheDocument(); - const providerPicker = screen.getByTestId( - "session-create-provider-select" - ) as HTMLSelectElement; + const providerPicker = screen.getByTestId("session-create-provider-select"); expect(providerPicker).toBeDisabled(); - expect(providerPicker.value).toBe(""); expect(providerPicker).toHaveTextContent("Select a workspace first"); expect(screen.queryByTestId("session-create-provider-runtime")).not.toBeInTheDocument(); expect(screen.queryByTestId("session-create-providers-empty")).not.toBeInTheDocument(); diff --git a/web/src/systems/session/components/model-command-select.tsx b/web/src/systems/session/components/model-command-select.tsx new file mode 100644 index 000000000..126b72d66 --- /dev/null +++ b/web/src/systems/session/components/model-command-select.tsx @@ -0,0 +1,169 @@ +import { useMemo, useState, type KeyboardEvent } from "react"; +import { Boxes, ChevronsUpDown } from "lucide-react"; + +import { + cn, + Command, + CommandEmpty, + CommandGroup, + CommandInput, + CommandItem, + CommandList, + Popover, + PopoverContent, + PopoverTrigger, +} from "@agh/ui"; + +const TRIGGER_BASE = + "flex h-9 w-full items-center justify-between gap-2 rounded-md border border-input bg-background px-3 py-2 text-sm shadow-none outline-none transition-colors hover:bg-accent disabled:cursor-not-allowed disabled:opacity-50 focus-visible:ring-2 focus-visible:ring-ring/50"; 
+ +export interface ModelCommandSelectProps { + options: string[]; + defaultModel: string | null; + value: string; + onChange: (next: string) => void; + placeholder?: string; + disabled?: boolean; + triggerId?: string; + triggerTestId?: string; + className?: string; +} + +export function ModelCommandSelect({ + options, + defaultModel, + value, + onChange, + placeholder = "Use provider default", + disabled, + triggerId, + triggerTestId, + className, +}: ModelCommandSelectProps) { + const [open, setOpen] = useState(false); + const [query, setQuery] = useState(""); + const trimmedValue = value.trim(); + const trimmedDefault = defaultModel?.trim() ?? ""; + const knownOptions = useMemo(() => { + const seen = new Set(); + const result: string[] = []; + for (const option of options) { + const trimmed = option.trim(); + if (!trimmed || seen.has(trimmed)) continue; + seen.add(trimmed); + result.push(trimmed); + } + return result; + }, [options]); + const trimmedQuery = query.trim(); + const showCustomItem = + trimmedQuery !== "" && !knownOptions.some(option => option === trimmedQuery); + + const handleSelect = (next: string) => { + onChange(next); + setOpen(false); + setQuery(""); + }; + + const handleClear = () => { + onChange(""); + setOpen(false); + setQuery(""); + }; + + const handleInputKeyDown = (event: KeyboardEvent) => { + if (event.key === "Enter" && trimmedQuery !== "") { + event.preventDefault(); + handleSelect(trimmedQuery); + } + }; + + const triggerLabel = trimmedValue + ? trimmedValue + : trimmedDefault + ? `${placeholder} · ${trimmedDefault}` + : placeholder; + + const triggerEmphasis = trimmedValue ? "text-foreground" : "text-muted-foreground"; + + return ( + setOpen(next)}> + + + + + + + + + + {trimmedQuery === "" + ? "No models listed for this provider." + : "Press Enter to use this name."} + + + +
+ Use provider default + {trimmedDefault ? ( + + {trimmedDefault} + + ) : null} +
+
+
+ {knownOptions.length > 0 ? ( + + {knownOptions.map(option => ( + handleSelect(option)} + data-checked={trimmedValue === option ? "true" : "false"} + data-testid={`model-command-item-${option}`} + > + {option} + + ))} + + ) : null} + {showCustomItem ? ( + + handleSelect(trimmedQuery)} + data-testid="model-command-item-custom" + > + Use "{trimmedQuery}" + + + ) : null} +
+
+
+
+ ); +} diff --git a/web/src/systems/session/components/reasoning-command-select.tsx b/web/src/systems/session/components/reasoning-command-select.tsx new file mode 100644 index 000000000..0570c7880 --- /dev/null +++ b/web/src/systems/session/components/reasoning-command-select.tsx @@ -0,0 +1,129 @@ +import { useState } from "react"; +import { ChevronsUpDown, Gauge } from "lucide-react"; + +import { + cn, + Command, + CommandEmpty, + CommandGroup, + CommandInput, + CommandItem, + CommandList, + Popover, + PopoverContent, + PopoverTrigger, +} from "@agh/ui"; + +const TRIGGER_BASE = + "flex h-9 w-full items-center justify-between gap-2 rounded-md border border-input bg-background px-3 py-2 text-sm shadow-none outline-none transition-colors hover:bg-accent disabled:cursor-not-allowed disabled:opacity-50 focus-visible:ring-2 focus-visible:ring-ring/50"; + +export const REASONING_EFFORTS = ["minimal", "low", "medium", "high", "xhigh"] as const; +export type ReasoningEffort = (typeof REASONING_EFFORTS)[number]; + +const REASONING_LABELS: Record = { + minimal: "Minimal · fastest", + low: "Low", + medium: "Medium", + high: "High", + xhigh: "Extra high · deepest", +}; + +export interface ReasoningCommandSelectProps { + value: string; + onChange: (next: string) => void; + placeholder?: string; + disabled?: boolean; + disabledHint?: string; + triggerId?: string; + triggerTestId?: string; + className?: string; +} + +export function ReasoningCommandSelect({ + value, + onChange, + placeholder = "Use provider default", + disabled, + disabledHint, + triggerId, + triggerTestId, + className, +}: ReasoningCommandSelectProps) { + const [open, setOpen] = useState(false); + const trimmedValue = value.trim(); + + const handleSelect = (next: string) => { + onChange(next); + setOpen(false); + }; + + const triggerLabel = trimmedValue + ? (REASONING_LABELS[trimmedValue as ReasoningEffort] ?? trimmedValue) + : disabled && disabledHint + ? 
disabledHint + : placeholder; + + const triggerEmphasis = trimmedValue ? "text-foreground" : "text-muted-foreground"; + + return ( + setOpen(next)}> + + + + + + + + + + No matching effort levels. + + + handleSelect("")} + data-checked={trimmedValue === "" ? "true" : "false"} + data-testid="reasoning-command-item-default" + > + Use provider default + + {REASONING_EFFORTS.map(effort => ( + handleSelect(effort)} + data-checked={trimmedValue === effort ? "true" : "false"} + data-testid={`reasoning-command-item-${effort}`} + > +
+ + {REASONING_LABELS[effort]} + + + {effort} + +
+
+ ))} +
+
+
+
+
+ ); +} diff --git a/web/src/systems/session/components/session-create-dialog.tsx b/web/src/systems/session/components/session-create-dialog.tsx index e60c7ec70..9ff8e44d2 100644 --- a/web/src/systems/session/components/session-create-dialog.tsx +++ b/web/src/systems/session/components/session-create-dialog.tsx @@ -12,12 +12,17 @@ import { Field, FieldDescription, FieldLabel, - NativeSelect, - NativeSelectOption, } from "@agh/ui"; import { AgentCommandSelect, AgentIcon, type AgentPayload } from "@/systems/agent"; -import type { SessionProviderOption, WorkspacePayload } from "@/systems/workspace"; +import { + ProviderCommandSelect, + type SessionProviderOption, + type WorkspacePayload, +} from "@/systems/workspace"; + +import { ModelCommandSelect } from "./model-command-select"; +import { ReasoningCommandSelect } from "./reasoning-command-select"; export interface SessionCreateDialogProps { open: boolean; @@ -26,11 +31,18 @@ export interface SessionCreateDialogProps { workspace: WorkspacePayload | undefined; selectedAgentName: string; selectedProvider: string; + selectedProviderOption: SessionProviderOption | undefined; + selectedModel: string; + selectedReasoning: string; + modelOptions: string[]; + reasoningSupported: boolean; providerOptions: SessionProviderOption[]; providersLoading: boolean; providersError: string | null; onAgentChange: (agentName: string) => void; onProviderChange: (provider: string) => void; + onModelChange: (model: string) => void; + onReasoningChange: (effort: string) => void; onSubmit: () => void; isSubmitting: boolean; submitError: string | null; @@ -43,11 +55,18 @@ function SessionCreateDialog({ workspace, selectedAgentName, selectedProvider, + selectedProviderOption, + selectedModel, + selectedReasoning, + modelOptions, + reasoningSupported, providerOptions, providersLoading, providersError, onAgentChange, onProviderChange, + onModelChange, + onReasoningChange, onSubmit, isSubmitting, submitError, @@ -64,9 +83,7 @@ function 
SessionCreateDialog({ const hasSelectedProvider = providerOptions.some( option => option.name === trimmedSelectedProvider ); - const activeProvider = workspaceSelected - ? providerOptions.find(option => option.name === trimmedSelectedProvider) - : undefined; + const activeProvider = selectedProviderOption ?? undefined; const agentPlaceholder = !workspaceSelected ? "Select a workspace first" : hasAgents @@ -76,7 +93,9 @@ function SessionCreateDialog({ ? "Select a workspace first" : providersLoading ? "Loading providers…" - : "No providers available"; + : hasProviderOptions + ? "Select a provider" + : "No providers available"; const canSubmit = !isSubmitting && !providersLoading && @@ -102,7 +121,7 @@ function SessionCreateDialog({ return ( @@ -151,25 +170,17 @@ function SessionCreateDialog({ Override the runtime for this session only. The agent default is preselected when it matches a provider visible in this workspace. - onProviderChange(next ?? "")} disabled={ !workspaceSelected || providersLoading || !hasProviderOptions || isSubmitting } - id="session-create-provider" - onChange={event => onProviderChange(event.target.value)} - value={workspaceSelected ? selectedProvider : ""} - > - {workspaceSelected && hasProviderOptions ? null : ( - {providerPlaceholder} - )} - {providerOptions.map(option => ( - - {providerOptionLabel(option)} - - ))} - + triggerId="session-create-provider" + triggerTestId="session-create-provider-select" + placeholder={providerPlaceholder} + /> {activeProvider ? (
{activeProvider.runtime_provider} ) : null} - {activeProvider.default_model ? ( - {activeProvider.default_model} - ) : null}
) : null} {providersError ? ( @@ -203,6 +211,48 @@ function SessionCreateDialog({ ) : null} +
+ + Model + + Override the model for this session, or inherit the provider default. + + + + + + Reasoning effort + + Hint reasoning depth when the selected provider supports it. + + + +
+ {submitError ? (

- + + ) : null} @@ -234,6 +280,7 @@ function SessionCreateDialog({ Hint reasoning depth when the selected provider supports it. + {defaultReasoning ? ( +

+ Default reasoning: {defaultReasoning} +

+ ) : null} @@ -285,4 +340,86 @@ function SessionCreateDialog({ ); } +interface CatalogStatusLineProps { + loading: boolean; + refreshing: boolean; + stale: boolean; + error: string | null; + refreshError: string | null; + optionCount: number; +} + +function CatalogStatusLine({ + loading, + refreshing, + stale, + error, + refreshError, + optionCount, +}: CatalogStatusLineProps) { + if (refreshError) { + return ( +

+ {refreshError} +

+ ); + } + if (error) { + return ( +

+ {error}. Type a model name to continue. +

+ ); + } + if (refreshing) { + return ( +

+ Refreshing model catalog… +

+ ); + } + if (loading) { + return ( +

+ Loading provider models… +

+ ); + } + if (stale) { + return ( +

+ Some models are stale — refresh to confirm availability. +

+ ); + } + if (optionCount === 0) { + return ( +

+ No catalog models — type a model name to continue. +

+ ); + } + return null; +} + export { SessionCreateDialog }; diff --git a/web/src/systems/session/hooks/__tests__/use-session-create-dialog.test.tsx b/web/src/systems/session/hooks/__tests__/use-session-create-dialog.test.tsx index 97d815b68..2c339d12b 100644 --- a/web/src/systems/session/hooks/__tests__/use-session-create-dialog.test.tsx +++ b/web/src/systems/session/hooks/__tests__/use-session-create-dialog.test.tsx @@ -1,7 +1,13 @@ -import { act, renderHook } from "@testing-library/react"; +import { QueryClient, QueryClientProvider } from "@tanstack/react-query"; +import { act, renderHook, waitFor } from "@testing-library/react"; +import { createElement, type ReactNode } from "react"; import { beforeEach, describe, expect, it, vi } from "vitest"; import type { AgentPayload } from "@/systems/agent"; +import type { + ProviderModelsListResponse, + ProviderModelsRefreshResponse, +} from "@/systems/model-catalog"; import type { WorkspaceDetailPayload, WorkspacePayload } from "@/systems/workspace"; import type { SessionPayload } from "../../types"; @@ -13,12 +19,16 @@ const { mockToastError, mockUseCreateSessionPending, mockWorkspaceQuery, + mockListProviderModels, + mockRefreshProviderModels, } = vi.hoisted(() => ({ mockNavigate: vi.fn<(input: unknown) => Promise>(), mockMutateAsync: vi.fn<(input: unknown) => Promise>(), mockToastError: vi.fn(), mockUseCreateSessionPending: { current: false as boolean }, mockWorkspaceQuery: vi.fn(), + mockListProviderModels: vi.fn<(input: unknown) => Promise>(), + mockRefreshProviderModels: vi.fn<(input: unknown) => Promise>(), })); vi.mock("@tanstack/react-router", () => ({ @@ -41,6 +51,17 @@ vi.mock("@/systems/workspace", async () => { }; }); +vi.mock("@/systems/model-catalog/adapters/model-catalog-api", async () => { + const actual = await vi.importActual< + typeof import("@/systems/model-catalog/adapters/model-catalog-api") + >("@/systems/model-catalog/adapters/model-catalog-api"); + return { + ...actual, + listProviderModels: 
(...args: unknown[]) => mockListProviderModels(args[0]), + refreshProviderModels: (...args: unknown[]) => mockRefreshProviderModels(args[0]), + }; +}); + vi.mock("../use-session-actions", () => ({ useCreateSession: () => ({ mutateAsync: mockMutateAsync, @@ -79,6 +100,62 @@ let workspaceQueryResult: { error: Error | null; }; +const codexCatalog: ProviderModelsListResponse = { + models: [ + { + provider_id: "codex", + model_id: "gpt-5.4", + display_name: "GPT-5.4", + availability_state: "available_live", + available: true, + stale: false, + refreshed_at: "2026-05-07T10:00:00Z", + sources: [ + { + source_id: "config", + source_kind: "config", + priority: 120, + refreshed_at: "2026-05-07T10:00:00Z", + stale: false, + }, + ], + supports_reasoning: true, + reasoning_efforts: ["low", "medium", "high"], + default_reasoning_effort: "medium", + }, + { + provider_id: "codex", + model_id: "gpt-5.4-mini", + display_name: "GPT-5.4 Mini", + availability_state: "available_stale", + available: true, + stale: true, + refreshed_at: "2026-05-06T10:00:00Z", + sources: [ + { + source_id: "models_dev", + source_kind: "models_dev", + priority: 50, + refreshed_at: "2026-05-06T10:00:00Z", + stale: true, + }, + ], + supports_reasoning: false, + }, + ], +}; + +function createWrapper() { + const queryClient = new QueryClient({ + defaultOptions: { queries: { retry: false }, mutations: { retry: false } }, + }); + + const wrapper = ({ children }: { children: ReactNode }) => + createElement(QueryClientProvider, { client: queryClient }, children); + + return { queryClient, wrapper }; +} + describe("useSessionCreateDialog", () => { beforeEach(() => { mockNavigate.mockReset(); @@ -99,9 +176,25 @@ describe("useSessionCreateDialog", () => { }; mockWorkspaceQuery.mockImplementation(() => workspaceQueryResult); + mockListProviderModels.mockReset(); + mockListProviderModels.mockResolvedValue(codexCatalog); + mockRefreshProviderModels.mockReset(); + mockRefreshProviderModels.mockResolvedValue({ + sources: 
[ + { + source_id: "models_dev", + source_kind: "models_dev", + priority: 50, + provider_id: "codex", + refresh_state: "succeeded", + row_count: 2, + stale: false, + }, + ], + }); }); - it("derives the default provider once workspace providers arrive after opening", async () => { + it("Should derive the default provider once workspace providers arrive after opening", async () => { workspaceQueryResult = { data: { workspace: activeWorkspace, @@ -111,8 +204,10 @@ describe("useSessionCreateDialog", () => { error: null, }; - const { result, rerender } = renderHook(() => - useSessionCreateDialog({ agents, activeWorkspace }) + const { wrapper } = createWrapper(); + const { result, rerender } = renderHook( + () => useSessionCreateDialog({ agents, activeWorkspace }), + { wrapper } ); act(() => { @@ -150,8 +245,11 @@ describe("useSessionCreateDialog", () => { }); }); - it("clears an explicit provider override when the operator changes agents", () => { - const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace })); + it("Should clear an explicit provider override when the operator changes agents", () => { + const { wrapper } = createWrapper(); + const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); act(() => { result.current.openForAgent("claude-agent"); @@ -173,18 +271,87 @@ describe("useSessionCreateDialog", () => { expect(result.current.selectedProvider).toBe("codex"); }); - it("submits selected model and reasoning overrides only when populated", async () => { - const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace })); + it("Should expose deduped catalog models for the selected provider", async () => { + mockListProviderModels.mockResolvedValueOnce({ + models: [ + codexCatalog.models[0], + codexCatalog.models[1], + codexCatalog.models[0], + ] as ProviderModelsListResponse["models"], + }); + const { wrapper } = createWrapper(); + const { result } = 
renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); act(() => { result.current.openForAgent("codex-agent"); }); + + await waitFor(() => { + expect(result.current.modelOptions).toHaveLength(2); + }); + expect(result.current.modelOptions.map(option => option.id)).toEqual([ + "gpt-5.4", + "gpt-5.4-mini", + ]); + }); + + it("Should keep manual model entry available when the catalog is empty", async () => { + mockListProviderModels.mockResolvedValueOnce({ models: [] }); + + const { wrapper } = createWrapper(); + const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); + + act(() => { + result.current.openForAgent("codex-agent"); + }); + + await waitFor(() => { + expect(result.current.catalogLoading).toBe(false); + }); expect(result.current.modelOptions).toEqual([]); - expect(result.current.reasoningSupported).toBe(true); + + act(() => { + result.current.onModelChange("custom-experimental"); + }); + + await act(async () => { + await result.current.submit(); + }); + + expect(mockMutateAsync).toHaveBeenCalledWith({ + agent_name: "codex-agent", + workspace: "ws_alpha", + provider: "codex", + model: "custom-experimental", + }); + }); + + it("Should expose stale catalog rows without blocking session creation", async () => { + const { wrapper } = createWrapper(); + const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); + + act(() => { + result.current.openForAgent("codex-agent"); + }); + + await waitFor(() => { + expect(result.current.modelOptions).toHaveLength(2); + }); + + expect(result.current.catalogStale).toBe(true); + const staleOption = result.current.modelOptions.find(option => option.id === "gpt-5.4-mini"); + expect(staleOption?.availabilityState).toBe("available_stale"); + const liveOption = result.current.modelOptions.find(option => option.id === "gpt-5.4"); + expect(liveOption?.availabilityState).toBe("available_live"); act(() 
=> { result.current.onModelChange("gpt-5.4-mini"); - result.current.onReasoningChange("high"); }); await act(async () => { @@ -196,6 +363,114 @@ describe("useSessionCreateDialog", () => { workspace: "ws_alpha", provider: "codex", model: "gpt-5.4-mini", + }); + }); + + it("Should surface catalog source errors without blocking manual entry", async () => { + mockListProviderModels.mockReset(); + mockListProviderModels.mockRejectedValue(new Error("catalog upstream failed")); + + const { wrapper } = createWrapper(); + const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); + + act(() => { + result.current.openForAgent("codex-agent"); + }); + + await waitFor( + () => { + expect(result.current.catalogError).toContain("catalog upstream failed"); + }, + { timeout: 5000 } + ); + expect(result.current.modelOptions).toEqual([]); + + act(() => { + result.current.onModelChange("manual-fallback"); + }); + + await act(async () => { + await result.current.submit(); + }); + + expect(mockMutateAsync).toHaveBeenCalledWith({ + agent_name: "codex-agent", + workspace: "ws_alpha", + provider: "codex", + model: "manual-fallback", + }); + }); + + it("Should invalidate catalog queries on refresh", async () => { + const { queryClient, wrapper } = createWrapper(); + const invalidateSpy = vi.spyOn(queryClient, "invalidateQueries"); + + const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); + + act(() => { + result.current.openForAgent("codex-agent"); + }); + + await waitFor(() => { + expect(result.current.modelOptions).toHaveLength(2); + }); + + act(() => { + result.current.refreshCatalog(); + }); + + await waitFor(() => { + expect(mockRefreshProviderModels).toHaveBeenCalledWith({ + providerId: "codex", + force: true, + }); + }); + await waitFor(() => { + expect(invalidateSpy).toHaveBeenCalled(); + }); + }); + + it("Should submit selected model and reasoning overrides only when populated", 
async () => { + const { wrapper } = createWrapper(); + const { result } = renderHook(() => useSessionCreateDialog({ agents, activeWorkspace }), { + wrapper, + }); + + act(() => { + result.current.openForAgent("codex-agent"); + }); + + await waitFor(() => { + expect(result.current.modelOptions.length).toBeGreaterThan(0); + }); + + act(() => { + result.current.onModelChange("gpt-5.4-mini"); + }); + expect(result.current.reasoningSupported).toBe(false); + + act(() => { + result.current.onModelChange("gpt-5.4"); + }); + expect(result.current.reasoningSupported).toBe(true); + + act(() => { + result.current.onReasoningChange("high"); + }); + + await act(async () => { + await result.current.submit(); + }); + + expect(mockMutateAsync).toHaveBeenCalledWith({ + agent_name: "codex-agent", + workspace: "ws_alpha", + provider: "codex", + model: "gpt-5.4", reasoning_effort: "high", }); }); diff --git a/web/src/systems/session/hooks/use-session-create-dialog.ts b/web/src/systems/session/hooks/use-session-create-dialog.ts index f96047edb..d37ad19a4 100644 --- a/web/src/systems/session/hooks/use-session-create-dialog.ts +++ b/web/src/systems/session/hooks/use-session-create-dialog.ts @@ -3,6 +3,14 @@ import { useNavigate } from "@tanstack/react-router"; import { toast } from "sonner"; import type { AgentPayload } from "@/systems/agent"; +import { + deriveActiveSessionOptions, + useProviderModels, + useRefreshProviderModels, + type ModelOption, + type ProviderModelPayload, + type ReasoningOption, +} from "@/systems/model-catalog"; import type { SessionProviderOption, WorkspacePayload } from "@/systems/workspace"; import { useWorkspace } from "@/systems/workspace"; @@ -32,8 +40,15 @@ export interface SessionCreateDialogState { selectedProviderOption: SessionProviderOption | undefined; selectedModel: string; selectedReasoning: string; - modelOptions: string[]; + modelOptions: ModelOption[]; + reasoningOptions: ReasoningOption[]; reasoningSupported: boolean; + catalogStale: boolean; + 
catalogLoading: boolean; + catalogError: string | null; + catalogRefreshing: boolean; + catalogRefreshError: string | null; + defaultReasoning: string | null; isSubmitting: boolean; submitError: string | null; pendingAgentName: string | null; @@ -47,6 +62,7 @@ export interface SessionCreateDialogApi extends SessionCreateDialogState { onProviderChange: (provider: string) => void; onModelChange: (model: string) => void; onReasoningChange: (effort: string) => void; + refreshCatalog: () => void; submit: () => Promise; } @@ -85,11 +101,11 @@ function describeWorkspaceError(error: unknown): string { return "Unable to load provider options for this workspace."; } -function describeSubmitError(error: unknown): string { +function describeError(fallback: string, error: unknown): string { if (error instanceof Error && error.message.trim().length > 0) { return error.message; } - return "Failed to create session."; + return fallback; } export function useSessionCreateDialog({ @@ -142,14 +158,40 @@ export function useSessionCreateDialog({ [providerOptions, selectedProvider] ); - const modelOptions = useMemo(() => [], []); + const catalogQuery = useProviderModels({ + providerId: selectedProvider, + includeStale: true, + enabled: open && selectedProvider.length > 0, + }); + + const refreshMutation = useRefreshProviderModels(); + + const catalogModels = useMemo( + () => catalogQuery.data?.models ?? [], + [catalogQuery.data?.models] + ); + + const trimmedSelectedModel = useMemo(() => draft.modelOverride.trim(), [draft.modelOverride]); - const reasoningSupported = selectedProviderOption != null; + const derived = useMemo( + () => + deriveActiveSessionOptions({ + catalog: catalogModels, + selectedModel: trimmedSelectedModel.length > 0 ? 
trimmedSelectedModel : null, + }), + [catalogModels, trimmedSelectedModel] + ); + + const catalogStale = useMemo(() => catalogModels.some(model => model.stale), [catalogModels]); + const catalogLoading = catalogQuery.isLoading || catalogQuery.isFetching; + const catalogError = catalogQuery.error + ? describeError("Failed to load provider models.", catalogQuery.error) + : null; + const catalogRefreshError = refreshMutation.error + ? describeError("Failed to refresh provider models.", refreshMutation.error) + : null; - const selectedModel = useMemo(() => { - const trimmed = draft.modelOverride.trim(); - return trimmed.length === 0 ? "" : trimmed; - }, [draft.modelOverride]); + const reasoningSupported = derived.reasoningSupported; const selectedReasoning = useMemo(() => { if (!reasoningSupported) return ""; @@ -204,13 +246,20 @@ export function useSessionCreateDialog({ }, []); const onModelChange = useCallback((model: string) => { - setDraft(current => ({ ...current, modelOverride: model })); + setDraft(current => ({ ...current, modelOverride: model, reasoningEffort: "" })); }, []); const onReasoningChange = useCallback((effort: string) => { setDraft(current => ({ ...current, reasoningEffort: effort })); }, []); + const refreshCatalog = useCallback(() => { + if (selectedProvider.length === 0) { + return; + } + refreshMutation.mutate({ providerId: selectedProvider, force: true }); + }, [refreshMutation, selectedProvider]); + const submit = useCallback(async () => { if (!activeWorkspace) return; const agentName = draft.agentName.trim(); @@ -221,7 +270,7 @@ export function useSessionCreateDialog({ setPendingAgentName(agentName); setPendingWorkspaceId(activeWorkspace.id); - const trimmedModel = selectedModel.trim(); + const trimmedModel = trimmedSelectedModel; const trimmedReasoning = selectedReasoning.trim(); try { @@ -238,7 +287,7 @@ export function useSessionCreateDialog({ params: { name: session.agent_name, id: session.id }, }); } catch (error) { - const message = 
describeSubmitError(error); + const message = describeError("Failed to create session.", error); setSubmitError(message); toast.error(message); } finally { @@ -251,8 +300,8 @@ export function useSessionCreateDialog({ draft.agentName, navigate, selectedProvider, - selectedModel, selectedReasoning, + trimmedSelectedModel, ]); const providersError = workspaceDetailError ? describeWorkspaceError(workspaceDetailError) : null; @@ -267,10 +316,17 @@ export function useSessionCreateDialog({ selectedAgentName: draft.agentName, selectedProvider, selectedProviderOption, - selectedModel, + selectedModel: trimmedSelectedModel, selectedReasoning, - modelOptions, + modelOptions: derived.modelOptions, + reasoningOptions: derived.reasoningOptions, reasoningSupported, + catalogStale, + catalogLoading, + catalogError, + catalogRefreshing: refreshMutation.isPending, + catalogRefreshError, + defaultReasoning: derived.defaultReasoning, isSubmitting: createSession.isPending, submitError, pendingAgentName, @@ -281,6 +337,7 @@ export function useSessionCreateDialog({ onProviderChange, onModelChange, onReasoningChange, + refreshCatalog, submit, }; } diff --git a/web/src/systems/settings/components/index.ts b/web/src/systems/settings/components/index.ts index 8badaab4e..3ce24f20d 100644 --- a/web/src/systems/settings/components/index.ts +++ b/web/src/systems/settings/components/index.ts @@ -4,6 +4,7 @@ export { SettingsEditorDialog } from "./settings-editor-dialog"; export type { EditorMode } from "./settings-editor-dialog"; export { ProviderCard, providerStateTone } from "./provider-card"; export { ProviderLogo } from "./provider-logo"; +export { ProviderModelCatalogStatus } from "./provider-model-catalog-status"; export { ProvidersGrid } from "./providers-grid"; export { SettingsPageActions } from "./settings-page-actions"; export { SettingsDecimalInput } from "./settings-decimal-input"; diff --git a/web/src/systems/settings/components/provider-card.tsx 
b/web/src/systems/settings/components/provider-card.tsx index b745dbb07..91a65e626 100644 --- a/web/src/systems/settings/components/provider-card.tsx +++ b/web/src/systems/settings/components/provider-card.tsx @@ -14,6 +14,7 @@ import type { ReactNode } from "react"; import type { SettingsProviderEntry } from "@/systems/settings"; import { ProviderLogo } from "./provider-logo"; +import { ProviderModelCatalogStatus } from "./provider-model-catalog-status"; import { SettingsSourceBadge } from "./settings-source-badge"; interface ProviderCardProps { @@ -111,6 +112,12 @@ export function ProviderCard({ provider, onEdit, onDelete }: ProviderCardProps) shadowed={shadowed} /> +
+ + Catalog + + +
diff --git a/web/src/systems/settings/components/provider-model-catalog-status.tsx b/web/src/systems/settings/components/provider-model-catalog-status.tsx new file mode 100644 index 000000000..0fc91a220 --- /dev/null +++ b/web/src/systems/settings/components/provider-model-catalog-status.tsx @@ -0,0 +1,126 @@ +import { Loader2, RefreshCw } from "lucide-react"; + +import { Button, Pill, type PillTone } from "@agh/ui"; + +import { + useProviderModelStatus, + useRefreshProviderModels, + type ProviderModelSourceStatus, +} from "@/systems/model-catalog"; + +interface ProviderModelCatalogStatusProps { + providerId: string; + testId: string; +} + +const REFRESH_STATE_TONE: Record = { + idle: "neutral", + refreshing: "info", + succeeded: "success", + failed: "danger", +}; + +export function ProviderModelCatalogStatus({ + providerId, + testId, +}: ProviderModelCatalogStatusProps) { + const statusQuery = useProviderModelStatus({ providerId }); + const refreshMutation = useRefreshProviderModels(); + + if (statusQuery.isLoading) { + return ( +
+ + Loading catalog status… +
+ ); + } + + const sources = statusQuery.data?.sources ?? []; + const refreshError = errorMessage(refreshMutation.error); + const queryError = errorMessage(statusQuery.error); + + const handleRefresh = () => { + refreshMutation.mutate({ providerId, force: true }); + }; + + return ( +
+ {queryError ? ( +

+ {queryError} +

+ ) : null} + {sources.length === 0 && !queryError ? ( +

+ No catalog sources reporting yet. +

+ ) : ( +
    + {sources.map(source => ( +
  • + {source.source_id} + + {source.refresh_state} + + {source.stale ? ( + + stale + + ) : null} + + {formatRowCount(source)} + +
  • + ))} +
+ )} + {refreshError ? ( +

+ {refreshError} +

+ ) : null} + +
+ ); +} + +function formatRowCount(source: ProviderModelSourceStatus): string { + return `${source.row_count} rows`; +} + +function errorMessage(error: unknown): string | null { + if (error instanceof Error && error.message.trim().length > 0) { + return error.message; + } + return null; +} From c2e0d3d5c6ad46fe5544208f508ee83d30bef17b Mon Sep 17 00:00:00 2001 From: Pedro Nauck Date: Thu, 7 May 2026 09:28:28 -0300 Subject: [PATCH 10/13] docs: hard-cut runtime docs to nested provider models block Removes flat default_model/supported_models/supports_reasoning_effort claims from provider/config/agent docs and documents the daemon-owned model catalog (native HTTP/UDS endpoints, /api/openai/v1/models projection, refresh lifetime/coalescing rules, extension model.source contract) plus the new model_catalog and provider models.discovery config sections. Regenerates the provider models CLI reference and ride-along cli-reference reformat output from make codegen + make cli-docs. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../cli-reference/agent/heartbeat/index.mdx | 19 +- .../runtime/cli-reference/agent/index.mdx | 11 +- .../cli-reference/agent/soul/index.mdx | 15 +- .../content/runtime/cli-reference/agh.mdx | 64 ++-- .../cli-reference/automation/index.mdx | 10 +- .../cli-reference/automation/jobs/index.mdx | 14 +- .../cli-reference/automation/runs/index.mdx | 4 +- .../automation/triggers/index.mdx | 12 +- .../runtime/cli-reference/bridge/index.mdx | 25 +- .../bridge/secret-bindings/index.mdx | 10 +- .../runtime/cli-reference/bundle/index.mdx | 21 +- .../runtime/cli-reference/ch/index.mdx | 12 +- .../runtime/cli-reference/completion/bash.mdx | 4 +- .../runtime/cli-reference/completion/fish.mdx | 3 +- .../cli-reference/completion/index.mdx | 12 +- .../cli-reference/completion/powershell.mdx | 2 +- .../runtime/cli-reference/completion/zsh.mdx | 7 +- .../runtime/cli-reference/config/index.mdx | 21 +- .../runtime/cli-reference/daemon/index.mdx | 11 +- 
.../runtime/cli-reference/extension/index.mdx | 19 +- .../runtime/cli-reference/hooks/index.mdx | 13 +- .../runtime/cli-reference/mcp/auth/index.mdx | 9 +- .../runtime/cli-reference/mcp/index.mdx | 5 +- .../runtime/cli-reference/me/index.mdx | 4 +- .../cli-reference/memory/adhoc/index.mdx | 5 +- .../cli-reference/memory/daily/index.mdx | 15 +- .../cli-reference/memory/decisions/index.mdx | 9 +- .../cli-reference/memory/dream/index.mdx | 13 +- .../cli-reference/memory/extractor/index.mdx | 13 +- .../cli-reference/memory/extractor/replay.mdx | 1 - .../runtime/cli-reference/memory/index.mdx | 45 +-- .../cli-reference/memory/provider/index.mdx | 11 +- .../cli-reference/memory/recall/index.mdx | 5 +- .../runtime/cli-reference/memory/reset.mdx | 1 - .../cli-reference/network/directs/index.mdx | 13 +- .../runtime/cli-reference/network/index.mdx | 21 +- .../cli-reference/network/threads/index.mdx | 9 +- .../cli-reference/network/work/index.mdx | 5 +- .../runtime/cli-reference/observe/index.mdx | 7 +- .../cli-reference/provider/auth/index.mdx | 9 +- .../runtime/cli-reference/provider/index.mdx | 6 +- .../runtime/cli-reference/provider/meta.json | 2 +- .../cli-reference/provider/models/index.mdx | 39 +++ .../cli-reference/provider/models/list.mdx | 43 +++ .../cli-reference/provider/models/meta.json | 4 + .../cli-reference/provider/models/refresh.mdx | 43 +++ .../cli-reference/provider/models/status.mdx | 40 +++ .../runtime/cli-reference/resource/index.mdx | 12 +- .../runtime/cli-reference/session/index.mdx | 31 +- .../runtime/cli-reference/session/new.mdx | 19 +- .../cli-reference/session/soul/index.mdx | 5 +- .../runtime/cli-reference/skill/index.mdx | 25 +- .../cli-reference/task/child/index.mdx | 4 +- .../cli-reference/task/dependency/index.mdx | 6 +- .../runtime/cli-reference/task/index.mdx | 46 +-- .../cli-reference/task/notification/index.mdx | 12 +- .../cli-reference/task/profile/index.mdx | 10 +- .../cli-reference/task/review/index.mdx | 12 +- 
.../runtime/cli-reference/task/run/index.mdx | 18 +- .../runtime/cli-reference/tool/index.mdx | 13 +- .../runtime/cli-reference/toolsets/index.mdx | 7 +- .../runtime/cli-reference/vault/index.mdx | 13 +- .../runtime/cli-reference/workspace/index.mdx | 15 +- .../runtime/core/agents/definitions.mdx | 4 +- .../content/runtime/core/agents/meta.json | 10 +- .../runtime/core/agents/model-catalog.mdx | 295 ++++++++++++++++++ .../content/runtime/core/agents/providers.mdx | 81 ++++- .../runtime/core/configuration/agent-md.mdx | 8 +- .../core/configuration/config-toml.mdx | 225 +++++++++---- .../runtime/core/extensions/develop.mdx | 55 +++- .../provider-model-catalog-docs.test.ts | 134 ++++++++ 71 files changed, 1301 insertions(+), 445 deletions(-) create mode 100644 packages/site/content/runtime/cli-reference/provider/models/index.mdx create mode 100644 packages/site/content/runtime/cli-reference/provider/models/list.mdx create mode 100644 packages/site/content/runtime/cli-reference/provider/models/meta.json create mode 100644 packages/site/content/runtime/cli-reference/provider/models/refresh.mdx create mode 100644 packages/site/content/runtime/cli-reference/provider/models/status.mdx create mode 100644 packages/site/content/runtime/core/agents/model-catalog.mdx create mode 100644 packages/site/lib/__tests__/provider-model-catalog-docs.test.ts diff --git a/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx b/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx index 9e11a52b8..80ed61c57 100644 --- a/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx +++ b/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx @@ -29,15 +29,16 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| 
------------------------------------------------------------------------------- | -------------------------------------------------------------- | -| [agh agent heartbeat delete](/runtime/cli-reference/agent/heartbeat/delete) | Delete HEARTBEAT.md through managed authoring | -| [agh agent heartbeat history](/runtime/cli-reference/agent/heartbeat/history) | List managed Heartbeat authoring revisions | -| [agh agent heartbeat inspect](/runtime/cli-reference/agent/heartbeat/inspect) | Inspect one agent's resolved Heartbeat policy | +| Command | Description | +| ------- | ----------- | +| [agh agent heartbeat delete](/runtime/cli-reference/agent/heartbeat/delete) | Delete HEARTBEAT.md through managed authoring | +| [agh agent heartbeat history](/runtime/cli-reference/agent/heartbeat/history) | List managed Heartbeat authoring revisions | +| [agh agent heartbeat inspect](/runtime/cli-reference/agent/heartbeat/inspect) | Inspect one agent's resolved Heartbeat policy | | [agh agent heartbeat rollback](/runtime/cli-reference/agent/heartbeat/rollback) | Rollback HEARTBEAT.md to a managed revision or snapshot digest | -| [agh agent heartbeat status](/runtime/cli-reference/agent/heartbeat/status) | Read Heartbeat policy status and wake eligibility | -| [agh agent heartbeat validate](/runtime/cli-reference/agent/heartbeat/validate) | Validate a proposed Heartbeat policy body | -| [agh agent heartbeat wake](/runtime/cli-reference/agent/heartbeat/wake) | Request one manual advisory Heartbeat wake | -| [agh agent heartbeat write](/runtime/cli-reference/agent/heartbeat/write) | Create or replace HEARTBEAT.md through managed authoring | +| [agh agent heartbeat status](/runtime/cli-reference/agent/heartbeat/status) | Read Heartbeat policy status and wake eligibility | +| [agh agent heartbeat validate](/runtime/cli-reference/agent/heartbeat/validate) | Validate a proposed Heartbeat policy body | +| [agh agent heartbeat wake](/runtime/cli-reference/agent/heartbeat/wake) | Request one 
manual advisory Heartbeat wake | +| [agh agent heartbeat write](/runtime/cli-reference/agent/heartbeat/write) | Create or replace HEARTBEAT.md through managed authoring | diff --git a/packages/site/content/runtime/cli-reference/agent/index.mdx b/packages/site/content/runtime/cli-reference/agent/index.mdx index e4a911d2e..75bffed50 100644 --- a/packages/site/content/runtime/cli-reference/agent/index.mdx +++ b/packages/site/content/runtime/cli-reference/agent/index.mdx @@ -29,11 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------------- | ------------------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh agent heartbeat](/runtime/cli-reference/agent/heartbeat) | Inspect and manage agent HEARTBEAT.md files | -| [agh agent info](/runtime/cli-reference/agent/info) | Show one agent definition | -| [agh agent list](/runtime/cli-reference/agent/list) | List installed agent definitions | -| [agh agent soul](/runtime/cli-reference/agent/soul) | Inspect and manage agent SOUL.md files | +| [agh agent info](/runtime/cli-reference/agent/info) | Show one agent definition | +| [agh agent list](/runtime/cli-reference/agent/list) | List installed agent definitions | +| [agh agent soul](/runtime/cli-reference/agent/soul) | Inspect and manage agent SOUL.md files | diff --git a/packages/site/content/runtime/cli-reference/agent/soul/index.mdx b/packages/site/content/runtime/cli-reference/agent/soul/index.mdx index 2a8bc8582..0d44fe4d5 100644 --- a/packages/site/content/runtime/cli-reference/agent/soul/index.mdx +++ b/packages/site/content/runtime/cli-reference/agent/soul/index.mdx @@ -29,13 +29,14 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` 
for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------------------- | ---------------------------------------------------- | -| [agh agent soul delete](/runtime/cli-reference/agent/soul/delete) | Delete SOUL.md through managed authoring | -| [agh agent soul history](/runtime/cli-reference/agent/soul/history) | List managed Soul authoring revisions | -| [agh agent soul inspect](/runtime/cli-reference/agent/soul/inspect) | Inspect one agent's resolved Soul | -| [agh agent soul rollback](/runtime/cli-reference/agent/soul/rollback) | Rollback SOUL.md to a managed revision | +| Command | Description | +| ------- | ----------- | +| [agh agent soul delete](/runtime/cli-reference/agent/soul/delete) | Delete SOUL.md through managed authoring | +| [agh agent soul history](/runtime/cli-reference/agent/soul/history) | List managed Soul authoring revisions | +| [agh agent soul inspect](/runtime/cli-reference/agent/soul/inspect) | Inspect one agent's resolved Soul | +| [agh agent soul rollback](/runtime/cli-reference/agent/soul/rollback) | Rollback SOUL.md to a managed revision | | [agh agent soul validate](/runtime/cli-reference/agent/soul/validate) | Validate a proposed Soul body or the current SOUL.md | -| [agh agent soul write](/runtime/cli-reference/agent/soul/write) | Create or replace SOUL.md through managed authoring | +| [agh agent soul write](/runtime/cli-reference/agent/soul/write) | Create or replace SOUL.md through managed authoring | diff --git a/packages/site/content/runtime/cli-reference/agh.mdx b/packages/site/content/runtime/cli-reference/agh.mdx index 43c5bec12..e1ffdf91a 100644 --- a/packages/site/content/runtime/cli-reference/agh.mdx +++ b/packages/site/content/runtime/cli-reference/agh.mdx @@ -47,35 +47,35 @@ agh -o json ## Subcommands -| Command | Description | -| --------------------------------------------------- | 
--------------------------------------------------------------- | -| [agh agent](/runtime/cli-reference/agent) | Inspect AGH agent definitions | -| [agh automation](/runtime/cli-reference/automation) | Manage automation jobs, triggers, and runs | -| [agh bridge](/runtime/cli-reference/bridge) | Manage bridge instances | -| [agh bundle](/runtime/cli-reference/bundle) | Manage extension bundle presets | -| [agh ch](/runtime/cli-reference/ch) | Use agent-facing coordination channels | -| [agh completion](/runtime/cli-reference/completion) | Generate the autocompletion script for the specified shell | -| [agh config](/runtime/cli-reference/config) | Inspect and mutate AGH configuration | -| [agh daemon](/runtime/cli-reference/daemon) | Manage the AGH daemon | -| [agh extension](/runtime/cli-reference/extension) | Manage AGH extensions | -| [agh hooks](/runtime/cli-reference/hooks) | Inspect configured and executed hooks | -| [agh install](/runtime/cli-reference/install) | Bootstrap AGH and create the default general agent | -| [agh mcp](/runtime/cli-reference/mcp) | Manage MCP integrations | -| [agh me](/runtime/cli-reference/me) | Inspect the current AGH-managed agent session | -| [agh memory](/runtime/cli-reference/memory) | Show, write, search, and operate Memory v2 durable context | -| [agh network](/runtime/cli-reference/network) | Operate the daemon-owned network runtime | -| [agh observe](/runtime/cli-reference/observe) | Query global observability state | -| [agh provider](/runtime/cli-reference/provider) | Inspect and manage provider authentication | -| [agh resource](/runtime/cli-reference/resource) | Manage desired-state resources | -| [agh session](/runtime/cli-reference/session) | Manage AGH sessions | -| [agh skill](/runtime/cli-reference/skill) | Manage local AgentSkills | -| [agh spawn](/runtime/cli-reference/spawn) | Spawn a bounded child agent session | -| [agh task](/runtime/cli-reference/task) | Manage tasks and task runs | -| [agh 
tool](/runtime/cli-reference/tool) | Inspect and invoke registry tools | -| [agh toolsets](/runtime/cli-reference/toolsets) | Inspect registry toolsets | -| [agh uninstall](/runtime/cli-reference/uninstall) | Stop AGH and remove runtime launch artifacts | -| [agh update](/runtime/cli-reference/update) | Check for and apply the latest stable AGH release | -| [agh vault](/runtime/cli-reference/vault) | Manage encrypted daemon vault metadata and write-only secrets | -| [agh version](/runtime/cli-reference/version) | Print the AGH version | -| [agh whoami](/runtime/cli-reference/whoami) | Print the current AGH agent identity from environment variables | -| [agh workspace](/runtime/cli-reference/workspace) | Manage registered workspaces | +| Command | Description | +| ------- | ----------- | +| [agh agent](/runtime/cli-reference/agent) | Inspect AGH agent definitions | +| [agh automation](/runtime/cli-reference/automation) | Manage automation jobs, triggers, and runs | +| [agh bridge](/runtime/cli-reference/bridge) | Manage bridge instances | +| [agh bundle](/runtime/cli-reference/bundle) | Manage extension bundle presets | +| [agh ch](/runtime/cli-reference/ch) | Use agent-facing coordination channels | +| [agh completion](/runtime/cli-reference/completion) | Generate the autocompletion script for the specified shell | +| [agh config](/runtime/cli-reference/config) | Inspect and mutate AGH configuration | +| [agh daemon](/runtime/cli-reference/daemon) | Manage the AGH daemon | +| [agh extension](/runtime/cli-reference/extension) | Manage AGH extensions | +| [agh hooks](/runtime/cli-reference/hooks) | Inspect configured and executed hooks | +| [agh install](/runtime/cli-reference/install) | Bootstrap AGH and create the default general agent | +| [agh mcp](/runtime/cli-reference/mcp) | Manage MCP integrations | +| [agh me](/runtime/cli-reference/me) | Inspect the current AGH-managed agent session | +| [agh memory](/runtime/cli-reference/memory) | Show, write, search, and 
operate Memory v2 durable context | +| [agh network](/runtime/cli-reference/network) | Operate the daemon-owned network runtime | +| [agh observe](/runtime/cli-reference/observe) | Query global observability state | +| [agh provider](/runtime/cli-reference/provider) | Inspect and manage provider authentication | +| [agh resource](/runtime/cli-reference/resource) | Manage desired-state resources | +| [agh session](/runtime/cli-reference/session) | Manage AGH sessions | +| [agh skill](/runtime/cli-reference/skill) | Manage local AgentSkills | +| [agh spawn](/runtime/cli-reference/spawn) | Spawn a bounded child agent session | +| [agh task](/runtime/cli-reference/task) | Manage tasks and task runs | +| [agh tool](/runtime/cli-reference/tool) | Inspect and invoke registry tools | +| [agh toolsets](/runtime/cli-reference/toolsets) | Inspect registry toolsets | +| [agh uninstall](/runtime/cli-reference/uninstall) | Stop AGH and remove runtime launch artifacts | +| [agh update](/runtime/cli-reference/update) | Check for and apply the latest stable AGH release | +| [agh vault](/runtime/cli-reference/vault) | Manage encrypted daemon vault metadata and write-only secrets | +| [agh version](/runtime/cli-reference/version) | Print the AGH version | +| [agh whoami](/runtime/cli-reference/whoami) | Print the current AGH agent identity from environment variables | +| [agh workspace](/runtime/cli-reference/workspace) | Manage registered workspaces | diff --git a/packages/site/content/runtime/cli-reference/automation/index.mdx b/packages/site/content/runtime/cli-reference/automation/index.mdx index 251207b59..857974d0c 100644 --- a/packages/site/content/runtime/cli-reference/automation/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/index.mdx @@ -41,8 +41,8 @@ agh automation -o json ## Subcommands -| Command | Description | -| --------------------------------------------------------------------- | ------------------------------ | -| [agh automation 
jobs](/runtime/cli-reference/automation/jobs) | Manage automation jobs | -| [agh automation runs](/runtime/cli-reference/automation/runs) | Inspect automation run history | -| [agh automation triggers](/runtime/cli-reference/automation/triggers) | Manage automation triggers | +| Command | Description | +| ------- | ----------- | +| [agh automation jobs](/runtime/cli-reference/automation/jobs) | Manage automation jobs | +| [agh automation runs](/runtime/cli-reference/automation/runs) | Inspect automation run history | +| [agh automation triggers](/runtime/cli-reference/automation/triggers) | Manage automation triggers | diff --git a/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx b/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx index 5795e889a..e1474db78 100644 --- a/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx @@ -45,11 +45,11 @@ agh automation jobs -o json ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------- | --------------------------------------- | -| [agh automation jobs create](/runtime/cli-reference/automation/jobs/create) | Create an automation job | -| [agh automation jobs delete](/runtime/cli-reference/automation/jobs/delete) | Delete an automation job | -| [agh automation jobs get](/runtime/cli-reference/automation/jobs/get) | Show one automation job | +| Command | Description | +| ------- | ----------- | +| [agh automation jobs create](/runtime/cli-reference/automation/jobs/create) | Create an automation job | +| [agh automation jobs delete](/runtime/cli-reference/automation/jobs/delete) | Delete an automation job | +| [agh automation jobs get](/runtime/cli-reference/automation/jobs/get) | Show one automation job | | [agh automation jobs history](/runtime/cli-reference/automation/jobs/history) | Show run history for one automation job | -| 
[agh automation jobs trigger](/runtime/cli-reference/automation/jobs/trigger) | Force an immediate automation job run | -| [agh automation jobs update](/runtime/cli-reference/automation/jobs/update) | Update an automation job | +| [agh automation jobs trigger](/runtime/cli-reference/automation/jobs/trigger) | Force an immediate automation job run | +| [agh automation jobs update](/runtime/cli-reference/automation/jobs/update) | Update an automation job | diff --git a/packages/site/content/runtime/cli-reference/automation/runs/index.mdx b/packages/site/content/runtime/cli-reference/automation/runs/index.mdx index d7708eb72..031f6db87 100644 --- a/packages/site/content/runtime/cli-reference/automation/runs/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/runs/index.mdx @@ -47,6 +47,6 @@ agh automation runs -o json ## Subcommands -| Command | Description | -| --------------------------------------------------------------------- | ----------------------- | +| Command | Description | +| ------- | ----------- | | [agh automation runs get](/runtime/cli-reference/automation/runs/get) | Show one automation run | diff --git a/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx b/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx index a35ae3c35..ed17c0e3a 100644 --- a/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx @@ -46,10 +46,10 @@ agh automation triggers -o json ## Subcommands -| Command | Description | -| ------------------------------------------------------------------------------------- | ------------------------------------------- | -| [agh automation triggers create](/runtime/cli-reference/automation/triggers/create) | Create an automation trigger | -| [agh automation triggers delete](/runtime/cli-reference/automation/triggers/delete) | Delete an automation trigger | -| [agh automation triggers 
get](/runtime/cli-reference/automation/triggers/get) | Show one automation trigger | +| Command | Description | +| ------- | ----------- | +| [agh automation triggers create](/runtime/cli-reference/automation/triggers/create) | Create an automation trigger | +| [agh automation triggers delete](/runtime/cli-reference/automation/triggers/delete) | Delete an automation trigger | +| [agh automation triggers get](/runtime/cli-reference/automation/triggers/get) | Show one automation trigger | | [agh automation triggers history](/runtime/cli-reference/automation/triggers/history) | Show run history for one automation trigger | -| [agh automation triggers update](/runtime/cli-reference/automation/triggers/update) | Update an automation trigger | +| [agh automation triggers update](/runtime/cli-reference/automation/triggers/update) | Update an automation trigger | diff --git a/packages/site/content/runtime/cli-reference/bridge/index.mdx b/packages/site/content/runtime/cli-reference/bridge/index.mdx index aea68d9d3..201b4ba96 100644 --- a/packages/site/content/runtime/cli-reference/bridge/index.mdx +++ b/packages/site/content/runtime/cli-reference/bridge/index.mdx @@ -29,17 +29,18 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------------------------- | ---------------------------------------------------------------- | -| [agh bridge create](/runtime/cli-reference/bridge/create) | Create a bridge instance | -| [agh bridge disable](/runtime/cli-reference/bridge/disable) | Disable a bridge instance | -| [agh bridge enable](/runtime/cli-reference/bridge/enable) | Enable a bridge instance | -| [agh bridge get](/runtime/cli-reference/bridge/get) | Show one bridge instance | -| [agh bridge list](/runtime/cli-reference/bridge/list) | List bridge instances | -| [agh 
bridge restart](/runtime/cli-reference/bridge/restart) | Restart a bridge instance | -| [agh bridge routes](/runtime/cli-reference/bridge/routes) | Inspect routes for one bridge instance | -| [agh bridge secret-bindings](/runtime/cli-reference/bridge/secret-bindings) | Manage bridge secret bindings | -| [agh bridge test-delivery](/runtime/cli-reference/bridge/test-delivery) | Resolve a typed outbound delivery target for one bridge instance | -| [agh bridge update](/runtime/cli-reference/bridge/update) | Update mutable bridge fields | +| Command | Description | +| ------- | ----------- | +| [agh bridge create](/runtime/cli-reference/bridge/create) | Create a bridge instance | +| [agh bridge disable](/runtime/cli-reference/bridge/disable) | Disable a bridge instance | +| [agh bridge enable](/runtime/cli-reference/bridge/enable) | Enable a bridge instance | +| [agh bridge get](/runtime/cli-reference/bridge/get) | Show one bridge instance | +| [agh bridge list](/runtime/cli-reference/bridge/list) | List bridge instances | +| [agh bridge restart](/runtime/cli-reference/bridge/restart) | Restart a bridge instance | +| [agh bridge routes](/runtime/cli-reference/bridge/routes) | Inspect routes for one bridge instance | +| [agh bridge secret-bindings](/runtime/cli-reference/bridge/secret-bindings) | Manage bridge secret bindings | +| [agh bridge test-delivery](/runtime/cli-reference/bridge/test-delivery) | Resolve a typed outbound delivery target for one bridge instance | +| [agh bridge update](/runtime/cli-reference/bridge/update) | Update mutable bridge fields | diff --git a/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx b/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx index 11ac3b267..74be516cb 100644 --- a/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx +++ b/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx @@ -41,8 +41,8 @@ agh bridge secret-bindings -o 
json ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------------------- | -------------------------------------------- | -| [agh bridge secret-bindings delete](/runtime/cli-reference/bridge/secret-bindings/delete) | Delete one bridge secret binding | -| [agh bridge secret-bindings list](/runtime/cli-reference/bridge/secret-bindings/list) | List secret bindings for one bridge instance | -| [agh bridge secret-bindings put](/runtime/cli-reference/bridge/secret-bindings/put) | Create or update one bridge secret binding | +| Command | Description | +| ------- | ----------- | +| [agh bridge secret-bindings delete](/runtime/cli-reference/bridge/secret-bindings/delete) | Delete one bridge secret binding | +| [agh bridge secret-bindings list](/runtime/cli-reference/bridge/secret-bindings/list) | List secret bindings for one bridge instance | +| [agh bridge secret-bindings put](/runtime/cli-reference/bridge/secret-bindings/put) | Create or update one bridge secret binding | diff --git a/packages/site/content/runtime/cli-reference/bundle/index.mdx b/packages/site/content/runtime/cli-reference/bundle/index.mdx index 125a6a9cf..fa1a7c3c6 100644 --- a/packages/site/content/runtime/cli-reference/bundle/index.mdx +++ b/packages/site/content/runtime/cli-reference/bundle/index.mdx @@ -29,15 +29,16 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------- | ----------------------------------------------------- | -| [agh bundle activate](/runtime/cli-reference/bundle/activate) | Activate a bundle preset | -| [agh bundle catalog](/runtime/cli-reference/bundle/catalog) | List available extension bundle presets | -| [agh bundle deactivate](/runtime/cli-reference/bundle/deactivate) | 
Deactivate a bundle preset and remove owned resources | -| [agh bundle get](/runtime/cli-reference/bundle/get) | Show one bundle activation | -| [agh bundle list](/runtime/cli-reference/bundle/list) | List active bundle presets | -| [agh bundle network-settings](/runtime/cli-reference/bundle/network-settings) | Show bundle-derived network settings | -| [agh bundle preview](/runtime/cli-reference/bundle/preview) | Preview a bundle activation without writing resources | -| [agh bundle update](/runtime/cli-reference/bundle/update) | Update bundle activation overlays | +| Command | Description | +| ------- | ----------- | +| [agh bundle activate](/runtime/cli-reference/bundle/activate) | Activate a bundle preset | +| [agh bundle catalog](/runtime/cli-reference/bundle/catalog) | List available extension bundle presets | +| [agh bundle deactivate](/runtime/cli-reference/bundle/deactivate) | Deactivate a bundle preset and remove owned resources | +| [agh bundle get](/runtime/cli-reference/bundle/get) | Show one bundle activation | +| [agh bundle list](/runtime/cli-reference/bundle/list) | List active bundle presets | +| [agh bundle network-settings](/runtime/cli-reference/bundle/network-settings) | Show bundle-derived network settings | +| [agh bundle preview](/runtime/cli-reference/bundle/preview) | Preview a bundle activation without writing resources | +| [agh bundle update](/runtime/cli-reference/bundle/update) | Update bundle activation overlays | diff --git a/packages/site/content/runtime/cli-reference/ch/index.mdx b/packages/site/content/runtime/cli-reference/ch/index.mdx index bb0b63084..6104c3204 100644 --- a/packages/site/content/runtime/cli-reference/ch/index.mdx +++ b/packages/site/content/runtime/cli-reference/ch/index.mdx @@ -47,9 +47,9 @@ agh ch list -o json ## Subcommands -| Command | Description | -| ----------------------------------------------- | --------------------------------------------------------------- | -| [agh ch 
list](/runtime/cli-reference/ch/list) | List coordination channels visible to the current agent session | -| [agh ch recv](/runtime/cli-reference/ch/recv) | Receive queued coordination messages for a channel | -| [agh ch reply](/runtime/cli-reference/ch/reply) | Reply to a received coordination message | -| [agh ch send](/runtime/cli-reference/ch/send) | Send one task-run coordination message | +| Command | Description | +| ------- | ----------- | +| [agh ch list](/runtime/cli-reference/ch/list) | List coordination channels visible to the current agent session | +| [agh ch recv](/runtime/cli-reference/ch/recv) | Receive queued coordination messages for a channel | +| [agh ch reply](/runtime/cli-reference/ch/reply) | Reply to a received coordination message | +| [agh ch send](/runtime/cli-reference/ch/send) | Send one task-run coordination message | diff --git a/packages/site/content/runtime/cli-reference/completion/bash.mdx b/packages/site/content/runtime/cli-reference/completion/bash.mdx index 459e57e1f..bc1c5b248 100644 --- a/packages/site/content/runtime/cli-reference/completion/bash.mdx +++ b/packages/site/content/runtime/cli-reference/completion/bash.mdx @@ -20,7 +20,6 @@ To load completions in your current shell session: source <(agh completion bash) ``` - To load completions for every new session, execute once: #### Linux: @@ -29,16 +28,15 @@ To load completions for every new session, execute once: agh completion bash > /etc/bash_completion.d/agh ``` - #### macOS: ``` agh completion bash > $(brew --prefix)/etc/bash_completion.d/agh ``` - You will need to start a new shell for this setup to take effect. 
+ ``` agh completion bash ``` diff --git a/packages/site/content/runtime/cli-reference/completion/fish.mdx b/packages/site/content/runtime/cli-reference/completion/fish.mdx index 7cd2fed41..0e238c948 100644 --- a/packages/site/content/runtime/cli-reference/completion/fish.mdx +++ b/packages/site/content/runtime/cli-reference/completion/fish.mdx @@ -17,16 +17,15 @@ To load completions in your current shell session: agh completion fish | source ``` - To load completions for every new session, execute once: ``` agh completion fish > ~/.config/fish/completions/agh.fish ``` - You will need to start a new shell for this setup to take effect. + ``` agh completion fish [flags] ``` diff --git a/packages/site/content/runtime/cli-reference/completion/index.mdx b/packages/site/content/runtime/cli-reference/completion/index.mdx index f7e3562a1..610062738 100644 --- a/packages/site/content/runtime/cli-reference/completion/index.mdx +++ b/packages/site/content/runtime/cli-reference/completion/index.mdx @@ -12,6 +12,7 @@ Generate the autocompletion script for the specified shell Generate the autocompletion script for agh for the specified shell. See each sub-command's help for details on how to use the generated script. 
+ ### Options ``` @@ -34,11 +35,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------------------------- | ------------------------------------------------- | -| [agh completion bash](/runtime/cli-reference/completion/bash) | Generate the autocompletion script for bash | -| [agh completion fish](/runtime/cli-reference/completion/fish) | Generate the autocompletion script for fish | +| Command | Description | +| ------- | ----------- | +| [agh completion bash](/runtime/cli-reference/completion/bash) | Generate the autocompletion script for bash | +| [agh completion fish](/runtime/cli-reference/completion/fish) | Generate the autocompletion script for fish | | [agh completion powershell](/runtime/cli-reference/completion/powershell) | Generate the autocompletion script for powershell | -| [agh completion zsh](/runtime/cli-reference/completion/zsh) | Generate the autocompletion script for zsh | +| [agh completion zsh](/runtime/cli-reference/completion/zsh) | Generate the autocompletion script for zsh | diff --git a/packages/site/content/runtime/cli-reference/completion/powershell.mdx b/packages/site/content/runtime/cli-reference/completion/powershell.mdx index 8c3bf965d..7a8991c28 100644 --- a/packages/site/content/runtime/cli-reference/completion/powershell.mdx +++ b/packages/site/content/runtime/cli-reference/completion/powershell.mdx @@ -17,10 +17,10 @@ To load completions in your current shell session: agh completion powershell | Out-String | Invoke-Expression ``` - To load completions for every new session, add the output of the above command to your powershell profile. 
+ ``` agh completion powershell [flags] ``` diff --git a/packages/site/content/runtime/cli-reference/completion/zsh.mdx b/packages/site/content/runtime/cli-reference/completion/zsh.mdx index 1ce4c4700..783e3da2b 100644 --- a/packages/site/content/runtime/cli-reference/completion/zsh.mdx +++ b/packages/site/content/runtime/cli-reference/completion/zsh.mdx @@ -12,20 +12,18 @@ Generate the autocompletion script for zsh Generate the autocompletion script for the zsh shell. If shell completion is not already enabled in your environment you will need -to enable it. You can execute the following once: +to enable it. You can execute the following once: ``` echo "autoload -U compinit; compinit" >> ~/.zshrc ``` - To load completions in your current shell session: ``` source <(agh completion zsh) ``` - To load completions for every new session, execute once: #### Linux: @@ -34,16 +32,15 @@ To load completions for every new session, execute once: agh completion zsh > "${fpath[1]}/_agh" ``` - #### macOS: ``` agh completion zsh > $(brew --prefix)/share/zsh/site-functions/_agh ``` - You will need to start a new shell for this setup to take effect. 
+ ``` agh completion zsh [flags] ``` diff --git a/packages/site/content/runtime/cli-reference/config/index.mdx b/packages/site/content/runtime/cli-reference/config/index.mdx index d7992a043..388b83c35 100644 --- a/packages/site/content/runtime/cli-reference/config/index.mdx +++ b/packages/site/content/runtime/cli-reference/config/index.mdx @@ -29,15 +29,16 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------------- | -------------------------------------------------------- | -| [agh config check](/runtime/cli-reference/config/check) | Alias for config validate | -| [agh config edit](/runtime/cli-reference/config/edit) | Open the selected config overlay in $VISUAL or $EDITOR | -| [agh config get](/runtime/cli-reference/config/get) | Get one redacted effective config value | -| [agh config list](/runtime/cli-reference/config/list) | List redacted effective config values | -| [agh config path](/runtime/cli-reference/config/path) | Show resolved AGH config paths | -| [agh config set](/runtime/cli-reference/config/set) | Set one config value through the validated config writer | -| [agh config show](/runtime/cli-reference/config/show) | Show the redacted effective config | -| [agh config validate](/runtime/cli-reference/config/validate) | Validate AGH configuration | +| Command | Description | +| ------- | ----------- | +| [agh config check](/runtime/cli-reference/config/check) | Alias for config validate | +| [agh config edit](/runtime/cli-reference/config/edit) | Open the selected config overlay in $VISUAL or $EDITOR | +| [agh config get](/runtime/cli-reference/config/get) | Get one redacted effective config value | +| [agh config list](/runtime/cli-reference/config/list) | List redacted effective config values | +| [agh config 
path](/runtime/cli-reference/config/path) | Show resolved AGH config paths | +| [agh config set](/runtime/cli-reference/config/set) | Set one config value through the validated config writer | +| [agh config show](/runtime/cli-reference/config/show) | Show the redacted effective config | +| [agh config validate](/runtime/cli-reference/config/validate) | Validate AGH configuration | diff --git a/packages/site/content/runtime/cli-reference/daemon/index.mdx b/packages/site/content/runtime/cli-reference/daemon/index.mdx index fd9e886fe..e58057d73 100644 --- a/packages/site/content/runtime/cli-reference/daemon/index.mdx +++ b/packages/site/content/runtime/cli-reference/daemon/index.mdx @@ -29,10 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------- | -------------------- | -| [agh daemon start](/runtime/cli-reference/daemon/start) | Start the AGH daemon | -| [agh daemon status](/runtime/cli-reference/daemon/status) | Show daemon status | -| [agh daemon stop](/runtime/cli-reference/daemon/stop) | Stop the AGH daemon | +| Command | Description | +| ------- | ----------- | +| [agh daemon start](/runtime/cli-reference/daemon/start) | Start the AGH daemon | +| [agh daemon status](/runtime/cli-reference/daemon/status) | Show daemon status | +| [agh daemon stop](/runtime/cli-reference/daemon/stop) | Stop the AGH daemon | diff --git a/packages/site/content/runtime/cli-reference/extension/index.mdx b/packages/site/content/runtime/cli-reference/extension/index.mdx index bc3dd7a59..c1495841d 100644 --- a/packages/site/content/runtime/cli-reference/extension/index.mdx +++ b/packages/site/content/runtime/cli-reference/extension/index.mdx @@ -29,15 +29,16 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON 
record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------- | --------------------------------------------------------- | -| [agh extension disable](/runtime/cli-reference/extension/disable) | Disable an installed extension | -| [agh extension enable](/runtime/cli-reference/extension/enable) | Enable an installed extension | +| Command | Description | +| ------- | ----------- | +| [agh extension disable](/runtime/cli-reference/extension/disable) | Disable an installed extension | +| [agh extension enable](/runtime/cli-reference/extension/enable) | Enable an installed extension | | [agh extension install](/runtime/cli-reference/extension/install) | Install a local extension or download one from a registry | -| [agh extension list](/runtime/cli-reference/extension/list) | List installed extensions | -| [agh extension remove](/runtime/cli-reference/extension/remove) | Remove an installed extension from disk and the registry | -| [agh extension search](/runtime/cli-reference/extension/search) | Search remote extension registries | -| [agh extension status](/runtime/cli-reference/extension/status) | Show extension runtime status | -| [agh extension update](/runtime/cli-reference/extension/update) | Check for or install updates for marketplace extensions | +| [agh extension list](/runtime/cli-reference/extension/list) | List installed extensions | +| [agh extension remove](/runtime/cli-reference/extension/remove) | Remove an installed extension from disk and the registry | +| [agh extension search](/runtime/cli-reference/extension/search) | Search remote extension registries | +| [agh extension status](/runtime/cli-reference/extension/status) | Show extension runtime status | +| [agh extension update](/runtime/cli-reference/extension/update) | Check for or install updates for marketplace extensions | diff --git 
a/packages/site/content/runtime/cli-reference/hooks/index.mdx b/packages/site/content/runtime/cli-reference/hooks/index.mdx index f9cd7728c..c7e2b67e8 100644 --- a/packages/site/content/runtime/cli-reference/hooks/index.mdx +++ b/packages/site/content/runtime/cli-reference/hooks/index.mdx @@ -29,11 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------- | ------------------------------------------------------- | -| [agh hooks events](/runtime/cli-reference/hooks/events) | List supported hook events | -| [agh hooks info](/runtime/cli-reference/hooks/info) | Show detailed information for one or more hooks by name | -| [agh hooks list](/runtime/cli-reference/hooks/list) | List resolved hooks in pipeline order | -| [agh hooks runs](/runtime/cli-reference/hooks/runs) | Show persisted hook execution history | +| Command | Description | +| ------- | ----------- | +| [agh hooks events](/runtime/cli-reference/hooks/events) | List supported hook events | +| [agh hooks info](/runtime/cli-reference/hooks/info) | Show detailed information for one or more hooks by name | +| [agh hooks list](/runtime/cli-reference/hooks/list) | List resolved hooks in pipeline order | +| [agh hooks runs](/runtime/cli-reference/hooks/runs) | Show persisted hook execution history | diff --git a/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx b/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx index b69e3e3d9..c90338fa0 100644 --- a/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx +++ b/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx @@ -29,10 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## 
Subcommands -| Command | Description | -| ------------------------------------------------------------- | --------------------------------------- | -| [agh mcp auth login](/runtime/cli-reference/mcp/auth/login) | Run OAuth login for a remote MCP server | +| Command | Description | +| ------- | ----------- | +| [agh mcp auth login](/runtime/cli-reference/mcp/auth/login) | Run OAuth login for a remote MCP server | | [agh mcp auth logout](/runtime/cli-reference/mcp/auth/logout) | Revoke or delete remote MCP auth tokens | -| [agh mcp auth status](/runtime/cli-reference/mcp/auth/status) | Show redacted remote MCP auth status | +| [agh mcp auth status](/runtime/cli-reference/mcp/auth/status) | Show redacted remote MCP auth status | diff --git a/packages/site/content/runtime/cli-reference/mcp/index.mdx b/packages/site/content/runtime/cli-reference/mcp/index.mdx index a9f70ee85..7163a2a18 100644 --- a/packages/site/content/runtime/cli-reference/mcp/index.mdx +++ b/packages/site/content/runtime/cli-reference/mcp/index.mdx @@ -29,8 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------- | ------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh mcp auth](/runtime/cli-reference/mcp/auth) | Authenticate remote MCP servers | diff --git a/packages/site/content/runtime/cli-reference/me/index.mdx b/packages/site/content/runtime/cli-reference/me/index.mdx index 01502917a..834e4db45 100644 --- a/packages/site/content/runtime/cli-reference/me/index.mdx +++ b/packages/site/content/runtime/cli-reference/me/index.mdx @@ -51,6 +51,6 @@ agh me -o json ## Subcommands -| Command | Description | -| --------------------------------------------------- | ------------------------------------------------------------------- | +| Command | Description 
| +| ------- | ----------- | | [agh me context](/runtime/cli-reference/me/context) | Inspect the bounded situation context for the current agent session | diff --git a/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx b/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx index 17f82854d..0b6d4f77b 100644 --- a/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx @@ -29,9 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------- | -------------------------- | +| Command | Description | +| ------- | ----------- | | [agh memory adhoc list](/runtime/cli-reference/memory/adhoc/list) | Reserved Memory v2 command | | [agh memory adhoc show](/runtime/cli-reference/memory/adhoc/show) | Reserved Memory v2 command | diff --git a/packages/site/content/runtime/cli-reference/memory/daily/index.mdx b/packages/site/content/runtime/cli-reference/memory/daily/index.mdx index 4551ea22b..7702e7b35 100644 --- a/packages/site/content/runtime/cli-reference/memory/daily/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/daily/index.mdx @@ -29,12 +29,13 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------- | ----------------------------------- | -| [agh memory daily archive](/runtime/cli-reference/memory/daily/archive) | Reserved Memory v2 command | -| [agh memory daily ls](/runtime/cli-reference/memory/daily/ls) | List Memory v2 daily operation logs | -| [agh memory daily 
purge](/runtime/cli-reference/memory/daily/purge) | Reserved Memory v2 command | -| [agh memory daily restore](/runtime/cli-reference/memory/daily/restore) | Reserved Memory v2 command | -| [agh memory daily show](/runtime/cli-reference/memory/daily/show) | Reserved Memory v2 command | +| Command | Description | +| ------- | ----------- | +| [agh memory daily archive](/runtime/cli-reference/memory/daily/archive) | Reserved Memory v2 command | +| [agh memory daily ls](/runtime/cli-reference/memory/daily/ls) | List Memory v2 daily operation logs | +| [agh memory daily purge](/runtime/cli-reference/memory/daily/purge) | Reserved Memory v2 command | +| [agh memory daily restore](/runtime/cli-reference/memory/daily/restore) | Reserved Memory v2 command | +| [agh memory daily show](/runtime/cli-reference/memory/daily/show) | Reserved Memory v2 command | diff --git a/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx b/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx index 842278ef4..6de535bbd 100644 --- a/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx @@ -29,10 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------- | ---------------------------------------- | -| [agh memory decisions list](/runtime/cli-reference/memory/decisions/list) | List Memory v2 controller decisions | +| Command | Description | +| ------- | ----------- | +| [agh memory decisions list](/runtime/cli-reference/memory/decisions/list) | List Memory v2 controller decisions | | [agh memory decisions revert](/runtime/cli-reference/memory/decisions/revert) | Revert one Memory v2 controller decision | -| [agh memory decisions 
show](/runtime/cli-reference/memory/decisions/show) | Show one Memory v2 controller decision | +| [agh memory decisions show](/runtime/cli-reference/memory/decisions/show) | Show one Memory v2 controller decision | diff --git a/packages/site/content/runtime/cli-reference/memory/dream/index.mdx b/packages/site/content/runtime/cli-reference/memory/dream/index.mdx index 205c1533f..3da7d4919 100644 --- a/packages/site/content/runtime/cli-reference/memory/dream/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/dream/index.mdx @@ -29,11 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------- | --------------------------------------- | -| [agh memory dream retry](/runtime/cli-reference/memory/dream/retry) | Retry one failed Memory v2 dreaming run | -| [agh memory dream show](/runtime/cli-reference/memory/dream/show) | Show one Memory v2 dreaming run | -| [agh memory dream status](/runtime/cli-reference/memory/dream/status) | Show Memory v2 dreaming runtime status | -| [agh memory dream trigger](/runtime/cli-reference/memory/dream/trigger) | Trigger Memory v2 dreaming | +| Command | Description | +| ------- | ----------- | +| [agh memory dream retry](/runtime/cli-reference/memory/dream/retry) | Retry one failed Memory v2 dreaming run | +| [agh memory dream show](/runtime/cli-reference/memory/dream/show) | Show one Memory v2 dreaming run | +| [agh memory dream status](/runtime/cli-reference/memory/dream/status) | Show Memory v2 dreaming runtime status | +| [agh memory dream trigger](/runtime/cli-reference/memory/dream/trigger) | Trigger Memory v2 dreaming | diff --git a/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx b/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx index 
d15fc6d32..8ec17c122 100644 --- a/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx @@ -29,12 +29,13 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------------------- | -------------------------------------------- | -| [agh memory extractor disable](/runtime/cli-reference/memory/extractor/disable) | Reserved Memory v2 command | -| [agh memory extractor drain](/runtime/cli-reference/memory/extractor/drain) | Drain Memory v2 extractor work | +| Command | Description | +| ------- | ----------- | +| [agh memory extractor disable](/runtime/cli-reference/memory/extractor/disable) | Reserved Memory v2 command | +| [agh memory extractor drain](/runtime/cli-reference/memory/extractor/drain) | Drain Memory v2 extractor work | | [agh memory extractor list-pending](/runtime/cli-reference/memory/extractor/list-pending) | List Memory v2 extractor pending/DLQ records | -| [agh memory extractor replay](/runtime/cli-reference/memory/extractor/replay) | Replay Memory v2 extractor work | -| [agh memory extractor status](/runtime/cli-reference/memory/extractor/status) | Show Memory v2 extractor runtime status | +| [agh memory extractor replay](/runtime/cli-reference/memory/extractor/replay) | Replay Memory v2 extractor work | +| [agh memory extractor status](/runtime/cli-reference/memory/extractor/status) | Show Memory v2 extractor runtime status | diff --git a/packages/site/content/runtime/cli-reference/memory/extractor/replay.mdx b/packages/site/content/runtime/cli-reference/memory/extractor/replay.mdx index 2e97cf5d4..0a0ba32c1 100644 --- a/packages/site/content/runtime/cli-reference/memory/extractor/replay.mdx +++ 
b/packages/site/content/runtime/cli-reference/memory/extractor/replay.mdx @@ -14,7 +14,6 @@ agh memory extractor replay --session [flags] ### Options ``` - --from-dlq Replay from dead-letter queue records -h, --help help for replay --session string Session whose extractor work should be replayed ``` diff --git a/packages/site/content/runtime/cli-reference/memory/index.mdx b/packages/site/content/runtime/cli-reference/memory/index.mdx index 1b00cfae6..c0f138225 100644 --- a/packages/site/content/runtime/cli-reference/memory/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/index.mdx @@ -29,27 +29,28 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------- | ----------------------------------------------------------- | -| [agh memory adhoc](/runtime/cli-reference/memory/adhoc) | Inspect ad-hoc Memory v2 notes | -| [agh memory daily](/runtime/cli-reference/memory/daily) | Inspect Memory v2 daily operation logs | -| [agh memory decisions](/runtime/cli-reference/memory/decisions) | Inspect and revert Memory v2 controller decisions | -| [agh memory delete](/runtime/cli-reference/memory/delete) | Delete a Memory v2 entry through the controller | -| [agh memory dream](/runtime/cli-reference/memory/dream) | Operate Memory v2 dreaming runs | -| [agh memory edit](/runtime/cli-reference/memory/edit) | Edit a Memory v2 entry through the controller | -| [agh memory extractor](/runtime/cli-reference/memory/extractor) | Operate Memory v2 extractor runtime | -| [agh memory health](/runtime/cli-reference/memory/health) | Show Memory v2 health | -| [agh memory history](/runtime/cli-reference/memory/history) | Show redaction-safe Memory v2 operation history | -| [agh memory list](/runtime/cli-reference/memory/list) | List Memory v2 entries | -| 
[agh memory promote](/runtime/cli-reference/memory/promote) | Promote a memory entry across Memory v2 scopes | -| [agh memory provider](/runtime/cli-reference/memory/provider) | Operate Memory v2 providers | -| [agh memory recall](/runtime/cli-reference/memory/recall) | Inspect Memory v2 recall traces | -| [agh memory reindex](/runtime/cli-reference/memory/reindex) | Rebuild the derived Memory v2 search catalog | -| [agh memory reload](/runtime/cli-reference/memory/reload) | Invalidate frozen memory snapshots for future session boots | -| [agh memory reset](/runtime/cli-reference/memory/reset) | Reset derived Memory v2 state through the daemon | -| [agh memory scope-show](/runtime/cli-reference/memory/scope-show) | Show resolved Memory v2 precedence for a selector | -| [agh memory search](/runtime/cli-reference/memory/search) | Search deterministic Memory v2 recall | -| [agh memory show](/runtime/cli-reference/memory/show) | Show one Memory v2 entry | -| [agh memory write](/runtime/cli-reference/memory/write) | Create a Memory v2 entry through the controller | +| Command | Description | +| ------- | ----------- | +| [agh memory adhoc](/runtime/cli-reference/memory/adhoc) | Inspect ad-hoc Memory v2 notes | +| [agh memory daily](/runtime/cli-reference/memory/daily) | Inspect Memory v2 daily operation logs | +| [agh memory decisions](/runtime/cli-reference/memory/decisions) | Inspect and revert Memory v2 controller decisions | +| [agh memory delete](/runtime/cli-reference/memory/delete) | Delete a Memory v2 entry through the controller | +| [agh memory dream](/runtime/cli-reference/memory/dream) | Operate Memory v2 dreaming runs | +| [agh memory edit](/runtime/cli-reference/memory/edit) | Edit a Memory v2 entry through the controller | +| [agh memory extractor](/runtime/cli-reference/memory/extractor) | Operate Memory v2 extractor runtime | +| [agh memory health](/runtime/cli-reference/memory/health) | Show Memory v2 health | +| [agh memory 
history](/runtime/cli-reference/memory/history) | Show redaction-safe Memory v2 operation history | +| [agh memory list](/runtime/cli-reference/memory/list) | List Memory v2 entries | +| [agh memory promote](/runtime/cli-reference/memory/promote) | Promote a memory entry across Memory v2 scopes | +| [agh memory provider](/runtime/cli-reference/memory/provider) | Operate Memory v2 providers | +| [agh memory recall](/runtime/cli-reference/memory/recall) | Inspect Memory v2 recall traces | +| [agh memory reindex](/runtime/cli-reference/memory/reindex) | Rebuild the derived Memory v2 search catalog | +| [agh memory reload](/runtime/cli-reference/memory/reload) | Invalidate frozen memory snapshots for future session boots | +| [agh memory reset](/runtime/cli-reference/memory/reset) | Reset derived Memory v2 state through the daemon | +| [agh memory scope-show](/runtime/cli-reference/memory/scope-show) | Show resolved Memory v2 precedence for a selector | +| [agh memory search](/runtime/cli-reference/memory/search) | Search deterministic Memory v2 recall | +| [agh memory show](/runtime/cli-reference/memory/show) | Show one Memory v2 entry | +| [agh memory write](/runtime/cli-reference/memory/write) | Create a Memory v2 entry through the controller | diff --git a/packages/site/content/runtime/cli-reference/memory/provider/index.mdx b/packages/site/content/runtime/cli-reference/memory/provider/index.mdx index 0d109bef6..97bb2b35d 100644 --- a/packages/site/content/runtime/cli-reference/memory/provider/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/provider/index.mdx @@ -29,10 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------- | ---------------------------------------- | -| [agh memory provider 
disable](/runtime/cli-reference/memory/provider/disable) | Disable one Memory v2 provider | -| [agh memory provider enable](/runtime/cli-reference/memory/provider/enable) | Enable and select one Memory v2 provider | -| [agh memory provider list](/runtime/cli-reference/memory/provider/list) | List registered Memory v2 providers | +| Command | Description | +| ------- | ----------- | +| [agh memory provider disable](/runtime/cli-reference/memory/provider/disable) | Disable one Memory v2 provider | +| [agh memory provider enable](/runtime/cli-reference/memory/provider/enable) | Enable and select one Memory v2 provider | +| [agh memory provider list](/runtime/cli-reference/memory/provider/list) | List registered Memory v2 providers | diff --git a/packages/site/content/runtime/cli-reference/memory/recall/index.mdx b/packages/site/content/runtime/cli-reference/memory/recall/index.mdx index 5c3188ce0..ae5afcbf3 100644 --- a/packages/site/content/runtime/cli-reference/memory/recall/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/recall/index.mdx @@ -29,8 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------------------- | ------------------------------------ | +| Command | Description | +| ------- | ----------- | | [agh memory recall trace](/runtime/cli-reference/memory/recall/trace) | Show one redaction-safe recall trace | diff --git a/packages/site/content/runtime/cli-reference/memory/reset.mdx b/packages/site/content/runtime/cli-reference/memory/reset.mdx index 9cb9ed3f0..6ea6cd486 100644 --- a/packages/site/content/runtime/cli-reference/memory/reset.mdx +++ b/packages/site/content/runtime/cli-reference/memory/reset.mdx @@ -19,7 +19,6 @@ agh memory reset [flags] --dry-run Show reset work without applying it -h, --help help for 
reset --include-daily Include daily memory artifacts - --include-system Include _system memory state --scope string Memory scope: global, workspace, or agent --workspace string Workspace ID or path for workspace-bound memory ``` diff --git a/packages/site/content/runtime/cli-reference/network/directs/index.mdx b/packages/site/content/runtime/cli-reference/network/directs/index.mdx index 8643f897e..5cb5684f4 100644 --- a/packages/site/content/runtime/cli-reference/network/directs/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/directs/index.mdx @@ -29,11 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------------------------------- | ------------------------------------------------------------ | -| [agh network directs list](/runtime/cli-reference/network/directs/list) | List direct rooms in a channel | -| [agh network directs messages](/runtime/cli-reference/network/directs/messages) | List messages in one direct room | -| [agh network directs resolve](/runtime/cli-reference/network/directs/resolve) | Create or return the deterministic direct room for two peers | -| [agh network directs show](/runtime/cli-reference/network/directs/show) | Show one direct room | +| Command | Description | +| ------- | ----------- | +| [agh network directs list](/runtime/cli-reference/network/directs/list) | List direct rooms in a channel | +| [agh network directs messages](/runtime/cli-reference/network/directs/messages) | List messages in one direct room | +| [agh network directs resolve](/runtime/cli-reference/network/directs/resolve) | Create or return the deterministic direct room for two peers | +| [agh network directs show](/runtime/cli-reference/network/directs/show) | Show one direct room | diff --git 
a/packages/site/content/runtime/cli-reference/network/index.mdx b/packages/site/content/runtime/cli-reference/network/index.mdx index 0813df9f1..ac6fe1b8d 100644 --- a/packages/site/content/runtime/cli-reference/network/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/index.mdx @@ -29,15 +29,16 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------------- | ---------------------------------------------------------- | -| [agh network channels](/runtime/cli-reference/network/channels) | List active runtime channels | -| [agh network directs](/runtime/cli-reference/network/directs) | Inspect restricted direct rooms | -| [agh network inbox](/runtime/cli-reference/network/inbox) | Show queued inbound messages for one session | -| [agh network peers](/runtime/cli-reference/network/peers) | List visible local and remote peers | -| [agh network send](/runtime/cli-reference/network/send) | Send one envelope through the daemon-owned network runtime | -| [agh network status](/runtime/cli-reference/network/status) | Show network runtime status and queue metrics | -| [agh network threads](/runtime/cli-reference/network/threads) | Inspect public network threads | -| [agh network work](/runtime/cli-reference/network/work) | Inspect lifecycle-bearing network work | +| Command | Description | +| ------- | ----------- | +| [agh network channels](/runtime/cli-reference/network/channels) | List active runtime channels | +| [agh network directs](/runtime/cli-reference/network/directs) | Inspect restricted direct rooms | +| [agh network inbox](/runtime/cli-reference/network/inbox) | Show queued inbound messages for one session | +| [agh network peers](/runtime/cli-reference/network/peers) | List visible local and remote peers | +| [agh network 
send](/runtime/cli-reference/network/send) | Send one envelope through the daemon-owned network runtime | +| [agh network status](/runtime/cli-reference/network/status) | Show network runtime status and queue metrics | +| [agh network threads](/runtime/cli-reference/network/threads) | Inspect public network threads | +| [agh network work](/runtime/cli-reference/network/work) | Inspect lifecycle-bearing network work | diff --git a/packages/site/content/runtime/cli-reference/network/threads/index.mdx b/packages/site/content/runtime/cli-reference/network/threads/index.mdx index bc4cddd0e..41fcfa52f 100644 --- a/packages/site/content/runtime/cli-reference/network/threads/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/threads/index.mdx @@ -29,10 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------------------------------- | ---------------------------------- | -| [agh network threads list](/runtime/cli-reference/network/threads/list) | List public threads in a channel | +| Command | Description | +| ------- | ----------- | +| [agh network threads list](/runtime/cli-reference/network/threads/list) | List public threads in a channel | | [agh network threads messages](/runtime/cli-reference/network/threads/messages) | List messages in one public thread | -| [agh network threads show](/runtime/cli-reference/network/threads/show) | Show one public thread | +| [agh network threads show](/runtime/cli-reference/network/threads/show) | Show one public thread | diff --git a/packages/site/content/runtime/cli-reference/network/work/index.mdx b/packages/site/content/runtime/cli-reference/network/work/index.mdx index 6db599f08..23bc9bd06 100644 --- a/packages/site/content/runtime/cli-reference/network/work/index.mdx +++ 
b/packages/site/content/runtime/cli-reference/network/work/index.mdx @@ -29,9 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------------------- | -------------------------- | +| Command | Description | +| ------- | ----------- | | [agh network work lookup](/runtime/cli-reference/network/work/lookup) | Show one network work item | | [agh network work status](/runtime/cli-reference/network/work/status) | Show one network work item | diff --git a/packages/site/content/runtime/cli-reference/observe/index.mdx b/packages/site/content/runtime/cli-reference/observe/index.mdx index 23875b096..2be4091c0 100644 --- a/packages/site/content/runtime/cli-reference/observe/index.mdx +++ b/packages/site/content/runtime/cli-reference/observe/index.mdx @@ -29,9 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------- | --------------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh observe events](/runtime/cli-reference/observe/events) | Read cross-session observability events | -| [agh observe health](/runtime/cli-reference/observe/health) | Show observability health | +| [agh observe health](/runtime/cli-reference/observe/health) | Show observability health | diff --git a/packages/site/content/runtime/cli-reference/provider/auth/index.mdx b/packages/site/content/runtime/cli-reference/provider/auth/index.mdx index f90c388d9..cc9075979 100644 --- a/packages/site/content/runtime/cli-reference/provider/auth/index.mdx +++ b/packages/site/content/runtime/cli-reference/provider/auth/index.mdx @@ -29,9 +29,10 
@@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------- | ------------------------------------- | -| [agh provider auth login](/runtime/cli-reference/provider/auth/login) | Run the provider native login command | -| [agh provider auth status](/runtime/cli-reference/provider/auth/status) | Show provider authentication status | +| Command | Description | +| ------- | ----------- | +| [agh provider auth login](/runtime/cli-reference/provider/auth/login) | Run the provider native login command | +| [agh provider auth status](/runtime/cli-reference/provider/auth/status) | Show provider authentication status | diff --git a/packages/site/content/runtime/cli-reference/provider/index.mdx b/packages/site/content/runtime/cli-reference/provider/index.mdx index 4a1e8b05c..d48a33452 100644 --- a/packages/site/content/runtime/cli-reference/provider/index.mdx +++ b/packages/site/content/runtime/cli-reference/provider/index.mdx @@ -29,8 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------- | ----------------------------------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh provider auth](/runtime/cli-reference/provider/auth) | Inspect native CLI and bound-secret provider authentication | +| [agh provider models](/runtime/cli-reference/provider/models) | Inspect and refresh the provider model catalog | diff --git a/packages/site/content/runtime/cli-reference/provider/meta.json b/packages/site/content/runtime/cli-reference/provider/meta.json index 5edafedb0..9d1e47bb7 100644 --- 
a/packages/site/content/runtime/cli-reference/provider/meta.json +++ b/packages/site/content/runtime/cli-reference/provider/meta.json @@ -1,4 +1,4 @@ { "title": "Provider", - "pages": ["index", "auth"] + "pages": ["index", "auth", "models"] } diff --git a/packages/site/content/runtime/cli-reference/provider/models/index.mdx b/packages/site/content/runtime/cli-reference/provider/models/index.mdx new file mode 100644 index 000000000..345d14560 --- /dev/null +++ b/packages/site/content/runtime/cli-reference/provider/models/index.mdx @@ -0,0 +1,39 @@ +--- +title: "agh provider models" +description: "Inspect and refresh the provider model catalog" +--- + +## agh provider models + +Inspect and refresh the provider model catalog + +### Options + +``` + -h, --help help for models +``` + +### Options inherited from parent commands + +``` + --json Emit JSON output + -o, --output string Output format: human, json, jsonl, or toon (default "human") +``` + +## Output Formats + +Every AGH command supports `-o, --output`: + +- `human` for interactive terminal use +- `json` for scripts and other machine-readable consumers +- `jsonl` for wait or streaming commands that emit one JSON record per line +- `toon` for compact agent-readable summaries + + +## Subcommands + +| Command | Description | +| ------- | ----------- | +| [agh provider models list](/runtime/cli-reference/provider/models/list) | List provider model catalog entries | +| [agh provider models refresh](/runtime/cli-reference/provider/models/refresh) | Refresh provider model catalog sources | +| [agh provider models status](/runtime/cli-reference/provider/models/status) | Show provider model catalog source status | diff --git a/packages/site/content/runtime/cli-reference/provider/models/list.mdx b/packages/site/content/runtime/cli-reference/provider/models/list.mdx new file mode 100644 index 000000000..c780fd34a --- /dev/null +++ b/packages/site/content/runtime/cli-reference/provider/models/list.mdx @@ -0,0 +1,43 @@ +--- 
+title: "agh provider models list" +description: "List provider model catalog entries" +--- + +## agh provider models list + +List provider model catalog entries + +``` +agh provider models list [provider] [flags] +``` + +### Options + +``` + -h, --help help for list + --include-stale Include stale source rows + --refresh Refresh sources before listing models + --source string Filter by catalog source id +``` + +### Options inherited from parent commands + +``` + --json Emit JSON output + -o, --output string Output format: human, json, jsonl, or toon (default "human") +``` + +## Output Formats + +Every AGH command supports `-o, --output`: + +- `human` for interactive terminal use +- `json` for scripts and other machine-readable consumers +- `jsonl` for wait or streaming commands that emit one JSON record per line +- `toon` for compact agent-readable summaries + +Example: + +```bash +agh provider models list [provider] -o json +``` diff --git a/packages/site/content/runtime/cli-reference/provider/models/meta.json b/packages/site/content/runtime/cli-reference/provider/models/meta.json new file mode 100644 index 000000000..fbf31434a --- /dev/null +++ b/packages/site/content/runtime/cli-reference/provider/models/meta.json @@ -0,0 +1,4 @@ +{ + "title": "Models", + "pages": ["index", "list", "refresh", "status"] +} diff --git a/packages/site/content/runtime/cli-reference/provider/models/refresh.mdx b/packages/site/content/runtime/cli-reference/provider/models/refresh.mdx new file mode 100644 index 000000000..062495d34 --- /dev/null +++ b/packages/site/content/runtime/cli-reference/provider/models/refresh.mdx @@ -0,0 +1,43 @@ +--- +title: "agh provider models refresh" +description: "Refresh provider model catalog sources" +--- + +## agh provider models refresh + +Refresh provider model catalog sources + +``` +agh provider models refresh [provider] [flags] +``` + +### Options + +``` + --force Force refresh even when cached status is fresh + -h, --help help for refresh + 
--request-id string Refresh request id for daemon logs + --source string Refresh only one catalog source id +``` + +### Options inherited from parent commands + +``` + --json Emit JSON output + -o, --output string Output format: human, json, jsonl, or toon (default "human") +``` + +## Output Formats + +Every AGH command supports `-o, --output`: + +- `human` for interactive terminal use +- `json` for scripts and other machine-readable consumers +- `jsonl` for wait or streaming commands that emit one JSON record per line +- `toon` for compact agent-readable summaries + +Example: + +```bash +agh provider models refresh [provider] -o json +``` diff --git a/packages/site/content/runtime/cli-reference/provider/models/status.mdx b/packages/site/content/runtime/cli-reference/provider/models/status.mdx new file mode 100644 index 000000000..7449def9e --- /dev/null +++ b/packages/site/content/runtime/cli-reference/provider/models/status.mdx @@ -0,0 +1,40 @@ +--- +title: "agh provider models status" +description: "Show provider model catalog source status" +--- + +## agh provider models status + +Show provider model catalog source status + +``` +agh provider models status [provider] [flags] +``` + +### Options + +``` + -h, --help help for status +``` + +### Options inherited from parent commands + +``` + --json Emit JSON output + -o, --output string Output format: human, json, jsonl, or toon (default "human") +``` + +## Output Formats + +Every AGH command supports `-o, --output`: + +- `human` for interactive terminal use +- `json` for scripts and other machine-readable consumers +- `jsonl` for wait or streaming commands that emit one JSON record per line +- `toon` for compact agent-readable summaries + +Example: + +```bash +agh provider models status [provider] -o json +``` diff --git a/packages/site/content/runtime/cli-reference/resource/index.mdx b/packages/site/content/runtime/cli-reference/resource/index.mdx index 8984e84ea..d0c23baf4 100644 --- 
a/packages/site/content/runtime/cli-reference/resource/index.mdx +++ b/packages/site/content/runtime/cli-reference/resource/index.mdx @@ -41,9 +41,9 @@ agh resource -o json ## Subcommands -| Command | Description | -| ------------------------------------------------------------- | ------------------------------------------- | -| [agh resource delete](/runtime/cli-reference/resource/delete) | Delete one desired-state resource | -| [agh resource get](/runtime/cli-reference/resource/get) | Show one desired-state resource | -| [agh resource list](/runtime/cli-reference/resource/list) | List desired-state resources | -| [agh resource put](/runtime/cli-reference/resource/put) | Create or update one desired-state resource | +| Command | Description | +| ------- | ----------- | +| [agh resource delete](/runtime/cli-reference/resource/delete) | Delete one desired-state resource | +| [agh resource get](/runtime/cli-reference/resource/get) | Show one desired-state resource | +| [agh resource list](/runtime/cli-reference/resource/list) | List desired-state resources | +| [agh resource put](/runtime/cli-reference/resource/put) | Create or update one desired-state resource | diff --git a/packages/site/content/runtime/cli-reference/session/index.mdx b/packages/site/content/runtime/cli-reference/session/index.mdx index 6e85ff786..c999cc675 100644 --- a/packages/site/content/runtime/cli-reference/session/index.mdx +++ b/packages/site/content/runtime/cli-reference/session/index.mdx @@ -29,21 +29,22 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------------- | ---------------------------------------------------------- | -| [agh session approve](/runtime/cli-reference/session/approve) | Approve or reject a pending session permission request | -| [agh session 
events](/runtime/cli-reference/session/events) | Read session events | -| [agh session health](/runtime/cli-reference/session/health) | Read session health and wake eligibility | -| [agh session history](/runtime/cli-reference/session/history) | Show session history grouped by turn | +| Command | Description | +| ------- | ----------- | +| [agh session approve](/runtime/cli-reference/session/approve) | Approve or reject a pending session permission request | +| [agh session events](/runtime/cli-reference/session/events) | Read session events | +| [agh session health](/runtime/cli-reference/session/health) | Read session health and wake eligibility | +| [agh session history](/runtime/cli-reference/session/history) | Show session history grouped by turn | | [agh session inspect](/runtime/cli-reference/session/inspect) | Inspect session health, wake audit, and policy correlation | -| [agh session list](/runtime/cli-reference/session/list) | List sessions | -| [agh session new](/runtime/cli-reference/session/new) | Create a new session | -| [agh session prompt](/runtime/cli-reference/session/prompt) | Send a prompt to a session | -| [agh session repair](/runtime/cli-reference/session/repair) | Inspect and repair an interrupted session transcript | -| [agh session resume](/runtime/cli-reference/session/resume) | Resume a stopped session | -| [agh session soul](/runtime/cli-reference/session/soul) | Manage session Soul snapshots | -| [agh session status](/runtime/cli-reference/session/status) | Show session status | -| [agh session stop](/runtime/cli-reference/session/stop) | Stop a session | -| [agh session wait](/runtime/cli-reference/session/wait) | Block until a session stops | +| [agh session list](/runtime/cli-reference/session/list) | List sessions | +| [agh session new](/runtime/cli-reference/session/new) | Create a new session | +| [agh session prompt](/runtime/cli-reference/session/prompt) | Send a prompt to a session | +| [agh session 
repair](/runtime/cli-reference/session/repair) | Inspect and repair an interrupted session transcript | +| [agh session resume](/runtime/cli-reference/session/resume) | Resume a stopped session | +| [agh session soul](/runtime/cli-reference/session/soul) | Manage session Soul snapshots | +| [agh session status](/runtime/cli-reference/session/status) | Show session status | +| [agh session stop](/runtime/cli-reference/session/stop) | Stop a session | +| [agh session wait](/runtime/cli-reference/session/wait) | Block until a session stops | diff --git a/packages/site/content/runtime/cli-reference/session/new.mdx b/packages/site/content/runtime/cli-reference/session/new.mdx index 741e23fdf..29eb5aff9 100644 --- a/packages/site/content/runtime/cli-reference/session/new.mdx +++ b/packages/site/content/runtime/cli-reference/session/new.mdx @@ -20,6 +20,9 @@ agh session new [flags] # Start a named session for a specific registered workspace and agent agh session new --workspace checkout-api --agent reviewer --name review-api + # Override provider, model, and reasoning effort for this session only + agh session new --provider codex --model gpt-5.4 --reasoning-effort high + # Auto-register an absolute workspace path before creating the session agh session new --cwd "$PWD" --agent reviewer ``` @@ -27,13 +30,15 @@ agh session new [flags] ### Options ``` - --agent string Agent definition name (defaults to config default) - --channel string Optional network channel opt-in for the session - --cwd string Absolute workspace directory to auto-register - -h, --help help for new - --name string Optional session label - --provider string Optional provider override for this session - --workspace string Registered workspace name or ID + --agent string Agent definition name (defaults to config default) + --channel string Optional network channel opt-in for the session + --cwd string Absolute workspace directory to auto-register + -h, --help help for new + --model string Optional model 
override for this session + --name string Optional session label + --provider string Optional provider override for this session + --reasoning-effort string Optional reasoning effort hint (minimal|low|medium|high|xhigh) for providers that support it + --workspace string Registered workspace name or ID ``` ### Options inherited from parent commands diff --git a/packages/site/content/runtime/cli-reference/session/soul/index.mdx b/packages/site/content/runtime/cli-reference/session/soul/index.mdx index f53ed2612..ea96d3311 100644 --- a/packages/site/content/runtime/cli-reference/session/soul/index.mdx +++ b/packages/site/content/runtime/cli-reference/session/soul/index.mdx @@ -29,8 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------- | --------------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh session soul refresh](/runtime/cli-reference/session/soul/refresh) | Refresh an idle session's Soul snapshot | diff --git a/packages/site/content/runtime/cli-reference/skill/index.mdx b/packages/site/content/runtime/cli-reference/skill/index.mdx index fabc463e2..7e7a94200 100644 --- a/packages/site/content/runtime/cli-reference/skill/index.mdx +++ b/packages/site/content/runtime/cli-reference/skill/index.mdx @@ -29,17 +29,18 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------- | --------------------------------------------------- | -| [agh skill create](/runtime/cli-reference/skill/create) | Scaffold a new workspace skill | -| [agh skill disable](/runtime/cli-reference/skill/disable) 
| Disable a daemon-managed skill | -| [agh skill enable](/runtime/cli-reference/skill/enable) | Enable a daemon-managed skill | -| [agh skill info](/runtime/cli-reference/skill/info) | Show detailed metadata for one skill | -| [agh skill install](/runtime/cli-reference/skill/install) | Install a marketplace skill | -| [agh skill list](/runtime/cli-reference/skill/list) | List locally available skills | -| [agh skill remove](/runtime/cli-reference/skill/remove) | Remove an installed marketplace skill | -| [agh skill search](/runtime/cli-reference/skill/search) | Search marketplace skills | -| [agh skill update](/runtime/cli-reference/skill/update) | Check for or install updates for marketplace skills | -| [agh skill view](/runtime/cli-reference/skill/view) | Read a skill or one of its resource files | +| Command | Description | +| ------- | ----------- | +| [agh skill create](/runtime/cli-reference/skill/create) | Scaffold a new workspace skill | +| [agh skill disable](/runtime/cli-reference/skill/disable) | Disable a daemon-managed skill | +| [agh skill enable](/runtime/cli-reference/skill/enable) | Enable a daemon-managed skill | +| [agh skill info](/runtime/cli-reference/skill/info) | Show detailed metadata for one skill | +| [agh skill install](/runtime/cli-reference/skill/install) | Install a marketplace skill | +| [agh skill list](/runtime/cli-reference/skill/list) | List locally available skills | +| [agh skill remove](/runtime/cli-reference/skill/remove) | Remove an installed marketplace skill | +| [agh skill search](/runtime/cli-reference/skill/search) | Search marketplace skills | +| [agh skill update](/runtime/cli-reference/skill/update) | Check for or install updates for marketplace skills | +| [agh skill view](/runtime/cli-reference/skill/view) | Read a skill or one of its resource files | diff --git a/packages/site/content/runtime/cli-reference/task/child/index.mdx b/packages/site/content/runtime/cli-reference/task/child/index.mdx index 
824d97c4f..117da3c23 100644 --- a/packages/site/content/runtime/cli-reference/task/child/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/child/index.mdx @@ -41,6 +41,6 @@ agh task child -o json ## Subcommands -| Command | Description | -| ----------------------------------------------------------------- | ------------------------------------ | +| Command | Description | +| ------- | ----------- | | [agh task child create](/runtime/cli-reference/task/child/create) | Create a child task beneath a parent | diff --git a/packages/site/content/runtime/cli-reference/task/dependency/index.mdx b/packages/site/content/runtime/cli-reference/task/dependency/index.mdx index a0a7d5565..4dd17e7ae 100644 --- a/packages/site/content/runtime/cli-reference/task/dependency/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/dependency/index.mdx @@ -41,7 +41,7 @@ agh task dependency -o json ## Subcommands -| Command | Description | -| --------------------------------------------------------------------------- | ------------------------------------ | -| [agh task dependency add](/runtime/cli-reference/task/dependency/add) | Add a dependency edge to a task | +| Command | Description | +| ------- | ----------- | +| [agh task dependency add](/runtime/cli-reference/task/dependency/add) | Add a dependency edge to a task | | [agh task dependency remove](/runtime/cli-reference/task/dependency/remove) | Remove a dependency edge from a task | diff --git a/packages/site/content/runtime/cli-reference/task/index.mdx b/packages/site/content/runtime/cli-reference/task/index.mdx index adcaea8f7..8b9e3b7ea 100644 --- a/packages/site/content/runtime/cli-reference/task/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/index.mdx @@ -54,26 +54,26 @@ agh task -o json ## Subcommands -| Command | Description | -| ----------------------------------------------------------------- | ------------------------------------------------------------- | -| [agh task 
approve](/runtime/cli-reference/task/approve) | Approve a task and enqueue its first run | -| [agh task cancel](/runtime/cli-reference/task/cancel) | Cancel a task tree | -| [agh task child](/runtime/cli-reference/task/child) | Manage child tasks | -| [agh task complete](/runtime/cli-reference/task/complete) | Complete a claimed task run for the current agent session | -| [agh task create](/runtime/cli-reference/task/create) | Create a task | -| [agh task delete](/runtime/cli-reference/task/delete) | Delete a task | -| [agh task dependency](/runtime/cli-reference/task/dependency) | Manage task dependencies | -| [agh task fail](/runtime/cli-reference/task/fail) | Fail a claimed task run for the current agent session | -| [agh task get](/runtime/cli-reference/task/get) | Show one task with related detail | -| [agh task heartbeat](/runtime/cli-reference/task/heartbeat) | Extend a claimed task run lease for the current agent session | -| [agh task list](/runtime/cli-reference/task/list) | List tasks | -| [agh task next](/runtime/cli-reference/task/next) | Claim the next task run for the current agent session | -| [agh task notification](/runtime/cli-reference/task/notification) | Manage task terminal notifications | -| [agh task profile](/runtime/cli-reference/task/profile) | Manage task execution profiles | -| [agh task publish](/runtime/cli-reference/task/publish) | Publish a draft task and enqueue its first run | -| [agh task reject](/runtime/cli-reference/task/reject) | Reject a pending approval task | -| [agh task release](/runtime/cli-reference/task/release) | Release a claimed task run for the current agent session | -| [agh task review](/runtime/cli-reference/task/review) | Manage task-run reviews | -| [agh task run](/runtime/cli-reference/task/run) | Manage task runs | -| [agh task start](/runtime/cli-reference/task/start) | Enqueue a run for an executable task | -| [agh task update](/runtime/cli-reference/task/update) | Update mutable task fields | +| Command 
| Description | +| ------- | ----------- | +| [agh task approve](/runtime/cli-reference/task/approve) | Approve a task and enqueue its first run | +| [agh task cancel](/runtime/cli-reference/task/cancel) | Cancel a task tree | +| [agh task child](/runtime/cli-reference/task/child) | Manage child tasks | +| [agh task complete](/runtime/cli-reference/task/complete) | Complete a claimed task run for the current agent session | +| [agh task create](/runtime/cli-reference/task/create) | Create a task | +| [agh task delete](/runtime/cli-reference/task/delete) | Delete a task | +| [agh task dependency](/runtime/cli-reference/task/dependency) | Manage task dependencies | +| [agh task fail](/runtime/cli-reference/task/fail) | Fail a claimed task run for the current agent session | +| [agh task get](/runtime/cli-reference/task/get) | Show one task with related detail | +| [agh task heartbeat](/runtime/cli-reference/task/heartbeat) | Extend a claimed task run lease for the current agent session | +| [agh task list](/runtime/cli-reference/task/list) | List tasks | +| [agh task next](/runtime/cli-reference/task/next) | Claim the next task run for the current agent session | +| [agh task notification](/runtime/cli-reference/task/notification) | Manage task terminal notifications | +| [agh task profile](/runtime/cli-reference/task/profile) | Manage task execution profiles | +| [agh task publish](/runtime/cli-reference/task/publish) | Publish a draft task and enqueue its first run | +| [agh task reject](/runtime/cli-reference/task/reject) | Reject a pending approval task | +| [agh task release](/runtime/cli-reference/task/release) | Release a claimed task run for the current agent session | +| [agh task review](/runtime/cli-reference/task/review) | Manage task-run reviews | +| [agh task run](/runtime/cli-reference/task/run) | Manage task runs | +| [agh task start](/runtime/cli-reference/task/start) | Enqueue a run for an executable task | +| [agh task 
update](/runtime/cli-reference/task/update) | Update mutable task fields | diff --git a/packages/site/content/runtime/cli-reference/task/notification/index.mdx b/packages/site/content/runtime/cli-reference/task/notification/index.mdx index 0e821741b..fb26a0edf 100644 --- a/packages/site/content/runtime/cli-reference/task/notification/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/notification/index.mdx @@ -41,9 +41,9 @@ agh task notification -o json ## Subcommands -| Command | Description | -| ------------------------------------------------------------------------------------- | ------------------------------------------------------------ | -| [agh task notification delete](/runtime/cli-reference/task/notification/delete) | Delete one bridge terminal notification subscription | -| [agh task notification list](/runtime/cli-reference/task/notification/list) | List bridge terminal notification subscriptions for one task | -| [agh task notification show](/runtime/cli-reference/task/notification/show) | Show one bridge terminal notification subscription | -| [agh task notification subscribe](/runtime/cli-reference/task/notification/subscribe) | Subscribe a bridge target to task terminal notifications | +| Command | Description | +| ------- | ----------- | +| [agh task notification delete](/runtime/cli-reference/task/notification/delete) | Delete one bridge terminal notification subscription | +| [agh task notification list](/runtime/cli-reference/task/notification/list) | List bridge terminal notification subscriptions for one task | +| [agh task notification show](/runtime/cli-reference/task/notification/show) | Show one bridge terminal notification subscription | +| [agh task notification subscribe](/runtime/cli-reference/task/notification/subscribe) | Subscribe a bridge target to task terminal notifications | diff --git a/packages/site/content/runtime/cli-reference/task/profile/index.mdx 
b/packages/site/content/runtime/cli-reference/task/profile/index.mdx index 1cb11b6a6..2cb28099f 100644 --- a/packages/site/content/runtime/cli-reference/task/profile/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/profile/index.mdx @@ -41,8 +41,8 @@ agh task profile -o json ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------- | ---------------------------------- | -| [agh task profile delete](/runtime/cli-reference/task/profile/delete) | Delete one task execution profile | -| [agh task profile inspect](/runtime/cli-reference/task/profile/inspect) | Show one task execution profile | -| [agh task profile update](/runtime/cli-reference/task/profile/update) | Replace one task execution profile | +| Command | Description | +| ------- | ----------- | +| [agh task profile delete](/runtime/cli-reference/task/profile/delete) | Delete one task execution profile | +| [agh task profile inspect](/runtime/cli-reference/task/profile/inspect) | Show one task execution profile | +| [agh task profile update](/runtime/cli-reference/task/profile/update) | Replace one task execution profile | diff --git a/packages/site/content/runtime/cli-reference/task/review/index.mdx b/packages/site/content/runtime/cli-reference/task/review/index.mdx index 5db3f783c..42eda9650 100644 --- a/packages/site/content/runtime/cli-reference/task/review/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/review/index.mdx @@ -41,9 +41,9 @@ agh task review -o json ## Subcommands -| Command | Description | -| --------------------------------------------------------------------- | ---------------------------------- | -| [agh task review list](/runtime/cli-reference/task/review/list) | List task-run reviews | -| [agh task review request](/runtime/cli-reference/task/review/request) | Request review for a task run | -| [agh task review show](/runtime/cli-reference/task/review/show) | Show one task-run review | -| [agh task 
review submit](/runtime/cli-reference/task/review/submit) | Submit one task-run review verdict | +| Command | Description | +| ------- | ----------- | +| [agh task review list](/runtime/cli-reference/task/review/list) | List task-run reviews | +| [agh task review request](/runtime/cli-reference/task/review/request) | Request review for a task run | +| [agh task review show](/runtime/cli-reference/task/review/show) | Show one task-run review | +| [agh task review submit](/runtime/cli-reference/task/review/submit) | Submit one task-run review verdict | diff --git a/packages/site/content/runtime/cli-reference/task/run/index.mdx b/packages/site/content/runtime/cli-reference/task/run/index.mdx index 486e72762..75158b601 100644 --- a/packages/site/content/runtime/cli-reference/task/run/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/run/index.mdx @@ -41,13 +41,13 @@ agh task run -o json ## Subcommands -| Command | Description | -| ----------------------------------------------------------------------------- | ------------------------------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh task run attach-session](/runtime/cli-reference/task/run/attach-session) | Attach an existing session to a claimed or starting run | -| [agh task run cancel](/runtime/cli-reference/task/run/cancel) | Cancel a task run | -| [agh task run claim](/runtime/cli-reference/task/run/claim) | Claim a queued task run | -| [agh task run complete](/runtime/cli-reference/task/run/complete) | Complete a running task run | -| [agh task run enqueue](/runtime/cli-reference/task/run/enqueue) | Enqueue a task run | -| [agh task run fail](/runtime/cli-reference/task/run/fail) | Fail a task run | -| [agh task run list](/runtime/cli-reference/task/run/list) | List runs for a task | -| [agh task run start](/runtime/cli-reference/task/run/start) | Start a claimed task run | +| [agh task run cancel](/runtime/cli-reference/task/run/cancel) | Cancel a task 
run | +| [agh task run claim](/runtime/cli-reference/task/run/claim) | Claim a queued task run | +| [agh task run complete](/runtime/cli-reference/task/run/complete) | Complete a running task run | +| [agh task run enqueue](/runtime/cli-reference/task/run/enqueue) | Enqueue a task run | +| [agh task run fail](/runtime/cli-reference/task/run/fail) | Fail a task run | +| [agh task run list](/runtime/cli-reference/task/run/list) | List runs for a task | +| [agh task run start](/runtime/cli-reference/task/run/start) | Start a claimed task run | diff --git a/packages/site/content/runtime/cli-reference/tool/index.mdx b/packages/site/content/runtime/cli-reference/tool/index.mdx index a4de83ae2..d4b7c42e9 100644 --- a/packages/site/content/runtime/cli-reference/tool/index.mdx +++ b/packages/site/content/runtime/cli-reference/tool/index.mdx @@ -29,12 +29,13 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------- | ------------------------------------------------------ | +| Command | Description | +| ------- | ----------- | | [agh tool approve](/runtime/cli-reference/tool/approve) | Mint a one-shot approval token for one tool invocation | -| [agh tool info](/runtime/cli-reference/tool/info) | Show one registry tool descriptor and diagnostics | -| [agh tool invoke](/runtime/cli-reference/tool/invoke) | Invoke one registry tool through daemon policy | -| [agh tool list](/runtime/cli-reference/tool/list) | List operator-visible registry tools | -| [agh tool search](/runtime/cli-reference/tool/search) | Search operator-visible registry tools | +| [agh tool info](/runtime/cli-reference/tool/info) | Show one registry tool descriptor and diagnostics | +| [agh tool invoke](/runtime/cli-reference/tool/invoke) | Invoke one registry tool through daemon policy | +| [agh tool 
list](/runtime/cli-reference/tool/list) | List operator-visible registry tools | +| [agh tool search](/runtime/cli-reference/tool/search) | Search operator-visible registry tools | diff --git a/packages/site/content/runtime/cli-reference/toolsets/index.mdx b/packages/site/content/runtime/cli-reference/toolsets/index.mdx index 281984342..8316a83e0 100644 --- a/packages/site/content/runtime/cli-reference/toolsets/index.mdx +++ b/packages/site/content/runtime/cli-reference/toolsets/index.mdx @@ -29,9 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------- | ----------------------------------- | +| Command | Description | +| ------- | ----------- | | [agh toolsets info](/runtime/cli-reference/toolsets/info) | Show one registry toolset expansion | -| [agh toolsets list](/runtime/cli-reference/toolsets/list) | List registry toolsets | +| [agh toolsets list](/runtime/cli-reference/toolsets/list) | List registry toolsets | diff --git a/packages/site/content/runtime/cli-reference/vault/index.mdx b/packages/site/content/runtime/cli-reference/vault/index.mdx index 3a97a8150..093ef4af3 100644 --- a/packages/site/content/runtime/cli-reference/vault/index.mdx +++ b/packages/site/content/runtime/cli-reference/vault/index.mdx @@ -29,11 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| ------------------------------------------------------- | -------------------------------------------- | -| [agh vault delete](/runtime/cli-reference/vault/delete) | Delete one vault secret | -| [agh vault get](/runtime/cli-reference/vault/get) | Show redacted metadata for one vault secret | -| [agh vault 
list](/runtime/cli-reference/vault/list) | List redacted vault secret metadata | -| [agh vault put](/runtime/cli-reference/vault/put) | Store one write-only vault secret from stdin | +| Command | Description | +| ------- | ----------- | +| [agh vault delete](/runtime/cli-reference/vault/delete) | Delete one vault secret | +| [agh vault get](/runtime/cli-reference/vault/get) | Show redacted metadata for one vault secret | +| [agh vault list](/runtime/cli-reference/vault/list) | List redacted vault secret metadata | +| [agh vault put](/runtime/cli-reference/vault/put) | Store one write-only vault secret from stdin | diff --git a/packages/site/content/runtime/cli-reference/workspace/index.mdx b/packages/site/content/runtime/cli-reference/workspace/index.mdx index ba5b2cde0..0d35b0f0e 100644 --- a/packages/site/content/runtime/cli-reference/workspace/index.mdx +++ b/packages/site/content/runtime/cli-reference/workspace/index.mdx @@ -29,12 +29,13 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries + ## Subcommands -| Command | Description | -| --------------------------------------------------------------- | ---------------------------------------- | -| [agh workspace add](/runtime/cli-reference/workspace/add) | Register a workspace | -| [agh workspace edit](/runtime/cli-reference/workspace/edit) | Edit a registered workspace | -| [agh workspace info](/runtime/cli-reference/workspace/info) | Show one workspace with resolved details | -| [agh workspace list](/runtime/cli-reference/workspace/list) | List registered workspaces | -| [agh workspace remove](/runtime/cli-reference/workspace/remove) | Remove a workspace registration | +| Command | Description | +| ------- | ----------- | +| [agh workspace add](/runtime/cli-reference/workspace/add) | Register a workspace | +| [agh workspace edit](/runtime/cli-reference/workspace/edit) | Edit a registered workspace 
| +| [agh workspace info](/runtime/cli-reference/workspace/info) | Show one workspace with resolved details | +| [agh workspace list](/runtime/cli-reference/workspace/list) | List registered workspaces | +| [agh workspace remove](/runtime/cli-reference/workspace/remove) | Remove a workspace registration | diff --git a/packages/site/content/runtime/core/agents/definitions.mdx b/packages/site/content/runtime/core/agents/definitions.mdx index edc2e08ff..086a1a0a1 100644 --- a/packages/site/content/runtime/core/agents/definitions.mdx +++ b/packages/site/content/runtime/core/agents/definitions.mdx @@ -73,7 +73,7 @@ Session creation first resolves the agent name, then resolves the provider and r | Agent name | explicit `--agent` or API `agent_name` -> `defaults.agent` | `agent name is required`; run `agh install` or set `defaults.agent` | | Provider | `agent.provider` -> `defaults.provider` | `agent provider is required`; run `agh install` or set `agent.provider`/`defaults.provider` | | Command | `agent.command` -> provider `command` | `provider "" command is required` | -| Model | `agent.model` -> provider `default_model` | Empty is allowed when the provider has no default. | +| Model | `agent.model` -> provider `models.default` | Empty is allowed when the provider has no default. | | Tools | `agent.tools` | Must be exact canonical ToolIDs or namespace-prefix wildcards. | | Toolsets | `agent.toolsets` | Must be canonical ToolsetIDs. | | Deny tools | `agent.deny_tools` | Same grammar as `tools`; denies narrow later policy evaluation. | @@ -100,7 +100,7 @@ These are the frontmatter fields accepted by the current `internal/config` parse | `name` | string | yes | none | Must be non-empty. `LoadAgentDef(name)` also requires the parsed name to match the requested directory/name. No lowercase or hyphen pattern is enforced today. | | `provider` | string | no | `defaults.provider` | Provider ID such as `claude`, `codex`, or a custom configured provider. 
Required at resolution time unless `defaults.provider` is set. | | `command` | string | no | provider `command` | Overrides the provider launch command for this agent. Parsed with shell-style quoting, but launched without a shell. | -| `model` | string | no | provider `default_model` | Stored as the resolved model metadata. The current ACP `session/new` and `session/load` payloads do not send this field. | +| `model` | string | no | provider `models.default` | Stored as the resolved model metadata. The current ACP `session/new` and `session/load` payloads do not send this field. | | `tools` | string array | no | empty | Additional exact canonical ToolIDs such as `agh__skill_view`, or namespace-prefix wildcards such as `agh__skill_*` and `mcp__github__*`. Default discovery is runtime-applied. | | `toolsets` | string array | no | empty | Additional canonical ToolsetIDs such as `agh__tasks` or `linear__read`. Runtime discovery adds `agh__bootstrap` and `agh__catalog` unless denied. | | `deny_tools` | string array | no | empty | Same grammar as `tools`. Denies can overlap allows and are interpreted as a narrowing layer by registry policy. 
| diff --git a/packages/site/content/runtime/core/agents/meta.json b/packages/site/content/runtime/core/agents/meta.json index 1179409c0..44b923458 100644 --- a/packages/site/content/runtime/core/agents/meta.json +++ b/packages/site/content/runtime/core/agents/meta.json @@ -1,5 +1,13 @@ { "title": "Agents", "icon": "FileText", - "pages": ["definitions", "capabilities", "soul", "heartbeat", "providers", "spawning"] + "pages": [ + "definitions", + "capabilities", + "soul", + "heartbeat", + "providers", + "model-catalog", + "spawning" + ] } diff --git a/packages/site/content/runtime/core/agents/model-catalog.mdx b/packages/site/content/runtime/core/agents/model-catalog.mdx new file mode 100644 index 000000000..da07a397a --- /dev/null +++ b/packages/site/content/runtime/core/agents/model-catalog.mdx @@ -0,0 +1,295 @@ +--- +title: Provider Model Catalog +description: Daemon-owned model catalog — sources, refresh lifecycle, native HTTP/UDS endpoints, OpenAI-compatible projection, and extension model.source contract. +--- + +The model catalog is the daemon-owned authority for **pre-session** provider model selection. The +new-session dialog, CLI, HTTP, UDS, Host API, web settings, and the OpenAI-compatible model list +all read the same projected rows. Active session controls keep flowing through ACP `configOptions` +once a session is running. + +The catalog never blocks session creation. Every refresh is detached from the request lifetime, +runs under an explicit deadline, falls back to stale rows when refresh fails, and exposes source +health through structured status. + +## Why a separate catalog + +ACP `models.availableModels` is observed only after `session/new` or `session/load`, which is too +late for the new-session dialog and for agents picking a model through CLI/HTTP/UDS. The catalog +splits two concepts that used to live together: + +- Pre-session catalog: which provider models AGH knows about before creating a session. 
+- Active session config: which controls the running ACP session exposes right now. + +The catalog is daemon-owned, persisted, refreshable, extensible, and agent-manageable. Active +session `configOptions` continue to govern the live session — catalog rows never override them. + +## Source priorities and merge + +The merge key is `(provider_id, model_id)`. Source rows are preserved separately and merged on read. + +| Source kind | Priority | Origin | +| --------------- | -------------- | ------------------------------------------------------------------ | +| `config` | 120 | `[providers.<provider_id>.models]` operator config. | +| `provider_live` | 110 | Live discovery sources for the provider account/runtime. | +| `extension` | 100 | Extension model sources (capability `model.source`). | +| `models_dev` | 50 | Cross-provider enrichment from `models.dev` (with stale fallback). | +| `builtin` | 10 | Built-in defaults shipped with the daemon. | +| `acp_session` | session-scoped | Observed during an ACP session; never rewrites global authority. | + +Higher-priority non-empty fields win; lower-priority sources fill missing fields. Ties resolve by +fresher `refreshed_at`, then ascending `source_id`. `models_dev` and `builtin` rows can enrich +metadata but never prove account-level availability. + +## Merged availability + +The merged projection exposes both nullable `available` and string `availability_state` so stale +live truth is visible instead of collapsed: + +| `availability_state` | Meaning | API `available` | API `stale` | +| -------------------- | ------------------------------------------------------------------ | --------------- | ----------- | +| `available_live` | Live or extension row confirmed availability with fresh data. | `true` | `false` | +| `available_stale` | Live or extension row confirmed availability but the row is stale. | `true` | `true` | +| `unavailable_live` | Live or extension row denied availability with fresh data. 
| `false` | `false` | +| `unavailable_stale` | Live or extension row denied availability but the row is stale. | `false` | `true` | +| `unknown` | Only catalog/builtin/config metadata is known. | `null` | depends | + +Manual model entry remains valid even when no source advertises the model. `models.curated` is +metadata, never an allowlist. + +## Native HTTP and UDS endpoints + +| Method | Path | Transports | Description | +| ------ | --------------------------------------------- | ---------- | -------------------------------------------------------- | +| GET | `/api/providers/models` | HTTP, UDS | List merged provider model catalog entries. | +| GET | `/api/providers/{provider_id}/models` | HTTP, UDS | List merged catalog entries for one provider. | +| POST | `/api/providers/models/refresh` | HTTP, UDS | Refresh sources across providers; returns source status. | +| POST | `/api/providers/{provider_id}/models/refresh` | HTTP, UDS | Refresh sources for one provider. | +| GET | `/api/providers/models/status` | HTTP, UDS | Source status across providers. | +| GET | `/api/providers/{provider_id}/models/status` | HTTP, UDS | Source status for one provider. | + +List and status endpoints accept these query parameters: + +- `provider_id`: filter by AGH provider id (only on the cross-provider list). +- `source_id`: filter by catalog source id (`config`, `models_dev`, `provider_live:<provider_id>`, `extension:<extension_name>`). +- `refresh=true`: refresh sources before listing. +- `include_stale=true`: include stale source rows in the merged projection. + +Refresh requests accept an optional JSON body: + +```json +{ + "source_id": "provider_live:codex", + "force": true, + "request_id": "rfsh-2026-05-07-abc" +} +``` + +`request_id` (or a daemon-generated value) is the `refresh_request_id` correlation key surfaced in +logs and source status events. 
Refresh work runs under daemon-owned lifetime: the request's cancel +does not cancel refresh work, and the daemon joins outstanding refresh workers during shutdown. + +The native list response shape is: + +```json +{ + "models": [ + { + "provider_id": "codex", + "model_id": "gpt-5.4", + "display_name": "GPT-5.4", + "sources": [ + { + "source_id": "config", + "source_kind": "config", + "priority": 120, + "stale": false, + "refreshed_at": "2026-05-07T18:32:11Z" + }, + { + "source_id": "models_dev", + "source_kind": "models_dev", + "priority": 50, + "stale": false, + "refreshed_at": "2026-05-07T03:00:00Z" + } + ], + "available": true, + "availability_state": "available_live", + "stale": false, + "refreshed_at": "2026-05-07T18:32:11Z", + "context_window": 256000, + "max_output_tokens": 32000, + "supports_tools": true, + "supports_reasoning": true, + "reasoning_efforts": ["minimal", "low", "medium", "high", "xhigh"], + "default_reasoning_effort": "medium" + } + ] +} +``` + +Source status payloads carry `source_id`, `provider_id`, `source_kind`, `refresh_state` +(`idle | refreshing | succeeded | failed`), `last_refresh`, `next_refresh`, `last_success`, +`row_count`, `stale`, and a redacted `last_error`. Raw secrets, command lines, OAuth material, or +provider response bodies never appear in `last_error`. + +The HTTP and UDS transports return canonical, byte-equal JSON for the same projection so cross- +transport regression tests can compare daemon output directly. + +## OpenAI-compatible projection + +A list-only OpenAI-compatible endpoint is registered on HTTP only. UDS does not expose this route. + +```http +GET /api/openai/v1/models +GET /api/openai/v1/models?provider_id=codex +``` + +Authentication uses the same bearer-auth and middleware contract as the rest of `/api/*`. CORS and +rate-limit behavior follow HTTP defaults. Errors are wrapped in the OpenAI envelope shape +(`{"error": {...}}`) but reuse AGH's normal status-code semantics. 
Refresh work is **not** +available through this endpoint; clients use the native catalog endpoints, the CLI, the Host API, +or the web for refreshes. + +```json +{ + "object": "list", + "data": [ + { + "id": "gpt-5.4", + "object": "model", + "created": 0, + "owned_by": "codex", + "agh": { + "provider_id": "codex", + "model_id": "gpt-5.4", + "display_name": "GPT-5.4", + "sources": ["config", "models_dev"], + "available": true, + "availability_state": "available_live", + "stale": false, + "supports_tools": true, + "supports_reasoning": true, + "reasoning_efforts": ["minimal", "low", "medium", "high", "xhigh"], + "context_window": 256000, + "max_output_tokens": 32000 + } + } + ] +} +``` + +The `agh` extension key carries AGH-specific metadata. Generated OpenAPI/SDK contracts treat it as +a typed object (`OpenAIModelAGHPayload`), not a free-form blob. + +## Provider models CLI + +The CLI surface lives under the singular `provider` namespace because the catalog is provider- +scoped and already neighbors `agh provider auth`. A top-level `agh models …` alias is intentionally +out of scope for the MVP to avoid forking the command contract before the first one is stable. + +```bash +agh provider models list [provider] -o json +agh provider models list [provider] --source models_dev --refresh --include-stale +agh provider models refresh [provider] -o json +agh provider models refresh [provider] --source provider_live:codex --force --request-id rfsh-abc +agh provider models status [provider] -o json +``` + +`refresh` returns the same source-status payloads as the native HTTP/UDS refresh endpoints — not a +single success line — so CI scripts and agents can act on partial-source failures without parsing +stderr. JSON output is canonical: identical between `agh provider models …` and the daemon HTTP +response for the same projection. + +See [`agh provider models`](/runtime/cli-reference/provider/models) for full flag and output +documentation generated from the cobra source. 
+ +## Extension model.source contract + +Extensions can provide model rows by declaring the manifest provide capability `model.source`. The +daemon validates rows, persists them, and applies the normal merge policy. Extensions cannot own +global catalog state; they only contribute source rows. + +```toml +[capabilities] +provides = ["model.source"] + +[actions] +requires = ["models/list", "models/refresh", "models/status"] + +[security] +capabilities = ["model.read", "model.write"] + +[subprocess] +command = "node" +args = ["dist/index.js"] +``` + +Extensions that provide `model.source` must implement the AGH-to-extension service method +`models/list`. The daemon calls it with a deadline-bound context; the extension returns rows scoped +to provider IDs the extension declares. + +| Method | Direction | Purpose | +| ---------------- | --------------- | -------------------------------------------------------------------------- | +| `models/list` | AGH → extension | Extension returns provider model rows; daemon validates and persists them. | +| `models/list` | Host API | Extension reads the daemon-owned merged projection. | +| `models/refresh` | Host API | Extension triggers a daemon-owned source refresh. | +| `models/status` | Host API | Extension reads daemon-owned source status. | + +Capability grants follow the same area-based scheme as other Host API methods: + +| Method | Area | Notes | +| ---------------- | ------------- | ----------------------------------------------------------- | +| `models/list` | `model.read` | Returns the daemon-owned merged projection, not raw rows. | +| `models/status` | `model.read` | Returns daemon-owned source status. | +| `models/refresh` | `model.write` | Triggers daemon-owned refresh; rate-limited and serialized. 
| + +Marketplace extensions are limited to read-oriented grants by policy, so a marketplace extension +can declare `model.read` and read the projection but must request `model.write` explicitly to +trigger refresh, and refresh grants stay subject to the marketplace policy review. + +Extension source rows are always validated through `internal/modelcatalog`. Invalid rows produce a +recorded source status (with redacted error) instead of corrupting the merged projection. + +## Refresh lifetime and serialization + +- Catalog list calls return cached rows immediately when present. +- Refresh detaches from request cancellation via `context.WithoutCancel(ctx)` and re-attaches an + explicit deadline through `context.WithDeadline`. +- Refresh work is **serialized** per `provider_id` before any subprocess or provider-home work. +- Concurrent refresh requests for the same provider **coalesce** behind the in-flight refresh and + return identical source statuses when it finishes. +- Refreshes for different providers can run concurrently. +- The daemon joins outstanding refresh workers during shutdown. + +Discovery never creates an ACP session. Live provider sources fail closed by recording source +status; session creation is never blocked on a successful network refresh. Stale rows remain +available as a fallback while the catalog labels them stale. + +## Observability + +Catalog operations emit structured events with the following correlation keys: + +- `refresh_request_id` +- `provider_id` +- `source_id` +- `source_kind` +- `model_id` for row-scoped events +- `extension_name` for extension sources +- `session_id` only for ACP session config observations + +Tracked events include refresh started/succeeded/failed, source row count changes, stale fallback +usage, all-source failure, extension source denied/unavailable, and ACP config option captured/ +updated transitions. 
+ +## Related pages + +- [Providers](/runtime/core/agents/providers) covers `[providers.<name>.models]` and the + per-provider `models.discovery` shape. +- [config.toml](/runtime/core/configuration/config-toml#modelcatalogsourcesmodelsdev) documents + `[model_catalog.sources.models_dev]` defaults. +- [`agh provider models`](/runtime/cli-reference/provider/models) is the CLI generated from the + cobra source. +- [Develop Extensions](/runtime/core/extensions/develop#model-source-extensions) covers the + manifest provide capability `model.source` and Host API model methods. diff --git a/packages/site/content/runtime/core/agents/providers.mdx b/packages/site/content/runtime/core/agents/providers.mdx index ad26fa0ba..b40133e77 100644 --- a/packages/site/content/runtime/core/agents/providers.mdx +++ b/packages/site/content/runtime/core/agents/providers.mdx @@ -29,7 +29,7 @@ provider = "claude" The built-in registry lives in `internal/config/provider.go`. -| Provider ID | Harness | Runtime provider | Command | Default model | Auth mode | Credential target | +| Provider ID | Harness | Runtime provider | Command | `models.default` | Auth mode | Credential target | | ------------------- | -------- | ------------------- | ---------------------------------------------------------------- | --------------------------- | -------------- | -------------------- | | `claude` | `acp` | `claude` | `npx -y @agentclientprotocol/claude-agent-acp@latest` | `claude-sonnet-4-6` | `native_cli` | provider login | | `codex` | `acp` | `codex` | `npx -y @zed-industries/codex-acp@latest` | `gpt-5.4` | `native_cli` | provider login | @@ -94,7 +94,7 @@ Provider overrides and custom providers are configured in `config.toml`. | --------------------- | ------ | ------------------------ | ------------------------------------------------------------------------------------------------------------- | | `command` | string | yes for custom providers | Launch command for the ACP subprocess. 
Overrides a built-in command when set. | | `display_name` | string | no | Operator-facing label shown in settings and provider pickers. | -| `default_model` | string | no | Used when an `AGENT.md` omits `model`. Native `pi` receives it through ACP model selection. | +| `models` | table | no | Nested model config block (`models.default`, `models.curated`, `models.discovery`). See "Provider models". | | `harness` | string | no | `acp` for direct ACP launch or `pi_acp` for providers launched through the Pi ACP adapter. Defaults to `acp`. | | `runtime_provider` | string | no | Downstream provider id used by harnesses such as Pi. Defaults to the AGH provider id. | | `transport` | string | no | Optional Pi model-provider API family hint for custom providers. | @@ -110,6 +110,51 @@ Provider overrides and custom providers are configured in `config.toml`. AGH overlays provider config on top of a built-in provider when the name matches. Unknown provider names are accepted only when they have a `[providers.<name>]` entry. +The flat keys `default_model`, `supported_models`, and `supports_reasoning_effort` are no longer +accepted. Config that still sets them is rejected at load time with a deterministic hard-cut error +that names the exact path. Move every value into the nested `[providers.<name>.models]` block below. + +## Provider models + +Each provider declares pre-session model defaults and curated metadata under `[providers.<name>.models]`. +Pre-session model selection is served by the daemon-owned model catalog. The catalog merges builtin +defaults, the operator config, the optional `models.dev` enrichment source, live provider discovery +sources, and extension model sources, then projects them through HTTP, UDS, CLI, the OpenAI-compatible +projection, the Host API, and the web. Active ACP `configOptions` continue to govern model and +reasoning controls inside a running session. 
+ +| Field | Type | Required | Runtime behavior | +| ------------------------------------------- | ------- | --------------------------- | --------------------------------------------------------------------------------------------------------------------------------- | +| `models.default` | string | no | Default model when an `AGENT.md` omits `model`. Free-form: it does not need to appear in `models.curated`. | +| `models.curated` | array | no | Curated entries shown in pickers and projected as `config` rows in the catalog. Not an allowlist. | +| `models.curated[].id` | string | yes per entry | Provider model identifier sent to the runtime. Must be unique inside the provider. | +| `models.curated[].display_name` | string | no | Optional human label. | +| `models.curated[].context_window` | integer | no | Context window in tokens. | +| `models.curated[].max_input_tokens` | integer | no | Maximum input tokens. | +| `models.curated[].max_output_tokens` | integer | no | Maximum output tokens. | +| `models.curated[].supports_tools` | bool | no | Whether the model supports tool calls. | +| `models.curated[].supports_reasoning` | bool | no | Whether the model supports reasoning effort. | +| `models.curated[].reasoning_efforts` | array | no | Allowed reasoning levels (`minimal`, `low`, `medium`, `high`, `xhigh`). Blank entries are rejected. | +| `models.curated[].default_reasoning_effort` | string | no | Per-model default reasoning level. Must appear in `reasoning_efforts` when both are set. | +| `models.curated[].cost_input_per_million` | number | no | Display-only input cost per million tokens. | +| `models.curated[].cost_output_per_million` | number | no | Display-only output cost per million tokens. | +| `models.discovery.enabled` | bool | no | Enables the side-effect-free discovery adapter for this provider. Defaults to `false` for providers without a built-in safe path. 
| +| `models.discovery.command` | string | required for some providers | Side-effect-free discovery command (mutually exclusive with `endpoint` unless the adapter documents both). | +| `models.discovery.endpoint` | string | required for some providers | Side-effect-free discovery endpoint URL. | +| `models.discovery.timeout` | string | no | Per-discovery timeout duration (defaults to the model catalog timeout). | + +Discovery uses the resolved provider auth, env, and home policy and never creates an ACP session. +When a discovery path is unavailable or fails, the model catalog records a source status and falls +back to stale or lower-priority rows; session creation never depends on a successful discovery. + +Session creation can override `provider`, `model`, and `reasoning_effort` for one launch. `model` is +free-form: `models.curated` is operator metadata, not an allowlist, so a manual model ID outside the +curated list is accepted. Reasoning effort is validated against `minimal`, `low`, `medium`, `high`, +and `xhigh`, and AGH only forwards it when the resolved curated entry advertises it through +`supports_reasoning` and `reasoning_efforts`. After the session starts, AGH switches to active ACP +`configOptions` (or legacy `session/set_model` when the agent does not advertise config options) and +the catalog metadata stops governing the live session. + Native provider auth state belongs to the provider. Run the provider's own login command, such as `claude auth login`, `codex login`, `opencode auth login`, or Pi's `/login`, outside AGH or through a configured `auth_login_command`. The built-in `pi` provider exposes @@ -124,11 +169,21 @@ agents that omit `model`. 
```toml [providers.claude] -default_model = "claude-sonnet-4-6" auth_mode = "native_cli" auth_status_command = "claude auth status" auth_login_command = "claude auth login" +[providers.claude.models] +default = "claude-sonnet-4-6" + +[[providers.claude.models.curated]] +id = "claude-sonnet-4-6" +display_name = "Claude Sonnet 4.6" + +[[providers.claude.models.curated]] +id = "claude-haiku-4-5" +display_name = "Claude Haiku 4.5" + [[providers.claude.mcp_servers]] name = "github" command = "npx" @@ -153,7 +208,13 @@ harness = "pi_acp" auth_mode = "bound_secret" runtime_provider = "openrouter" command = "npx -y pi-acp@latest" -default_model = "openai/gpt-5.4" + +[providers.openrouter.models] +default = "openai/gpt-5.4" + +[[providers.openrouter.models.curated]] +id = "openai/gpt-5.4" +display_name = "OpenAI GPT-5.4" [[providers.openrouter.credential_slots]] name = "api_key" @@ -188,11 +249,17 @@ built-ins. ```toml [providers.local-agent] command = "local-agent --acp --stdio" -default_model = "local-default" auth_mode = "native_cli" auth_status_command = "local-agent auth status" auth_login_command = "local-agent auth login" +[providers.local-agent.models] +default = "local-default" + +[[providers.local-agent.models.curated]] +id = "local-default" +display_name = "Local Default" + [[providers.local-agent.mcp_servers]] name = "filesystem-index" command = "local-index-mcp" @@ -254,7 +321,7 @@ Set environment variables before starting the daemon instead. ## Models and authentication -`model` and `default_model` are resolved and exposed as runtime metadata. Direct `acp` providers +`model` and `models.default` are resolved and exposed as runtime metadata. Direct `acp` providers receive the normal ACP startup flow. Native `pi` sessions receive the resolved `runtime_provider/model` through ACP model selection. 
Wrapped Pi-backed API-key providers receive session-local Pi `settings.json` and `models.json` so Pi can run with the AGH-selected provider, @@ -327,4 +394,6 @@ agent's role, permissions, MCP servers, and startup prompt in `AGENT.md`. - [Agent Definitions](/runtime/core/agents/definitions) explains the `AGENT.md` fields that reference providers. - [Spawning](/runtime/core/agents/spawning) shows exactly how the resolved provider command becomes a running ACP process. +- [Provider Model Catalog](/runtime/core/agents/model-catalog) covers the daemon-owned catalog, sources, refresh lifecycle, OpenAI-compatible projection, and extension `model.source`. - [CLI agent reference](/runtime/cli-reference/agent) lists the current `agh agent` inspection commands. +- [`agh provider models` CLI](/runtime/cli-reference/provider/models) inspects and refreshes the catalog without a UI. diff --git a/packages/site/content/runtime/core/configuration/agent-md.mdx b/packages/site/content/runtime/core/configuration/agent-md.mdx index bbb0345d6..497a9d6dd 100644 --- a/packages/site/content/runtime/core/configuration/agent-md.mdx +++ b/packages/site/content/runtime/core/configuration/agent-md.mdx @@ -27,7 +27,7 @@ The parser is strict. Unknown frontmatter fields fail loading. | `name` | string | required | Non-empty. Must match the requested agent name when loaded by name. | Agent identity and discovery key. | | `provider` | string | `[defaults].provider` | Built-in provider key or custom provider key. | Provider used to resolve command, model, auth mode, and runtime metadata. | | `command` | string | Provider `command` | Non-empty when overriding. | Agent-specific ACP launch command. | -| `model` | string | Provider `default_model` | Any string. | Agent-specific model metadata. | +| `model` | string | Provider `models.default` | Any string. | Agent-specific model metadata. | | `tools` | string array | empty | Exact canonical ToolIDs or namespace-prefix wildcards. 
| Additional agent tool allowlist grammar. | | `toolsets` | string array | empty | Canonical ToolsetIDs. | Additional named tool bundles allowed for the agent. | | `deny_tools` | string array | empty | Exact canonical ToolIDs or namespace-prefix wildcards. | Tool denies that always narrow the agent grants. | @@ -80,7 +80,7 @@ name: reviewer # Optional if [defaults].provider is set in config.toml. provider: claude -# Optional. Defaults to the provider default_model. +# Optional. Defaults to the provider models.default. model: claude-sonnet-4-6 # Optional. Add only extra ToolIDs beyond default discovery. @@ -163,7 +163,7 @@ Put blocking findings first and cite the relevant file or symbol. | Attribute | Value | | ------------ | ------------------------------------------------------------ | | Type | string | -| Default | Selected provider `default_model` | +| Default | Selected provider `models.default` | | Required | no | | Valid values | Any string. Empty is allowed if the provider has no default. | | Description | Agent-specific model metadata. | @@ -453,7 +453,7 @@ Sidecar behavior: | Agent name | explicit CLI/API agent name, then `[defaults].agent` | Fails if empty. | | Provider | `AGENT.md` `provider`, then `[defaults].provider` | Fails if still empty. | | Command | `AGENT.md` `command`, then provider `command` | Fails if empty after provider resolution. | -| Model | `AGENT.md` `model`, then provider `default_model` | Empty is allowed. | +| Model | `AGENT.md` `model`, then provider `models.default` | Empty is allowed. | | Tools | `AGENT.md` `tools` | Must be exact canonical ToolIDs or approved namespace-prefix wildcards. | | Toolsets | `AGENT.md` `toolsets` | Must be canonical ToolsetIDs. | | Deny tools | `AGENT.md` `deny_tools` | Same grammar as `tools`; denies narrow later policy evaluation. 
| diff --git a/packages/site/content/runtime/core/configuration/config-toml.mdx b/packages/site/content/runtime/core/configuration/config-toml.mdx index c3884e6e7..60ac2e156 100644 --- a/packages/site/content/runtime/core/configuration/config-toml.mdx +++ b/packages/site/content/runtime/core/configuration/config-toml.mdx @@ -24,51 +24,52 @@ Use only `[sandboxes.]` for session execution boundaries. ## Quick Reference -| Section | Purpose | Default | -| ------------------------------ | ------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | -| `[daemon]` | Unix domain socket path for CLI and UDS API traffic. | `socket = "$AGH_HOME/daemon.sock"` | -| `[http]` | HTTP and SSE bind address. | `host = "localhost"`, `port = 2123` | -| `[defaults]` | Default agent, provider, and sandbox resolution. | `agent = "general"`, `provider = ""`, `sandbox = ""` | -| `[limits]` | Daemon-level session and agent caps. | `max_sessions = 10`, `max_concurrent_agents = 20` | -| `[session.limits]` | Session-scoped wall-clock timeout. | `timeout = "0s"` | -| `[session.supervision]` | Runtime activity heartbeat, progress, warning, and inactivity timeout controls. | heartbeat 30 seconds, progress 10 minutes, warning 15 minutes, timeout 30 minutes | -| `[agents.soul]` | Optional `SOUL.md` parsing, body limits, and compact projection budget. | enabled, 32 KiB body, 2 KiB compact projection | -| `[agents.heartbeat]` | Optional `HEARTBEAT.md` policy bounds, wake cadence/limits, and health timing. | enabled, 32 KiB body, 5 min/30 min intervals, 25 wakes per cycle, 168 h retention | -| `[permissions]` | Default permission mode. | `mode = "approve-all"` | -| `[tools]` | Tool registry lifecycle, hosted MCP enablement, and result budget defaults. | enabled, hosted MCP enabled, 256 KiB result default | -| `[tools.hosted_mcp]` | Hosted MCP session bind nonce lifecycle. 
| 30 seconds | -| `[tools.policy]` | External tool source defaults, approval timeout, and trusted sources. | external tools disabled, 120 second approval timeout, no trusted sources | -| `[[mcp_servers]]` | Top-level MCP servers passed to agents. | empty list | -| `[providers.]` | Built-in provider override or custom provider definition. | empty map plus built-ins | -| `[sandboxes.]` | Local or provider-backed execution sandbox profiles. | local backend when no profile is selected | -| `[observability]` | Event summary retention and global byte cap. | enabled, 7 days, 1 GiB | -| `[observability.transcripts]` | Transcript segment sizing and per-session cap. | enabled, 1 MiB segments, 256 MiB per session | -| `[log]` | Structured log level. | `level = "info"` | -| `[memory]` | Persistent memory runtime and global memory directory. | enabled, `$AGH_HOME/memory` | -| `[memory.controller]` | Hybrid write controller mode, latency, and fallback op. | hybrid, 300 ms, noop | -| `[memory.controller.llm]` | Controller LLM tiebreaker. | enabled, `anthropic/claude-haiku-4`, 250 ms, top_k 5 | -| `[memory.controller.policy]` | Content/rate caps and allowed write origins. | 4096 chars, 60 writes/min, all canonical origins | -| `[memory.recall]` | Deterministic recall: top-K, weights, freshness, signal queue. | top-K 5, raw 50, weighted fusion | -| `[memory.decisions]` | Decision WAL retention and per-row body cap. | 90 days, audit summary on, 64 KiB body cap | -| `[memory.extractor]` | Post-message extractor and bounded queue. | enabled, post_message mode, capacity 1, coalesce 16 | -| `[memory.dream]` | Dreaming runtime, gates, and scoring. | enabled, agent `dreaming-curator`, 24 h, 3 sessions, 30 min ticker | -| `[memory.session]` | Forensic session ledger materialization, archive, and unbound partition. | jsonl, `$AGH_HOME/sessions`, 24 h grace, 30-day cold archive, `_unbound` partition | -| `[memory.daily]` | Daily-log retention and rotation. 
| 1 MiB, 5000 lines, 7-day window, 30-day cold archive, sweep at 03:00 | -| `[memory.file]` | Curated memory file body limits. | 200 lines, 25 KiB | -| `[memory.provider]` | Active memory provider selection and circuit breaker. | bundled local, 2 s timeout, 5 failures, 30 s cooldown | -| `[memory.workspace]` | Workspace identity file location and auto-creation. | `/.agh/workspace.toml`, auto-create on first touch | -| `[skills]` | Skill discovery, polling, disable list, and marketplace trust gates. | enabled, poll every 3 seconds | -| `[skills.marketplace]` | Skill registry override. | unset | -| `[extensions.marketplace]` | Extension registry override. | unset | -| `[automation]` | Automation scheduler defaults. | enabled, UTC, 5 concurrent jobs | -| `[[automation.jobs]]` | Scheduled automation jobs. | empty list | -| `[[automation.triggers]]` | Event-driven automation triggers. | empty list | -| `[autonomy.coordinator]` | Coordinator session bootstrap for workspace-scoped task runs. | disabled, agent `coordinator`, TTL 2 hours, 5 children, 1 active per workspace | -| `[task.orchestration]` | Bounds for run summaries, context bundles, scheduler health, and max-runtime. | 4 KiB summaries, 8 KiB context, prior 5/recent 50 events, spawn fail limit 5 | -| `[task.orchestration.profile]` | Defaults and gates for task execution profiles. | inherit coordinator/worker/sandbox; provider override + sandbox `none` allowed | -| `[task.orchestration.review]` | Defaults and bounds for the post-terminal review gate. | policy `none`, max rounds 3, max attempts 2, timeout 20m, failure `block_task` | -| `[[hooks.declarations]]` | Config-defined runtime hooks. | empty list | -| `[network]` | Experimental AGH network runtime. 
| enabled, channel `default` | +| Section | Purpose | Default | +| ------------------------------------ | ------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------- | +| `[daemon]` | Unix domain socket path for CLI and UDS API traffic. | `socket = "$AGH_HOME/daemon.sock"` | +| `[http]` | HTTP and SSE bind address. | `host = "localhost"`, `port = 2123` | +| `[defaults]` | Default agent, provider, and sandbox resolution. | `agent = "general"`, `provider = ""`, `sandbox = ""` | +| `[limits]` | Daemon-level session and agent caps. | `max_sessions = 10`, `max_concurrent_agents = 20` | +| `[session.limits]` | Session-scoped wall-clock timeout. | `timeout = "0s"` | +| `[session.supervision]` | Runtime activity heartbeat, progress, warning, and inactivity timeout controls. | heartbeat 30 seconds, progress 10 minutes, warning 15 minutes, timeout 30 minutes | +| `[agents.soul]` | Optional `SOUL.md` parsing, body limits, and compact projection budget. | enabled, 32 KiB body, 2 KiB compact projection | +| `[agents.heartbeat]` | Optional `HEARTBEAT.md` policy bounds, wake cadence/limits, and health timing. | enabled, 32 KiB body, 5 min/30 min intervals, 25 wakes per cycle, 168 h retention | +| `[permissions]` | Default permission mode. | `mode = "approve-all"` | +| `[tools]` | Tool registry lifecycle, hosted MCP enablement, and result budget defaults. | enabled, hosted MCP enabled, 256 KiB result default | +| `[tools.hosted_mcp]` | Hosted MCP session bind nonce lifecycle. | 30 seconds | +| `[tools.policy]` | External tool source defaults, approval timeout, and trusted sources. | external tools disabled, 120 second approval timeout, no trusted sources | +| `[[mcp_servers]]` | Top-level MCP servers passed to agents. | empty list | +| `[providers.]` | Built-in provider override or custom provider definition. 
| empty map plus built-ins | +| `[model_catalog.sources.models_dev]` | `models.dev` enrichment source (cross-provider). | enabled, `https://models.dev/api.json`, 24 h TTL, 10 s timeout | +| `[sandboxes.]` | Local or provider-backed execution sandbox profiles. | local backend when no profile is selected | +| `[observability]` | Event summary retention and global byte cap. | enabled, 7 days, 1 GiB | +| `[observability.transcripts]` | Transcript segment sizing and per-session cap. | enabled, 1 MiB segments, 256 MiB per session | +| `[log]` | Structured log level. | `level = "info"` | +| `[memory]` | Persistent memory runtime and global memory directory. | enabled, `$AGH_HOME/memory` | +| `[memory.controller]` | Hybrid write controller mode, latency, and fallback op. | hybrid, 300 ms, noop | +| `[memory.controller.llm]` | Controller LLM tiebreaker. | enabled, `anthropic/claude-haiku-4`, 250 ms, top_k 5 | +| `[memory.controller.policy]` | Content/rate caps and allowed write origins. | 4096 chars, 60 writes/min, all canonical origins | +| `[memory.recall]` | Deterministic recall: top-K, weights, freshness, signal queue. | top-K 5, raw 50, weighted fusion | +| `[memory.decisions]` | Decision WAL retention and per-row body cap. | 90 days, audit summary on, 64 KiB body cap | +| `[memory.extractor]` | Post-message extractor and bounded queue. | enabled, post_message mode, capacity 1, coalesce 16 | +| `[memory.dream]` | Dreaming runtime, gates, and scoring. | enabled, agent `dreaming-curator`, 24 h, 3 sessions, 30 min ticker | +| `[memory.session]` | Forensic session ledger materialization, archive, and unbound partition. | jsonl, `$AGH_HOME/sessions`, 24 h grace, 30-day cold archive, `_unbound` partition | +| `[memory.daily]` | Daily-log retention and rotation. | 1 MiB, 5000 lines, 7-day window, 30-day cold archive, sweep at 03:00 | +| `[memory.file]` | Curated memory file body limits. 
| 200 lines, 25 KiB | +| `[memory.provider]` | Active memory provider selection and circuit breaker. | bundled local, 2 s timeout, 5 failures, 30 s cooldown | +| `[memory.workspace]` | Workspace identity file location and auto-creation. | `/.agh/workspace.toml`, auto-create on first touch | +| `[skills]` | Skill discovery, polling, disable list, and marketplace trust gates. | enabled, poll every 3 seconds | +| `[skills.marketplace]` | Skill registry override. | unset | +| `[extensions.marketplace]` | Extension registry override. | unset | +| `[automation]` | Automation scheduler defaults. | enabled, UTC, 5 concurrent jobs | +| `[[automation.jobs]]` | Scheduled automation jobs. | empty list | +| `[[automation.triggers]]` | Event-driven automation triggers. | empty list | +| `[autonomy.coordinator]` | Coordinator session bootstrap for workspace-scoped task runs. | disabled, agent `coordinator`, TTL 2 hours, 5 children, 1 active per workspace | +| `[task.orchestration]` | Bounds for run summaries, context bundles, scheduler health, and max-runtime. | 4 KiB summaries, 8 KiB context, prior 5/recent 50 events, spawn fail limit 5 | +| `[task.orchestration.profile]` | Defaults and gates for task execution profiles. | inherit coordinator/worker/sandbox; provider override + sandbox `none` allowed | +| `[task.orchestration.review]` | Defaults and bounds for the post-terminal review gate. | policy `none`, max rounds 3, max attempts 2, timeout 20m, failure `block_task` | +| `[[hooks.declarations]]` | Config-defined runtime hooks. | empty list | +| `[network]` | Experimental AGH network runtime. | enabled, channel `default` | ## Load And Merge Order @@ -208,13 +209,31 @@ client_secret_ref = "env:REMOTE_DOCS_MCP_CLIENT_SECRET" scopes = ["mcp.read", "mcp.write"] [providers.claude] -# Overrides the built-in Claude provider command/model and records native auth diagnostics. +# Overrides the built-in Claude provider command and records native auth diagnostics. 
command = "npx -y @agentclientprotocol/claude-agent-acp@latest" -default_model = "claude-sonnet-4-6" auth_mode = "native_cli" auth_status_command = "claude auth status" auth_login_command = "claude auth login" +[providers.claude.models] +# Pre-session catalog defaults consumed by the daemon-owned model catalog. +default = "claude-sonnet-4-6" + +[[providers.claude.models.curated]] +id = "claude-sonnet-4-6" +display_name = "Claude Sonnet 4.6" + +[[providers.claude.models.curated]] +id = "claude-haiku-4-5" +display_name = "Claude Haiku 4.5" + +[model_catalog.sources.models_dev] +# Optional models.dev enrichment source for catalog metadata. +enabled = true +endpoint = "https://models.dev/api.json" +ttl = "24h" +timeout = "10s" + [[providers.claude.mcp_servers]] name = "github" command = "npx" @@ -698,23 +717,67 @@ Provider keys override built-ins with the same name or create a custom provider. `goose`, `hermes`, `junie`, `kimi-cli`, `openclaw`, `openhands`, `qoder`, `qwen-code`, `pi`, `openrouter`, `zai`, `moonshot`, `vercel-ai-gateway`, `xai`, `minimax`, `mistral`, and `groq`. -| Field | Type | Default | Valid values | Description | -| --------------------- | --------------------------- | --------------------------------------------------------------------------------- | ------------------------------------------------- | -------------------------------------------------------------------------------------------- | -| `command` | string | Built-in command or empty for custom providers. | Required after built-in plus override resolution. | ACP launch command for this provider. | -| `display_name` | string | Built-in label or empty. | Any string. | Operator-facing label shown in settings and provider pickers. | -| `default_model` | string | Built-in model or empty. | Any string. | Model used when an agent omits `model`; native `pi` receives it through ACP model selection. | -| `harness` | string | `acp` unless a built-in sets `pi_acp`. | `acp`, `pi_acp`. 
| Launch strategy. `pi_acp` routes the provider through Pi's ACP adapter. | -| `runtime_provider` | string | Provider key. | Harness-specific provider id. | Downstream provider id used by Pi and other harnesses. | -| `transport` | string | empty. | Harness-specific string. | Optional Pi models override transport/API family. | -| `base_url` | string | empty. | URL string. | Optional Pi models override base URL for custom gateways. | -| `auth_mode` | string | `bound_secret` only when credential slots are configured; otherwise `native_cli`. | `native_cli`, `bound_secret`, `none`. | Declares whether auth belongs to the provider CLI, AGH secret binding, or no auth. | -| `env_policy` | string | `filtered`. | `filtered`, `isolated`. | Controls which daemon environment variables the provider subprocess inherits. | -| `home_policy` | string | `operator`. | `operator`, `isolated`. | Controls whether native CLI state comes from the operator home or an AGH provider home. | -| `auth_status_command` | string | empty. | Shell-style command string. | Optional status probe run by `agh provider auth status `. | -| `auth_login_command` | string | empty. | Shell-style command string. | Optional login command run by `agh provider auth login `. | -| `session_mcp` | boolean | `true` unless a provider disables it. | `true`, `false`. | Enables AGH session MCP injection for providers that support it. | -| `credential_slots` | array | empty. | See below. | Bound secret refs injected into provider subprocess environment variables at launch. | -| `mcp_servers` | array of MCP server objects | empty. | Same shape as `[[mcp_servers]]`. | Provider-specific MCP servers merged after top-level config and before agent MCP servers. 
| +| Field | Type | Default | Valid values | Description | +| --------------------- | --------------------------- | --------------------------------------------------------------------------------- | ------------------------------------------------- | ----------------------------------------------------------------------------------------- | +| `command` | string | Built-in command or empty for custom providers. | Required after built-in plus override resolution. | ACP launch command for this provider. | +| `display_name` | string | Built-in label or empty. | Any string. | Operator-facing label shown in settings and provider pickers. | +| `models` | table | Built-in defaults or empty. | Nested model config block (see below). | Pre-session model defaults, curated metadata, and optional discovery wiring. | +| `harness` | string | `acp` unless a built-in sets `pi_acp`. | `acp`, `pi_acp`. | Launch strategy. `pi_acp` routes the provider through Pi's ACP adapter. | +| `runtime_provider` | string | Provider key. | Harness-specific provider id. | Downstream provider id used by Pi and other harnesses. | +| `transport` | string | empty. | Harness-specific string. | Optional Pi models override transport/API family. | +| `base_url` | string | empty. | URL string. | Optional Pi models override base URL for custom gateways. | +| `auth_mode` | string | `bound_secret` only when credential slots are configured; otherwise `native_cli`. | `native_cli`, `bound_secret`, `none`. | Declares whether auth belongs to the provider CLI, AGH secret binding, or no auth. | +| `env_policy` | string | `filtered`. | `filtered`, `isolated`. | Controls which daemon environment variables the provider subprocess inherits. | +| `home_policy` | string | `operator`. | `operator`, `isolated`. | Controls whether native CLI state comes from the operator home or an AGH provider home. | +| `auth_status_command` | string | empty. | Shell-style command string. 
| Optional status probe run by `agh provider auth status `. | +| `auth_login_command` | string | empty. | Shell-style command string. | Optional login command run by `agh provider auth login `. | +| `session_mcp` | boolean | `true` unless a provider disables it. | `true`, `false`. | Enables AGH session MCP injection for providers that support it. | +| `credential_slots` | array | empty. | See below. | Bound secret refs injected into provider subprocess environment variables at launch. | +| `mcp_servers` | array of MCP server objects | empty. | Same shape as `[[mcp_servers]]`. | Provider-specific MCP servers merged after top-level config and before agent MCP servers. | + +The flat keys `default_model`, `supported_models`, and `supports_reasoning_effort` are no longer +accepted. Config that still sets them is rejected with a deterministic hard-cut error citing the +exact path. Move every value into `[providers..models]` below. + +### `[providers..models]` + +Pre-session model defaults and curated metadata are owned by the daemon-owned model catalog. The +catalog merges builtin defaults, the operator config, the optional `models.dev` enrichment source, +live provider discovery sources, and extension model sources, and exposes the result through HTTP, +UDS, CLI, the OpenAI-compatible projection, the Host API, and the web. Active ACP `configOptions` +continue to govern model and reasoning controls inside a running session, so curated entries are +metadata, never an allowlist. + +| Field | Type | Default | Valid values | Description | +| ------------------------------------------- | ------- | ---------------------------------- | ----------------------------------------------------------------------- | ------------------------------------------------------------------------------------------------------------- | +| `models.default` | string | Built-in default or empty. | Any non-blank model id. | Model used when an agent omits `model`. 
Free-form: it does not need to appear in `models.curated`. | +| `models.curated` | array | empty. | See entry fields below. | Curated model entries projected as `config` rows. Not an allowlist; manual model ids stay valid. | +| `models.curated[].id` | string | required per entry. | Unique provider model id. | Provider model identifier sent to the runtime. | +| `models.curated[].display_name` | string | empty. | Any string. | Optional human label. | +| `models.curated[].context_window` | integer | empty. | Positive integer. | Context window in tokens. | +| `models.curated[].max_input_tokens` | integer | empty. | Positive integer. | Maximum input tokens. | +| `models.curated[].max_output_tokens` | integer | empty. | Positive integer. | Maximum output tokens. | +| `models.curated[].supports_tools` | boolean | empty. | `true`, `false`. | Whether the model supports tool calls. | +| `models.curated[].supports_reasoning` | boolean | empty. | `true`, `false`. | Whether the model supports reasoning effort. | +| `models.curated[].reasoning_efforts` | array | empty. | Subset of `minimal`, `low`, `medium`, `high`, `xhigh`. Blanks rejected. | Allowed reasoning levels for this model. | +| `models.curated[].default_reasoning_effort` | string | empty. | Member of `reasoning_efforts` when both are set. | Per-model default reasoning level. Must appear in `reasoning_efforts` when both are set. | +| `models.curated[].cost_input_per_million` | number | empty. | Non-negative number. | Display-only input cost per million tokens. | +| `models.curated[].cost_output_per_million` | number | empty. | Non-negative number. | Display-only output cost per million tokens. | +| `models.discovery.enabled` | boolean | `false` unless a built-in opts in. | `true`, `false`. | Enables the side-effect-free model discovery adapter for this provider. | +| `models.discovery.command` | string | empty. | Shell-style command string. | Side-effect-free discovery command. 
Mutually exclusive with `endpoint` unless the adapter documents both. | +| `models.discovery.endpoint` | string | empty. | Absolute HTTP(S) URL. | Side-effect-free discovery endpoint URL. Mutually exclusive with `command` unless the adapter documents both. | +| `models.discovery.timeout` | string | model catalog source timeout. | Positive duration (`10s`, `45s`, `2m`). | Per-discovery timeout. | + +Discovery adapters use the resolved provider auth, env, and home policy. Discovery never creates an +ACP session; if discovery is unavailable or fails, the catalog records source status and falls back +to stale or lower-priority rows. Session creation never depends on a successful discovery refresh. + +`OpenClaw`, `Hermes`, and `Pi` only register a live provider source when `models.discovery.enabled += true` and either `command` or `endpoint` is set; with no discovery wiring, those providers stay +on builtin/config rows plus the optional `models.dev` enrichment. + +Top-level `[model_catalog.sources.models_dev]` controls the cross-provider `models.dev` enrichment +source documented in the next section. `env_policy = "filtered"` preserves ordinary operator context such as `PATH`, `HOME`, and locale while stripping secret-shaped daemon variables before launch. `env_policy = "isolated"` starts from @@ -750,7 +813,7 @@ direct `pi` provider use `native_cli` by default and do not preflight provider A API-key wrappers such as OpenRouter, z.ai, Moonshot/Kimi, Vercel AI Gateway, xAI, MiniMax, Mistral, and Groq default to AGH-managed `bound_secret` slots while AGH runs Pi under the hood. 
-| Built-in | Harness | Runtime provider | Default model | Auth mode | Credential target | +| Built-in | Harness | Runtime provider | `models.default` | Auth mode | Credential target | | ------------------- | -------- | ------------------- | --------------------------- | -------------- | -------------------- | | `claude` | `acp` | `claude` | `claude-sonnet-4-6` | `native_cli` | provider login | | `codex` | `acp` | `codex` | `gpt-5.4` | `native_cli` | provider login | @@ -779,6 +842,36 @@ and Groq default to AGH-managed `bound_secret` slots while AGH runs Pi under the | `mistral` | `pi_acp` | `mistral` | `devstral-medium-latest` | `bound_secret` | `MISTRAL_API_KEY` | | `groq` | `pi_acp` | `groq` | `openai/gpt-oss-120b` | `bound_secret` | `GROQ_API_KEY` | +## `[model_catalog.sources.models_dev]` + +The `models.dev` enrichment source feeds catalog metadata such as token windows, tool support, and +reasoning hints. It is cross-provider, daemon-owned, and never proves account-level availability. + +| Field | Type | Default | Valid values | Description | +| ---------- | -------- | ----------------------------- | --------------------- | ------------------------------------------------------------------------------------------------------------------------ | +| `enabled` | boolean | `true` | `true`, `false` | Toggles the `models.dev` source. When `false`, the source still appears in status but performs no outbound calls. | +| `endpoint` | string | `https://models.dev/api.json` | Absolute HTTP(S) URL. | Endpoint queried for the cross-provider model index. Must be HTTP(S) when set. | +| `ttl` | duration | `24h` | Positive Go duration. | Cache lifetime before catalog rows are flagged stale and a refresh is scheduled. | +| `timeout` | duration | `10s` | Positive Go duration. | Per-call HTTP timeout for the source. The HTTP client always uses an explicit deadline (no shared `http.DefaultClient`). 
| + +```toml +[model_catalog.sources.models_dev] +enabled = true +endpoint = "https://models.dev/api.json" +ttl = "24h" +timeout = "10s" +``` + +`models.dev` rows write provider-scoped status: a single `models.dev` refresh records one +`(source_id="models_dev", provider_id)` status row per AGH provider mapped, never a global +empty-provider sentinel. Disabled sources still expose status but skip outbound calls. Refresh +lifetime is daemon-owned: requests trigger refreshes but the daemon detaches the work, applies an +explicit deadline, and joins outstanding refresh workers during shutdown. + +Live provider discovery is configured per provider through `[providers..models.discovery]`. +Discovery refresh is serialized per `provider_id` before any subprocess or provider-home work, and +concurrent refresh requests for the same provider coalesce behind the in-flight refresh. + ## `[observability]` | Field | Type | Default | Valid values | Description | @@ -1265,4 +1358,6 @@ context.post_compact - [mcp.json](/runtime/core/configuration/mcp-json) documents JSON sidecars and whole-object replacement. - [AGENT.md](/runtime/core/configuration/agent-md) documents agent-local `mcp_servers` and hooks. +- [Providers](/runtime/core/agents/providers) covers `[providers..models]` shape and `models.discovery` defaults. +- [Provider Model Catalog](/runtime/core/agents/model-catalog) explains the catalog projection, OpenAI-compatible endpoint, and extension `model.source`. - [File Locations](/runtime/core/configuration/file-locations) lists the exact global and workspace paths. 
diff --git a/packages/site/content/runtime/core/extensions/develop.mdx b/packages/site/content/runtime/core/extensions/develop.mdx index 96a8f4883..0e9af0ab0 100644 --- a/packages/site/content/runtime/core/extensions/develop.mdx +++ b/packages/site/content/runtime/core/extensions/develop.mdx @@ -213,6 +213,7 @@ Important current provide surfaces: | ---------------- | ------------------------------------------------ | | `memory.backend` | `memory/store`, `memory/recall`, `memory/forget` | | `bridge.adapter` | `bridges/deliver` | +| `model.source` | `models/list` | `bridge.adapter` extensions must also declare bridge metadata: @@ -226,7 +227,59 @@ display_name = "Slack" ``` Marketplace extensions run under a stricter policy. They are constrained to read-oriented grants: -`memory.read`, `observe.read`, `session.read`, `skills.read`, and `tool.read`. +`memory.read`, `model.read`, `observe.read`, `session.read`, `skills.read`, and `tool.read`. + +### Model Source Extensions + +Extensions that declare the provide capability `model.source` enrich the daemon-owned provider +model catalog. The daemon owns persistence and merge, so the extension only contributes source +rows; it cannot rewrite global catalog state. + +```toml +[capabilities] +provides = ["model.source"] + +[actions] +requires = ["models/list", "models/refresh", "models/status"] + +[security] +capabilities = ["model.read", "model.write"] + +[subprocess] +command = "node" +args = ["dist/index.js"] +``` + +The extension service method `models/list` is dispatched by AGH whenever the catalog refreshes the +`extension:<slug>` source for a provider the extension declares. The slug is derived from the +extension name and must match `^[a-z0-9][a-z0-9_-]*$`; manifests that do not normalize cleanly are +rejected at install time. 
+ +| Direction | Method | Purpose | +| --------------- | ---------------- | ---------------------------------------------------------------------------- | +| AGH → extension | `models/list` | Returns provider model rows for the extension's declared providers. | +| Host API call | `models/list` | Reads the daemon-owned merged catalog projection, scoped by capability. | +| Host API call | `models/refresh` | Triggers a daemon-owned source refresh; serialized per provider. | +| Host API call | `models/status` | Reads daemon-owned source status, including `last_refresh` and `last_error`. | + +Capability areas align with the Host API authorization layer: + +| Method | Area | Default in marketplace policy | +| ---------------- | ------------- | ------------------------------------------ | +| `models/list` | `model.read` | Allowed (read-oriented grant). | +| `models/status` | `model.read` | Allowed (read-oriented grant). | +| `models/refresh` | `model.write` | Requires explicit grant; marketplace gate. | + +Extensions return validated rows. Invalid rows produce a recorded source status (with redacted +`last_error`) instead of corrupting the merged projection. Refresh runs under a daemon-enforced +deadline using the provider's auth/env/home policy, and refresh work for the same provider is +coalesced — concurrent refreshes return identical source statuses when the in-flight refresh +finishes. + +Generated TypeScript SDK and Go SDK helpers (`ProviderModel*`, `ModelCatalogSource*`, +`ModelSource*`) are published from the same OpenAPI/contract source as the daemon. Extensions +should depend on those helpers instead of hand-rolling JSON shapes so contract drift is caught at +typecheck time. 
### Authored Context Host API diff --git a/packages/site/lib/__tests__/provider-model-catalog-docs.test.ts b/packages/site/lib/__tests__/provider-model-catalog-docs.test.ts new file mode 100644 index 000000000..c4e8e4722 --- /dev/null +++ b/packages/site/lib/__tests__/provider-model-catalog-docs.test.ts @@ -0,0 +1,134 @@ +import { readFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +import { describe, expect, it } from "vitest"; + +const siteRoot = resolve(dirname(fileURLToPath(import.meta.url)), "..", ".."); +const runtimeRoot = resolve(siteRoot, "content/runtime"); + +const providersDoc = resolve(runtimeRoot, "core/agents/providers.mdx"); +const modelCatalogDoc = resolve(runtimeRoot, "core/agents/model-catalog.mdx"); +const configTomlDoc = resolve(runtimeRoot, "core/configuration/config-toml.mdx"); +const developExtensionsDoc = resolve(runtimeRoot, "core/extensions/develop.mdx"); +const cliProviderModelsIndex = resolve(runtimeRoot, "cli-reference/provider/models/index.mdx"); +const cliProviderModelsList = resolve(runtimeRoot, "cli-reference/provider/models/list.mdx"); +const cliProviderModelsRefresh = resolve(runtimeRoot, "cli-reference/provider/models/refresh.mdx"); +const cliProviderModelsStatus = resolve(runtimeRoot, "cli-reference/provider/models/status.mdx"); + +function read(path: string): string { + return readFileSync(path, "utf8"); +} + +function nonHardCutMatches(source: string, pattern: RegExp): string[] { + return source.split(/\r?\n/).flatMap(line => { + if ( + line.match(/no longer|hard-cut|rejected with|deterministic hard-cut|are rejected|reject the/) + ) { + return []; + } + return line.match(pattern) ? 
[line] : []; + }); +} + +describe("provider model catalog docs", () => { + it("removes old provider model field claims from the providers doc", () => { + const source = read(providersDoc); + const offending = nonHardCutMatches( + source, + /\b(default_model|supported_models|supports_reasoning_effort)\b/ + ); + expect(offending).toEqual([]); + }); + + it("removes old provider model field claims from config.toml docs", () => { + const source = read(configTomlDoc); + const offending = nonHardCutMatches( + source, + /\b(default_model|supported_models|supports_reasoning_effort)\b/ + ); + expect(offending).toEqual([]); + }); + + it("documents the nested provider models block in the providers doc", () => { + const source = read(providersDoc); + expect(source).toContain("[providers..models]"); + expect(source).toContain("models.default"); + expect(source).toContain("models.curated"); + expect(source).toContain("models.discovery"); + }); + + it("shows nested provider models examples only in the providers doc", () => { + const source = read(providersDoc); + expect(source).toContain("[providers.claude.models]"); + expect(source).toContain("[[providers.claude.models.curated]]"); + expect(source).toContain("[providers.openrouter.models]"); + }); + + it("documents [model_catalog.sources.models_dev] in config.toml", () => { + const source = read(configTomlDoc); + expect(source).toContain("[model_catalog.sources.models_dev]"); + expect(source).toContain("https://models.dev/api.json"); + expect(source).toContain("ttl"); + expect(source).toContain("timeout"); + }); + + it("documents provider models.discovery keys in config.toml", () => { + const source = read(configTomlDoc); + expect(source).toContain("models.discovery.enabled"); + expect(source).toContain("models.discovery.command"); + expect(source).toContain("models.discovery.endpoint"); + expect(source).toContain("models.discovery.timeout"); + }); + + it("documents native model catalog endpoints", () => { + const source = 
read(modelCatalogDoc); + expect(source).toContain("/api/providers/models"); + expect(source).toContain("/api/providers/{provider_id}/models"); + expect(source).toContain("/api/providers/models/refresh"); + expect(source).toContain("/api/providers/models/status"); + }); + + it("documents the OpenAI-compatible /api/openai/v1/models projection", () => { + const source = read(modelCatalogDoc); + expect(source).toContain("/api/openai/v1/models"); + expect(source).toContain("availability_state"); + expect(source).toContain("HTTP only"); + }); + + it("documents the daemon-owned refresh lifetime and serialization rules", () => { + const source = read(modelCatalogDoc); + expect(source).toContain("context.WithoutCancel"); + expect(source).toContain("serialized"); + expect(source).toContain("coalesce"); + expect(source).toContain("refresh_request_id"); + }); + + it("documents the model.source extension contract", () => { + const source = read(developExtensionsDoc); + expect(source).toContain("model.source"); + expect(source).toContain("models/list"); + expect(source).toContain("models/refresh"); + expect(source).toContain("models/status"); + expect(source).toContain("model.read"); + expect(source).toContain("model.write"); + }); + + it("includes the regenerated provider models CLI reference", () => { + const indexSource = read(cliProviderModelsIndex); + expect(indexSource).toContain("agh provider models"); + expect(indexSource).toContain("/runtime/cli-reference/provider/models/list"); + expect(indexSource).toContain("/runtime/cli-reference/provider/models/refresh"); + expect(indexSource).toContain("/runtime/cli-reference/provider/models/status"); + + expect(read(cliProviderModelsList)).toContain("agh provider models list"); + expect(read(cliProviderModelsRefresh)).toContain("agh provider models refresh"); + expect(read(cliProviderModelsStatus)).toContain("agh provider models status"); + }); + + it("explains the agh provider models namespace choice in the model catalog doc", 
() => { + const source = read(modelCatalogDoc); + expect(source).toContain("agh provider models"); + expect(source).toContain("agh models"); + expect(source).toContain("out of scope"); + }); +}); From 7566e79d4aa80f2721a9e5491d502e73d67a7d2e Mon Sep 17 00:00:00 2001 From: Pedro Nauck Date: Thu, 7 May 2026 10:12:36 -0300 Subject: [PATCH 11/13] test: harden provider model catalog regressions --- internal/api/core/model_catalog.go | 2 +- .../api/core/model_catalog_conversions.go | 8 +- internal/api/core/model_catalog_test.go | 50 +++- .../api/testutil/model_catalog_parity_test.go | 39 +++ internal/daemon/model_catalog.go | 2 +- internal/daemon/model_catalog_test.go | 73 +++++- internal/extension/host_api_models.go | 10 +- internal/extension/host_api_models_test.go | 64 ++++- internal/modelcatalog/hardcut_residue_test.go | 164 ++++++++++++ internal/modelcatalog/redact.go | 5 +- internal/modelcatalog/redact_test.go | 51 ++++ .../modelcatalog/service_integration_test.go | 240 ++++++++++++++++++ internal/modelcatalog/service_test.go | 240 ++++++++++++++++++ .../acpmock/cmd/acpmock-driver/main.go | 124 ++++++++- .../acpmock/cmd/acpmock-driver/main_test.go | 204 ++++++++++----- internal/testutil/acpmock/fixture.go | 70 ++++- .../browser_session_lifecycle_fixture.json | 36 +++ .../cli-reference/agent/heartbeat/index.mdx | 19 +- .../runtime/cli-reference/agent/index.mdx | 11 +- .../cli-reference/agent/soul/index.mdx | 15 +- .../content/runtime/cli-reference/agh.mdx | 64 ++--- .../cli-reference/automation/index.mdx | 10 +- .../cli-reference/automation/jobs/index.mdx | 14 +- .../cli-reference/automation/runs/index.mdx | 4 +- .../automation/triggers/index.mdx | 12 +- .../runtime/cli-reference/bridge/index.mdx | 25 +- .../bridge/secret-bindings/index.mdx | 10 +- .../runtime/cli-reference/bundle/index.mdx | 21 +- .../runtime/cli-reference/ch/index.mdx | 12 +- .../runtime/cli-reference/completion/bash.mdx | 4 +- .../runtime/cli-reference/completion/fish.mdx | 3 +- 
.../cli-reference/completion/index.mdx | 12 +- .../cli-reference/completion/powershell.mdx | 2 +- .../runtime/cli-reference/completion/zsh.mdx | 7 +- .../runtime/cli-reference/config/index.mdx | 21 +- .../runtime/cli-reference/daemon/index.mdx | 11 +- .../runtime/cli-reference/extension/index.mdx | 19 +- .../runtime/cli-reference/hooks/index.mdx | 13 +- .../runtime/cli-reference/mcp/auth/index.mdx | 9 +- .../runtime/cli-reference/mcp/index.mdx | 5 +- .../runtime/cli-reference/me/index.mdx | 4 +- .../cli-reference/memory/adhoc/index.mdx | 5 +- .../cli-reference/memory/daily/index.mdx | 15 +- .../cli-reference/memory/decisions/index.mdx | 9 +- .../cli-reference/memory/dream/index.mdx | 13 +- .../cli-reference/memory/extractor/index.mdx | 13 +- .../runtime/cli-reference/memory/index.mdx | 45 ++-- .../cli-reference/memory/provider/index.mdx | 11 +- .../cli-reference/memory/recall/index.mdx | 5 +- .../cli-reference/network/directs/index.mdx | 13 +- .../runtime/cli-reference/network/index.mdx | 21 +- .../cli-reference/network/threads/index.mdx | 9 +- .../cli-reference/network/work/index.mdx | 5 +- .../runtime/cli-reference/observe/index.mdx | 7 +- .../cli-reference/provider/auth/index.mdx | 9 +- .../runtime/cli-reference/provider/index.mdx | 9 +- .../cli-reference/provider/models/index.mdx | 11 +- .../runtime/cli-reference/resource/index.mdx | 12 +- .../runtime/cli-reference/session/index.mdx | 31 ++- .../cli-reference/session/soul/index.mdx | 5 +- .../runtime/cli-reference/skill/index.mdx | 25 +- .../cli-reference/task/child/index.mdx | 4 +- .../cli-reference/task/dependency/index.mdx | 6 +- .../runtime/cli-reference/task/index.mdx | 46 ++-- .../cli-reference/task/notification/index.mdx | 12 +- .../cli-reference/task/profile/index.mdx | 10 +- .../cli-reference/task/review/index.mdx | 12 +- .../runtime/cli-reference/task/run/index.mdx | 18 +- .../runtime/cli-reference/tool/index.mdx | 13 +- .../runtime/cli-reference/toolsets/index.mdx | 7 +- 
.../runtime/cli-reference/vault/index.mdx | 13 +- .../runtime/cli-reference/workspace/index.mdx | 15 +- .../session-provider-override.spec.ts | 22 +- .../fixtures/__tests__/runtime-seed.test.ts | 34 ++- 74 files changed, 1693 insertions(+), 501 deletions(-) create mode 100644 internal/modelcatalog/hardcut_residue_test.go create mode 100644 internal/modelcatalog/redact_test.go diff --git a/internal/api/core/model_catalog.go b/internal/api/core/model_catalog.go index d3140b033..c6fa5143f 100644 --- a/internal/api/core/model_catalog.go +++ b/internal/api/core/model_catalog.go @@ -129,7 +129,7 @@ func (h *BaseHandlers) refreshProviderModels(c *gin.Context, providerParam strin if err != nil { status := StatusForModelCatalogError(err) if len(payload.Sources) > 0 { - payload.Error = err.Error() + payload.Error = modelcatalog.RedactString(err.Error()) c.JSON(status, payload) return } diff --git a/internal/api/core/model_catalog_conversions.go b/internal/api/core/model_catalog_conversions.go index 3b8c271c3..661566981 100644 --- a/internal/api/core/model_catalog_conversions.go +++ b/internal/api/core/model_catalog_conversions.go @@ -34,7 +34,7 @@ func ProviderModelPayloadFromModel(model modelcatalog.Model) contract.ProviderMo SupportsReasoning: model.SupportsReasoning, ReasoningEfforts: reasoningEffortStrings(model.ReasoningEfforts), DefaultReasoningEffort: reasoningEffortStringPtr(model.DefaultReasoningEffort), - LastError: model.LastError, + LastError: modelcatalog.RedactString(model.LastError), } if model.CostInputPerMillion != nil || model.CostOutputPerMillion != nil { payload.Cost = &contract.ModelCatalogCostPayload{ @@ -54,7 +54,7 @@ func SourceRefPayloadsFromRefs(refs []modelcatalog.SourceRef) []contract.ModelCa Priority: ref.Priority, RefreshedAt: modelCatalogTimeString(ref.RefreshedAt), Stale: ref.Stale, - LastError: ref.LastError, + LastError: modelcatalog.RedactString(ref.LastError), }) } return payloads @@ -73,7 +73,7 @@ func SourceStatusPayloadsFromStatuses( 
LastRefresh: modelCatalogTimeString(status.LastRefresh), NextRefresh: modelCatalogTimeString(status.NextRefresh), LastSuccess: modelCatalogTimeString(status.LastSuccess), - LastError: status.LastError, + LastError: modelcatalog.RedactString(status.LastError), RefreshState: status.RefreshState, RowCount: status.RowCount, Stale: status.Stale, @@ -116,7 +116,7 @@ func OpenAIModelPayloadFromModel(model modelcatalog.Model) contract.OpenAIModelP ReasoningEfforts: reasoningEffortStrings(model.ReasoningEfforts), DefaultReasoningEffort: reasoningEffortStringPtr(model.DefaultReasoningEffort), Cost: costPayloadFromModel(model), - LastError: model.LastError, + LastError: modelcatalog.RedactString(model.LastError), }, } } diff --git a/internal/api/core/model_catalog_test.go b/internal/api/core/model_catalog_test.go index 0ab686070..9b2bd9dec 100644 --- a/internal/api/core/model_catalog_test.go +++ b/internal/api/core/model_catalog_test.go @@ -4,6 +4,7 @@ import ( "context" "encoding/json" "errors" + "fmt" "net/http" "net/http/httptest" "strings" @@ -18,7 +19,7 @@ import ( func TestBaseHandlersModelCatalogDependency(t *testing.T) { t.Parallel() - t.Run("ShouldCarryModelCatalogServiceFromConfig", func(t *testing.T) { + t.Run("Should carry model catalog service from config", func(t *testing.T) { t.Parallel() service := coreModelCatalogServiceStub{} @@ -80,6 +81,35 @@ func TestProviderModelPayloadConversion(t *testing.T) { t.Fatalf("payload JSON = %s, want nullable available field", encoded) } }) + + t.Run("Should redact source errors in native and OpenAI projections", func(t *testing.T) { + t.Parallel() + + model := seedModelCatalogModel("codex", "gpt-5.4") + model.LastError = "provider failed with api_key=sk-native-secret-token" + model.Sources[0].LastError = "source failed with OAUTH_TOKEN=oauth-secret-token" + + nativePayload := ProviderModelPayloadFromModel(model) + assertRedactedModelCatalogPayload(t, nativePayload.LastError, "sk-native-secret-token") + 
assertRedactedModelCatalogPayload(t, nativePayload.Sources[0].LastError, "oauth-secret-token") + + openAIPayload := OpenAIModelPayloadFromModel(model) + assertRedactedModelCatalogPayload(t, openAIPayload.AGH.LastError, "sk-native-secret-token") + + statusPayloads := SourceStatusPayloadsFromStatuses([]modelcatalog.SourceStatus{ + { + SourceID: modelcatalog.SourceIDModelsDev, + SourceKind: modelcatalog.SourceKindModelsDev, + ProviderID: "codex", + RefreshState: string(modelcatalog.RefreshStateFailed), + LastError: "models.dev failed with Bearer ya29.api-secret-token", + }, + }) + if got, want := len(statusPayloads), 1; got != want { + t.Fatalf("len(statusPayloads) = %d, want %d", got, want) + } + assertRedactedModelCatalogPayload(t, statusPayloads[0].LastError, "ya29.api-secret-token") + }) } func TestProviderModelCatalogHandlers(t *testing.T) { @@ -140,6 +170,7 @@ func TestProviderModelCatalogHandlers(t *testing.T) { t.Run("Should return source statuses when refresh fails", func(t *testing.T) { t.Parallel() + secret := "sk-refresh-secret-token" service := &modelCatalogServiceSpy{ refreshFn: func(_ context.Context, _ modelcatalog.RefreshOptions) ([]modelcatalog.SourceStatus, error) { return []modelcatalog.SourceStatus{ @@ -148,10 +179,10 @@ func TestProviderModelCatalogHandlers(t *testing.T) { SourceKind: modelcatalog.SourceKindConfig, ProviderID: "codex", RefreshState: string(modelcatalog.RefreshStateFailed), - LastError: "config source failed", + LastError: "config source failed with api_key=" + secret, Stale: true, }, - }, modelcatalog.ErrAllSourcesFailed + }, fmt.Errorf("%w: api_key=%s", modelcatalog.ErrAllSourcesFailed, secret) }, } engine := newModelCatalogCoreEngine(t, service) @@ -168,6 +199,8 @@ func TestProviderModelCatalogHandlers(t *testing.T) { if payload.Error == "" { t.Fatalf("payload.Error = empty, want refresh error") } + assertRedactedModelCatalogPayload(t, payload.Error, secret) + assertRedactedModelCatalogPayload(t, payload.Sources[0].LastError, 
secret) }) } @@ -339,6 +372,17 @@ func seedModelCatalogModel(providerID string, modelID string) modelcatalog.Model } } +func assertRedactedModelCatalogPayload(t *testing.T, value string, secret string) { + t.Helper() + + if strings.Contains(value, secret) { + t.Fatalf("payload value = %q, want secret redacted", value) + } + if !strings.Contains(value, "[REDACTED]") { + t.Fatalf("payload value = %q, want redaction marker", value) + } +} + func boolPtr(value bool) *bool { return &value } diff --git a/internal/api/testutil/model_catalog_parity_test.go b/internal/api/testutil/model_catalog_parity_test.go index 5b661be2f..24d117e17 100644 --- a/internal/api/testutil/model_catalog_parity_test.go +++ b/internal/api/testutil/model_catalog_parity_test.go @@ -2,6 +2,7 @@ package testutil_test import ( "context" + "encoding/json" "io" "log/slog" "net/http" @@ -11,9 +12,11 @@ import ( "time" "github.com/gin-gonic/gin" + "github.com/pedronauck/agh/internal/api/contract" "github.com/pedronauck/agh/internal/api/httpapi" "github.com/pedronauck/agh/internal/api/testutil" "github.com/pedronauck/agh/internal/api/udsapi" + "github.com/pedronauck/agh/internal/cli" aghconfig "github.com/pedronauck/agh/internal/config" "github.com/pedronauck/agh/internal/modelcatalog" ) @@ -44,6 +47,42 @@ func TestModelCatalogTransportParity(t *testing.T) { if got, want := httpResp.Body.String(), udsResp.Body.String(); got != want { t.Fatalf("HTTP body = %s, want UDS body %s", got, want) } + var cliRecord cli.ProviderModelListRecord + if err := json.Unmarshal(httpResp.Body.Bytes(), &cliRecord); err != nil { + t.Fatalf("json.Unmarshal(HTTP body as CLI record) error = %v", err) + } + cliJSON, err := json.Marshal(cliRecord) + if err != nil { + t.Fatalf("json.Marshal(CLI record) error = %v", err) + } + if got, want := string(cliJSON), httpResp.Body.String(); got != want { + t.Fatalf("CLI JSON = %s, want canonical native body %s", got, want) + } + + openAIResp := performParityRequest( + t, + httpEngine, + 
http.MethodGet, + "/api/openai/v1/models?provider_id=codex", + ) + if openAIResp.Code != http.StatusOK { + t.Fatalf("OpenAI status = %d, want 200; body=%s", openAIResp.Code, openAIResp.Body.String()) + } + var openAIPayload contract.OpenAIModelListResponse + if err := json.Unmarshal(openAIResp.Body.Bytes(), &openAIPayload); err != nil { + t.Fatalf("json.Unmarshal(OpenAI body) error = %v", err) + } + if len(openAIPayload.Data) != 1 { + t.Fatalf("OpenAI data = %#v, want one model", openAIPayload.Data) + } + openAIModel := openAIPayload.Data[0] + nativeModel := cliRecord.Models[0] + if openAIModel.ID != nativeModel.ModelID || + openAIModel.OwnedBy != nativeModel.ProviderID || + openAIModel.AGH.ProviderID != nativeModel.ProviderID || + openAIModel.AGH.ModelID != nativeModel.ModelID { + t.Fatalf("OpenAI model = %#v, want native catalog identity %#v", openAIModel, nativeModel) + } }) } diff --git a/internal/daemon/model_catalog.go b/internal/daemon/model_catalog.go index e7bc76a9b..f3d1c757f 100644 --- a/internal/daemon/model_catalog.go +++ b/internal/daemon/model_catalog.go @@ -131,7 +131,7 @@ func (r *modelCatalogRuntime) Refresh( } refreshCtx := context.WithoutCancel(ctx) - refreshCtx, cancel := context.WithDeadline(refreshCtx, runtimeNow.Add(r.timeout)) + refreshCtx, cancel := context.WithTimeout(refreshCtx, r.timeout) resultCh := make(chan modelCatalogRefreshResult, 1) r.wg.Go(func() { diff --git a/internal/daemon/model_catalog_test.go b/internal/daemon/model_catalog_test.go index 7d88c6974..07232b566 100644 --- a/internal/daemon/model_catalog_test.go +++ b/internal/daemon/model_catalog_test.go @@ -1,8 +1,11 @@ package daemon import ( + "bytes" "context" "errors" + "log/slog" + "strings" "sync" "testing" "time" @@ -15,7 +18,7 @@ import ( func TestDaemonModelCatalogWiring(t *testing.T) { t.Parallel() - t.Run("ShouldComposeCatalogServiceWhenGlobalDBAndConfigAreAvailable", func(t *testing.T) { + t.Run("Should compose catalog service when global DB and config are 
available", func(t *testing.T) { t.Parallel() daemonInstance, httpDeps, udsDeps := bootModelCatalogTestDaemon(t, nil) @@ -46,7 +49,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) { } }) - t.Run("ShouldRecordLiveSourceStatusWhenOptionalDependencyIsMissing", func(t *testing.T) { + t.Run("Should record live source status when optional dependency is missing", func(t *testing.T) { t.Parallel() daemonInstance, _, _ := bootModelCatalogTestDaemon(t, nil) @@ -76,7 +79,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) { } }) - t.Run("ShouldCancelAndJoinRefreshWorkOnShutdown", func(t *testing.T) { + t.Run("Should cancel and join refresh work on shutdown", func(t *testing.T) { t.Parallel() service := newBlockingModelCatalogService() @@ -124,7 +127,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) { waitForCatalogTestSignal(t, service.released, "refresh release") }) - t.Run("ShouldReturnShutdownDeadlineWhenRefreshWorkerDoesNotStopInTime", func(t *testing.T) { + t.Run("Should return shutdown deadline when refresh worker does not stop in time", func(t *testing.T) { t.Parallel() service := newManuallyReleasedModelCatalogService() @@ -163,7 +166,59 @@ func TestDaemonModelCatalogWiring(t *testing.T) { waitForCatalogTestSignal(t, service.released, "manual refresh release") }) - t.Run("ShouldRefreshBeforeListingWhenListRequestsRefresh", func(t *testing.T) { + t.Run("Should apply runtime timeout to detached refresh work", func(t *testing.T) { + t.Parallel() + + service := newBlockingModelCatalogService() + runtime, err := newModelCatalogRuntime( + testutil.Context(t), + service, + discardLogger(), + func() time.Time { + return time.Date(2026, 5, 7, 12, 0, 0, 0, time.UTC) + }, + 20*time.Millisecond, + ) + if err != nil { + t.Fatalf("newModelCatalogRuntime() error = %v", err) + } + + _, err = runtime.Refresh(testutil.Context(t), modelcatalog.RefreshOptions{ + ProviderID: "codex", + SourceID: modelcatalog.SourceIDBuiltin, + Force: true, + }) + if !errors.Is(err, 
context.DeadlineExceeded) { + t.Fatalf("Refresh(timeout) error = %v, want context.DeadlineExceeded", err) + } + waitForCatalogTestSignal(t, service.released, "timed refresh release") + }) + + t.Run("Should redact source errors in refresh logs", func(t *testing.T) { + t.Parallel() + + var logs bytes.Buffer + runtime := &modelCatalogRuntime{ + logger: slog.New(slog.NewTextHandler(&logs, nil)), + } + runtime.logRefreshFailure( + modelcatalog.RefreshOptions{ + ProviderID: "codex", + SourceID: modelcatalog.SourceIDModelsDev, + RequestID: "req-redaction", + }, + errors.New("source failed with api_key=sk-super-secret-token-123"), + ) + output := logs.String() + if strings.Contains(output, "sk-super-secret-token-123") { + t.Fatalf("log output = %q, want secret redacted", output) + } + if !strings.Contains(output, "[REDACTED]") { + t.Fatalf("log output = %q, want redaction marker", output) + } + }) + + t.Run("Should refresh before listing when list requests refresh", func(t *testing.T) { t.Parallel() service := &recordingModelCatalogService{ @@ -206,7 +261,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) { } }) - t.Run("ShouldValidateRuntimeDependencies", func(t *testing.T) { + t.Run("Should validate runtime dependencies", func(t *testing.T) { t.Parallel() if _, err := newModelCatalogRuntime(testutil.Context(t), nil, nil, nil, 0); err == nil { @@ -244,7 +299,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) { } }) - t.Run("ShouldDisableCatalogWhenRegistryDoesNotExposeStore", func(t *testing.T) { + t.Run("Should disable catalog when registry does not expose store", func(t *testing.T) { t.Parallel() homePaths := testHomePaths(t) @@ -263,7 +318,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) { } }) - t.Run("ShouldRejectInvalidTimeoutsDuringCatalogBoot", func(t *testing.T) { + t.Run("Should reject invalid timeouts during catalog boot", func(t *testing.T) { t.Parallel() homePaths := testHomePaths(t) @@ -288,7 +343,7 @@ func TestDaemonModelCatalogWiring(t *testing.T) 
{ } }) - t.Run("ShouldUseEnvSecretResolverWhenVaultUnavailable", func(t *testing.T) { + t.Run("Should use env secret resolver when vault is unavailable", func(t *testing.T) { t.Parallel() homePaths := testHomePaths(t) diff --git a/internal/extension/host_api_models.go b/internal/extension/host_api_models.go index eaee13cc2..e9269fafc 100644 --- a/internal/extension/host_api_models.go +++ b/internal/extension/host_api_models.go @@ -91,7 +91,7 @@ func (h *HostAPIHandler) handleModelsRefresh( } if err != nil { if len(payload.Sources) > 0 { - payload.Error = err.Error() + payload.Error = modelcatalog.RedactString(err.Error()) return payload, nil } return nil, hostAPIModelCatalogRPCError(err) @@ -172,7 +172,7 @@ func hostAPIModelCatalogRPCError(err error) error { if errors.Is(err, modelcatalog.ErrSourceNotRegistered) { return invalidParamsRPCError(err) } - return unavailableRPCError(err) + return unavailableRPCError(errors.New(modelcatalog.RedactString(err.Error()))) } func hostAPIProviderModelListPayloadFromModels(models []modelcatalog.Model) apicontract.ProviderModelListResponse { @@ -203,7 +203,7 @@ func hostAPIProviderModelPayloadFromModel(model modelcatalog.Model) apicontract. 
ReasoningEfforts: hostAPIReasoningEffortStrings(model.ReasoningEfforts), DefaultReasoningEffort: hostAPIReasoningEffortStringPtr(model.DefaultReasoningEffort), Cost: hostAPICostPayloadFromModel(model), - LastError: model.LastError, + LastError: modelcatalog.RedactString(model.LastError), } } @@ -216,7 +216,7 @@ func hostAPISourceRefPayloadsFromRefs(refs []modelcatalog.SourceRef) []apicontra Priority: ref.Priority, RefreshedAt: hostAPIModelCatalogTimeString(ref.RefreshedAt), Stale: ref.Stale, - LastError: ref.LastError, + LastError: modelcatalog.RedactString(ref.LastError), }) } return payloads @@ -235,7 +235,7 @@ func hostAPISourceStatusPayloadsFromStatuses( LastRefresh: hostAPIModelCatalogTimeString(status.LastRefresh), NextRefresh: hostAPIModelCatalogTimeString(status.NextRefresh), LastSuccess: hostAPIModelCatalogTimeString(status.LastSuccess), - LastError: status.LastError, + LastError: modelcatalog.RedactString(status.LastError), RefreshState: status.RefreshState, RowCount: status.RowCount, Stale: status.Stale, diff --git a/internal/extension/host_api_models_test.go b/internal/extension/host_api_models_test.go index 94af84041..7f43b58aa 100644 --- a/internal/extension/host_api_models_test.go +++ b/internal/extension/host_api_models_test.go @@ -4,6 +4,8 @@ import ( "context" "encoding/json" "errors" + "fmt" + "strings" "testing" "time" @@ -38,12 +40,14 @@ func TestHostAPIModelsListShouldReturnDaemonProjection(t *testing.T) { SourceKind: modelcatalog.SourceKindConfig, Priority: modelcatalog.PriorityConfig, RefreshedAt: now, + LastError: "source failed with OAUTH_TOKEN=oauth-host-secret-token", }, }, ReasoningEfforts: []modelcatalog.ReasoningEffort{modelcatalog.ReasoningEffortHigh}, DefaultReasoningEffort: &defaultEffort, CostInputPerMillion: &cost, CostOutputPerMillion: &cost, + LastError: "model failed with api_key=sk-host-secret-token", }, }, } @@ -85,6 +89,8 @@ func TestHostAPIModelsListShouldReturnDaemonProjection(t *testing.T) { if 
model.DefaultReasoningEffort == nil || *model.DefaultReasoningEffort != "high" { t.Fatalf("models/list default reasoning effort = %#v, want high", model.DefaultReasoningEffort) } + assertRedactedHostAPIModelPayload(t, model.LastError, "sk-host-secret-token") + assertRedactedHostAPIModelPayload(t, model.Sources[0].LastError, "oauth-host-secret-token") if len(service.listOpts) != 1 { t.Fatalf("len(service.listOpts) = %d, want 1", len(service.listOpts)) } @@ -102,6 +108,7 @@ func TestHostAPIModelsRefreshShouldReturnStatusPayloadOnSourceFailure(t *testing t.Parallel() now := time.Date(2026, 5, 7, 12, 15, 0, 0, time.UTC) + secret := "sk-host-refresh-secret-token" service := &fakeHostAPIModelCatalogService{ statuses: []modelcatalog.SourceStatus{ { @@ -110,12 +117,12 @@ func TestHostAPIModelsRefreshShouldReturnStatusPayloadOnSourceFailure(t *testing ProviderID: "codex", Priority: modelcatalog.PriorityExtension, LastRefresh: now, - LastError: "extension unavailable", + LastError: "extension unavailable api_key=" + secret, RefreshState: string(modelcatalog.RefreshStateFailed), Stale: true, }, }, - refreshErr: modelcatalog.ErrAllSourcesFailed, + refreshErr: fmt.Errorf("%w: api_key=%s", modelcatalog.ErrAllSourcesFailed, secret), } handler := NewHostAPIHandler( nil, @@ -148,6 +155,8 @@ func TestHostAPIModelsRefreshShouldReturnStatusPayloadOnSourceFailure(t *testing if payload.Error == "" || len(payload.Sources) != 1 || payload.Sources[0].RefreshState != "failed" { t.Fatalf("models/refresh payload = %#v, want failed source status and error", payload) } + assertRedactedHostAPIModelPayload(t, payload.Error, secret) + assertRedactedHostAPIModelPayload(t, payload.Sources[0].LastError, secret) if len(service.refreshOpts) != 1 || !service.refreshOpts[0].Force { t.Fatalf("Refresh opts = %#v, want force refresh recorded", service.refreshOpts) } @@ -390,6 +399,46 @@ func TestHostAPIModelsShouldMapValidationAndAvailabilityErrors(t *testing.T) { } } +func 
TestHostAPIModelsShouldRedactUnavailableRPCErrorData(t *testing.T) { + t.Parallel() + + t.Run("Should redact unavailable RPC error data", func(t *testing.T) { + t.Parallel() + + secret := "oauth-rpc-secret-token" + handler := NewHostAPIHandler( + nil, + nil, + nil, + nil, + WithHostAPIModelCatalogService(&fakeHostAPIModelCatalogService{ + listErr: errors.New("catalog unavailable OAUTH_TOKEN=" + secret), + }), + WithHostAPICapabilityChecker(newTestCapabilityChecker( + "ext", + SourceUser, + []string{"models/list"}, + []string{"model.read"}, + )), + ) + _, err := handler.Handle(testutil.Context(t), "ext", "models/list", json.RawMessage(`{}`)) + if err == nil { + t.Fatal("Handle(models/list) error = nil, want RPC error") + } + var rpcErr *subprocess.RPCError + if !errors.As(err, &rpcErr) { + t.Fatalf("Handle(models/list) error = %T, want *RPCError", err) + } + data := string(rpcErr.Data) + if strings.Contains(data, secret) { + t.Fatalf("RPC error data = %s, want secret redacted", data) + } + if !strings.Contains(data, "[REDACTED]") { + t.Fatalf("RPC error data = %s, want redaction marker", data) + } + }) +} + func TestHostAPIModelHelpersShouldHandleEmptyValues(t *testing.T) { t.Parallel() @@ -443,3 +492,14 @@ func (s *fakeHostAPIModelCatalogService) ListSourceStatus( s.statusProviderIDs = append(s.statusProviderIDs, providerID) return append([]modelcatalog.SourceStatus(nil), s.statuses...), s.statusErr } + +func assertRedactedHostAPIModelPayload(t *testing.T, value string, secret string) { + t.Helper() + + if strings.Contains(value, secret) { + t.Fatalf("Host API payload value = %q, want secret redacted", value) + } + if !strings.Contains(value, "[REDACTED]") { + t.Fatalf("Host API payload value = %q, want redaction marker", value) + } +} diff --git a/internal/modelcatalog/hardcut_residue_test.go b/internal/modelcatalog/hardcut_residue_test.go new file mode 100644 index 000000000..0ae17a64a --- /dev/null +++ b/internal/modelcatalog/hardcut_residue_test.go @@ -0,0 
+1,164 @@ +package modelcatalog + +import ( + "bufio" + "fmt" + "io/fs" + "os" + "path/filepath" + "runtime" + "strings" + "testing" +) + +func TestProviderModelHardCutResidueGuard(t *testing.T) { + t.Parallel() + + t.Run("Should find no old provider model config residue outside allowlisted surfaces", func(t *testing.T) { + t.Parallel() + + repoRoot := testRepoRoot(t) + fields := []string{ + "default_model", + "supported_models", + "supports_reasoning_effort", + } + var residues []string + for _, target := range []string{"cmd", "internal", "web", "packages/site", "openapi", "config.toml"} { + targetPath := filepath.Join(repoRoot, target) + info, err := os.Stat(targetPath) + if err != nil { + t.Fatalf("os.Stat(%q) error = %v", targetPath, err) + } + if !info.IsDir() { + residues = appendResiduesFromFile(t, residues, repoRoot, targetPath, fields) + continue + } + err = filepath.WalkDir(targetPath, func(path string, entry fs.DirEntry, walkErr error) error { + if walkErr != nil { + return walkErr + } + if entry.IsDir() { + if skipResidueGuardDir(entry.Name()) { + return filepath.SkipDir + } + return nil + } + residues = appendResiduesFromFile(t, residues, repoRoot, path, fields) + return nil + }) + if err != nil { + t.Fatalf("WalkDir(%q) error = %v", targetPath, err) + } + } + + if len(residues) > 0 { + t.Fatalf( + "provider model hard-cut residue found in non-test surfaces:\n%s", + strings.Join(residues, "\n"), + ) + } + }) +} + +func testRepoRoot(t *testing.T) string { + t.Helper() + + _, file, _, ok := runtime.Caller(0) + if !ok { + t.Fatal("runtime.Caller() failed") + } + return filepath.Clean(filepath.Join(filepath.Dir(file), "..", "..")) +} + +func appendResiduesFromFile( + t *testing.T, + residues []string, + repoRoot string, + path string, + fields []string, +) []string { + t.Helper() + + rel, err := filepath.Rel(repoRoot, path) + if err != nil { + t.Fatalf("filepath.Rel(%q, %q) error = %v", repoRoot, path, err) + } + rel = filepath.ToSlash(rel) + if 
skipResidueGuardFile(rel) { + return residues + } + file, err := os.Open(path) + if err != nil { + t.Fatalf("os.Open(%q) error = %v", path, err) + } + defer func() { + if closeErr := file.Close(); closeErr != nil { + t.Errorf("Close(%q) error = %v", path, closeErr) + } + }() + + scanner := bufio.NewScanner(file) + scanner.Buffer(make([]byte, 1024), 1024*1024) + lineNo := 0 + for scanner.Scan() { + lineNo++ + line := scanner.Text() + for _, field := range fields { + if !strings.Contains(line, field) { + continue + } + if allowedProviderModelResidue(rel, line, field) { + continue + } + residues = append(residues, fmt.Sprintf("%s:%d contains %s", rel, lineNo, field)) + } + } + if err := scanner.Err(); err != nil { + t.Fatalf("Scan(%q) error = %v", path, err) + } + return residues +} + +func skipResidueGuardDir(name string) bool { + switch name { + case ".git", ".next", ".tmp", ".turbo", "coverage", "dist", "node_modules", "out", "storybook-static": + return true + default: + return false + } +} + +func skipResidueGuardFile(rel string) bool { + base := filepath.Base(rel) + if strings.HasSuffix(base, "_test.go") || + strings.Contains(base, ".test.") || + strings.Contains(base, ".spec.") || + strings.HasSuffix(base, ".snap") { + return true + } + return strings.Contains(rel, "/__tests__/") || strings.Contains(rel, "/testdata/") +} + +func allowedProviderModelResidue(rel string, line string, field string) bool { + if rel == "internal/config/merge.go" { + return strings.Contains(line, fmt.Sprintf("%q", field)) + } + if rel == "packages/site/content/runtime/core/agents/providers.mdx" || + rel == "packages/site/content/runtime/core/configuration/config-toml.mdx" { + return strings.Contains(line, "flat keys") || strings.Contains(line, "are no longer") + } + if field != "supported_models" { + return false + } + switch rel { + case "internal/api/contract/contract.go", + "web/src/generated/agh-openapi.d.ts", + "openapi/agh.json", + "web/src/systems/session/mocks/fixtures.ts", + 
"web/src/systems/network/mocks/fixtures.ts": + return true + default: + return false + } +} diff --git a/internal/modelcatalog/redact.go b/internal/modelcatalog/redact.go index a39149d60..f9f03f9cc 100644 --- a/internal/modelcatalog/redact.go +++ b/internal/modelcatalog/redact.go @@ -10,7 +10,10 @@ var secretPatterns = []*regexp.Regexp{ regexp.MustCompile(`sk-[A-Za-z0-9_-]{8,}`), regexp.MustCompile(`gh[pousr]_[A-Za-z0-9_]{8,}`), regexp.MustCompile(`xox[baprs]-[A-Za-z0-9-]{8,}`), - regexp.MustCompile(`(?i)(api[_-]?key|access[_-]?token|refresh[_-]?token|secret|password|credential)=([^&\s]+)`), + regexp.MustCompile(`(?i)\bBearer\s+[A-Za-z0-9._~+/=-]{8,}`), + regexp.MustCompile( + `(?i)\b([A-Z0-9_-]*(?:api[_-]?key|auth[_-]?token|oauth[_-]?token|access[_-]?token|refresh[_-]?token|id[_-]?token|secret|password|credential|private[_-]?key)[A-Z0-9_-]*)=([^&\s]+)`, + ), } // RedactString removes secret-shaped values from catalog errors. diff --git a/internal/modelcatalog/redact_test.go b/internal/modelcatalog/redact_test.go new file mode 100644 index 000000000..464e5b462 --- /dev/null +++ b/internal/modelcatalog/redact_test.go @@ -0,0 +1,51 @@ +package modelcatalog + +import ( + "strings" + "testing" +) + +func TestRedactString(t *testing.T) { + t.Parallel() + + tests := []struct { + name string + input string + secret string + }{ + { + name: "Should redact OpenAI style API keys", + input: "models.dev failed with api_key=sk-super-secret-token-123", + secret: "sk-super-secret-token-123", + }, + { + name: "Should redact OAuth bearer tokens", + input: "provider returned Authorization: Bearer ya29.secret-oauth-token", + secret: "ya29.secret-oauth-token", + }, + { + name: "Should redact secret shaped environment values", + input: "discovery failed with OPENAI_API_KEY=env-secret-value CLIENT_SECRET=client-secret-value", + secret: "env-secret-value", + }, + { + name: "Should redact OAuth token environment values", + input: "extension failed with OAUTH_TOKEN=oauth-secret-value", + 
secret: "oauth-secret-value", + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + + redacted := RedactString(tc.input) + if strings.Contains(redacted, tc.secret) { + t.Fatalf("RedactString() = %q, want secret removed", redacted) + } + if !strings.Contains(redacted, "[REDACTED]") { + t.Fatalf("RedactString() = %q, want redaction marker", redacted) + } + }) + } +} diff --git a/internal/modelcatalog/service_integration_test.go b/internal/modelcatalog/service_integration_test.go index aa64a8725..db95cce43 100644 --- a/internal/modelcatalog/service_integration_test.go +++ b/internal/modelcatalog/service_integration_test.go @@ -1,11 +1,14 @@ package modelcatalog_test import ( + "context" "database/sql" "fmt" "net/http" "net/http/httptest" "path/filepath" + "slices" + "sync" "testing" "time" @@ -140,6 +143,106 @@ func TestCatalogServiceGlobalDBIntegration(t *testing.T) { t.Fatalf("raw marker persisted in %d catalog fields, want 0", matches) } }) + + t.Run("Should coalesce same provider refreshes without SQLite busy failures", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t) + store, _ := openCatalogGlobalDB(t) + source := newIntegrationBlockingSource(map[string][]modelcatalog.ModelRow{ + "codex": { + integrationRow("codex", "gpt-5.4", integrationTime(20)), + }, + }) + service, err := modelcatalog.NewService(store, []modelcatalog.Source{source}) + if err != nil { + t.Fatalf("NewService() error = %v", err) + } + + results := make(chan error, 2) + for range 2 { + go func() { + _, refreshErr := service.Refresh(ctx, modelcatalog.RefreshOptions{ + ProviderID: "codex", + SourceID: source.ID(), + Force: true, + Now: integrationTime(20), + }) + results <- refreshErr + }() + } + source.waitForCalls(t, 1) + source.requireCallCountStable(t, 1, 25*time.Millisecond) + source.release() + + for range 2 { + if err := <-results; err != nil { + t.Fatalf("Refresh() error = %v", err) + } + } + models, err := service.ListModels(ctx, 
modelcatalog.ListOptions{ProviderID: "codex", Now: integrationTime(21)}) + if err != nil { + t.Fatalf("ListModels(codex) error = %v", err) + } + if got, want := integrationModelKeys(models), []string{"codex/gpt-5.4"}; !slices.Equal(got, want) { + t.Fatalf("model keys = %#v, want %#v", got, want) + } + }) + + t.Run("Should persist concurrent cross provider refreshes without SQLite busy failures", func(t *testing.T) { + t.Parallel() + + ctx := testutil.Context(t) + store, _ := openCatalogGlobalDB(t) + source := newIntegrationBlockingSource(map[string][]modelcatalog.ModelRow{ + "claude": { + integrationRow("claude", "claude-sonnet-4-6", integrationTime(30)), + }, + "codex": { + integrationRow("codex", "gpt-5.4", integrationTime(30)), + }, + }) + service, err := modelcatalog.NewService(store, []modelcatalog.Source{source}) + if err != nil { + t.Fatalf("NewService() error = %v", err) + } + + results := make(chan error, 2) + for _, providerID := range []string{"codex", "claude"} { + go func(providerID string) { + _, refreshErr := service.Refresh(ctx, modelcatalog.RefreshOptions{ + ProviderID: providerID, + SourceID: source.ID(), + Force: true, + Now: integrationTime(30), + }) + results <- refreshErr + }(providerID) + } + source.waitForCalls(t, 2) + source.release() + + for range 2 { + if err := <-results; err != nil { + t.Fatalf("Refresh() error = %v", err) + } + } + models, err := service.ListModels(ctx, modelcatalog.ListOptions{Now: integrationTime(31)}) + if err != nil { + t.Fatalf("ListModels() error = %v", err) + } + if got, want := integrationModelKeys( + models, + ), []string{ + "claude/claude-sonnet-4-6", + "codex/gpt-5.4", + }; !slices.Equal( + got, + want, + ) { + t.Fatalf("model keys = %#v, want %#v", got, want) + } + }) } func openCatalogGlobalDB(t *testing.T) (*globaldb.GlobalDB, string) { @@ -162,3 +265,140 @@ func openCatalogGlobalDB(t *testing.T) (*globaldb.GlobalDB, string) { func integrationTime(offset int) time.Time { return time.Date(2026, 5, 7, 13, 
offset, 0, 0, time.UTC) } + +type integrationBlockingSource struct { + mu sync.Mutex + rowsByProvider map[string][]modelcatalog.ModelRow + calls int + callsCh chan int + releaseCh chan struct{} + releaseOnce sync.Once +} + +func newIntegrationBlockingSource(rowsByProvider map[string][]modelcatalog.ModelRow) *integrationBlockingSource { + return &integrationBlockingSource{ + rowsByProvider: rowsByProvider, + callsCh: make(chan int, 16), + releaseCh: make(chan struct{}), + } +} + +func (s *integrationBlockingSource) ID() string { + return "provider_live:integration" +} + +func (s *integrationBlockingSource) Kind() modelcatalog.SourceKind { + return modelcatalog.SourceKindProviderLive +} + +func (s *integrationBlockingSource) Priority() int { + return modelcatalog.PriorityProviderLive +} + +func (s *integrationBlockingSource) ProviderIDs() []string { + s.mu.Lock() + defer s.mu.Unlock() + providers := make([]string, 0, len(s.rowsByProvider)) + for providerID := range s.rowsByProvider { + providers = append(providers, providerID) + } + slices.Sort(providers) + return providers +} + +func (s *integrationBlockingSource) ListModels( + ctx context.Context, + opts modelcatalog.ListOptions, +) ([]modelcatalog.ModelRow, error) { + s.mu.Lock() + s.calls++ + calls := s.calls + s.mu.Unlock() + select { + case s.callsCh <- calls: + default: + } + + select { + case <-s.releaseCh: + case <-ctx.Done(): + return nil, ctx.Err() + } + + s.mu.Lock() + rows := cloneIntegrationRows(s.rowsByProvider[opts.ProviderID]) + s.mu.Unlock() + return rows, nil +} + +func (s *integrationBlockingSource) waitForCalls(t *testing.T, want int) { + t.Helper() + + deadline := time.After(time.Second) + for { + if s.callCount() >= want { + return + } + select { + case <-s.callsCh: + case <-deadline: + t.Fatalf("source calls = %d, want at least %d", s.callCount(), want) + } + } +} + +func (s *integrationBlockingSource) requireCallCountStable( + t *testing.T, + want int, + duration time.Duration, +) { + 
t.Helper() + + timer := time.NewTimer(duration) + defer timer.Stop() + for { + select { + case <-s.callsCh: + if got := s.callCount(); got > want { + t.Fatalf("source calls = %d while first refresh was blocked, want at most %d", got, want) + } + case <-timer.C: + return + } + } +} + +func (s *integrationBlockingSource) release() { + s.releaseOnce.Do(func() { + close(s.releaseCh) + }) +} + +func (s *integrationBlockingSource) callCount() int { + s.mu.Lock() + defer s.mu.Unlock() + return s.calls +} + +func integrationRow(providerID string, modelID string, refreshedAt time.Time) modelcatalog.ModelRow { + return modelcatalog.ModelRow{ + SourceID: "provider_live:integration", + SourceKind: modelcatalog.SourceKindProviderLive, + Priority: modelcatalog.PriorityProviderLive, + ProviderID: providerID, + ModelID: modelID, + RefreshedAt: refreshedAt, + } +} + +func cloneIntegrationRows(rows []modelcatalog.ModelRow) []modelcatalog.ModelRow { + return append([]modelcatalog.ModelRow(nil), rows...) +} + +func integrationModelKeys(models []modelcatalog.Model) []string { + keys := make([]string, 0, len(models)) + for _, model := range models { + keys = append(keys, model.ProviderID+"/"+model.ModelID) + } + return keys +} diff --git a/internal/modelcatalog/service_test.go b/internal/modelcatalog/service_test.go index 2a7302a15..10bfc997a 100644 --- a/internal/modelcatalog/service_test.go +++ b/internal/modelcatalog/service_test.go @@ -403,6 +403,130 @@ func TestCatalogServiceRefresh(t *testing.T) { }) } +func TestCatalogServiceRefreshConcurrency(t *testing.T) { + t.Parallel() + + t.Run("Should coalesce concurrent refreshes for the same provider scope", func(t *testing.T) { + t.Parallel() + + source := newBlockingRefreshSource(map[string][]ModelRow{ + "codex": { + testRow( + "provider_live:codex", + SourceKindProviderLive, + PriorityProviderLive, + "codex", + "gpt-5.4", + testTime(30), + nil, + ), + }, + }) + store := newMemoryStore() + service := newTestService(t, store, 
[]Source{source}) + ctx := testutil.Context(t) + + results := make(chan refreshTestResult, 2) + for range 2 { + go func() { + statuses, err := service.Refresh(ctx, RefreshOptions{ + ProviderID: "codex", + SourceID: source.ID(), + Force: true, + Now: testTime(30), + }) + results <- refreshTestResult{statuses: statuses, err: err} + }() + } + source.waitForCalls(t, 1) + source.requireCallCountStable(t, 1, 25*time.Millisecond) + source.release() + + for range 2 { + result := <-results + if result.err != nil { + t.Fatalf("Refresh() error = %v", result.err) + } + if got, want := len(result.statuses), 1; got != want { + t.Fatalf("len(statuses) = %d, want %d: %#v", got, want, result.statuses) + } + } + }) + + t.Run("Should let concurrent refreshes across providers replace rows deterministically", func(t *testing.T) { + t.Parallel() + + source := newBlockingRefreshSource(map[string][]ModelRow{ + "claude": { + testRow( + "provider_live:shared", + SourceKindProviderLive, + PriorityProviderLive, + "claude", + "claude-sonnet-4-6", + testTime(31), + nil, + ), + }, + "codex": { + testRow( + "provider_live:shared", + SourceKindProviderLive, + PriorityProviderLive, + "codex", + "gpt-5.4", + testTime(31), + nil, + ), + }, + }) + store := newMemoryStore() + service := newTestService(t, store, []Source{source}) + ctx := testutil.Context(t) + + results := make(chan refreshTestResult, 2) + for _, providerID := range []string{"codex", "claude"} { + go func(providerID string) { + statuses, err := service.Refresh(ctx, RefreshOptions{ + ProviderID: providerID, + SourceID: source.ID(), + Force: true, + Now: testTime(31), + }) + results <- refreshTestResult{statuses: statuses, err: err} + }(providerID) + } + source.waitForCalls(t, 2) + source.release() + + for range 2 { + result := <-results + if result.err != nil { + t.Fatalf("Refresh() error = %v", result.err) + } + if got, want := len(result.statuses), 1; got != want { + t.Fatalf("len(statuses) = %d, want %d: %#v", got, want, 
result.statuses) + } + } + models, err := service.ListModels( + ctx, + ListOptions{IncludeStale: true, Now: testTime(32)}, + ) + if err != nil { + t.Fatalf("ListModels() error = %v", err) + } + if got, want := modelKeys(models), []string{"claude/claude-sonnet-4-6", "codex/gpt-5.4"}; !slices.Equal( + got, + want, + ) { + t.Fatalf("model keys = %#v, want %#v", got, want) + } + if got, want := source.callCount(), 2; got != want { + t.Fatalf("source calls = %d, want %d cross-provider calls", got, want) + } + }) +} + type fakeSource struct { id string kind SourceKind @@ -445,6 +569,122 @@ func (s *fakeSource) ListModels(_ context.Context, opts ListOptions) ([]ModelRow return rows, s.err } +type refreshTestResult struct { + statuses []SourceStatus + err error +} + +type blockingRefreshSource struct { + mu sync.Mutex + rowsByProvider map[string][]ModelRow + calls int + callsCh chan int + releaseCh chan struct{} + releaseOnce sync.Once +} + +func newBlockingRefreshSource(rowsByProvider map[string][]ModelRow) *blockingRefreshSource { + return &blockingRefreshSource{ + rowsByProvider: rowsByProvider, + callsCh: make(chan int, 16), + releaseCh: make(chan struct{}), + } +} + +func (s *blockingRefreshSource) ID() string { + return "provider_live:shared" +} + +func (s *blockingRefreshSource) Kind() SourceKind { + return SourceKindProviderLive +} + +func (s *blockingRefreshSource) Priority() int { + return PriorityProviderLive +} + +func (s *blockingRefreshSource) ProviderIDs() []string { + s.mu.Lock() + defer s.mu.Unlock() + providers := make([]string, 0, len(s.rowsByProvider)) + for providerID := range s.rowsByProvider { + providers = append(providers, providerID) + } + slices.Sort(providers) + return providers +} + +func (s *blockingRefreshSource) TTL() time.Duration { + return 0 +} + +func (s *blockingRefreshSource) ListModels(ctx context.Context, opts ListOptions) ([]ModelRow, error) { + s.mu.Lock() + s.calls++ + calls := s.calls + s.mu.Unlock() + select { + case s.callsCh 
<- calls: + default: + } + + select { + case <-s.releaseCh: + case <-ctx.Done(): + return nil, ctx.Err() + } + + s.mu.Lock() + rows := cloneModelRows(s.rowsByProvider[opts.ProviderID]) + s.mu.Unlock() + return rows, nil +} + +func (s *blockingRefreshSource) waitForCalls(t *testing.T, want int) { + t.Helper() + + deadline := time.After(time.Second) + for { + if s.callCount() >= want { + return + } + select { + case <-s.callsCh: + case <-deadline: + t.Fatalf("source calls = %d, want at least %d", s.callCount(), want) + } + } +} + +func (s *blockingRefreshSource) requireCallCountStable(t *testing.T, want int, duration time.Duration) { + t.Helper() + + timer := time.NewTimer(duration) + defer timer.Stop() + for { + select { + case <-s.callsCh: + if got := s.callCount(); got > want { + t.Fatalf("source calls = %d while first refresh was blocked, want at most %d", got, want) + } + case <-timer.C: + return + } + } +} + +func (s *blockingRefreshSource) release() { + s.releaseOnce.Do(func() { + close(s.releaseCh) + }) +} + +func (s *blockingRefreshSource) callCount() int { + s.mu.Lock() + defer s.mu.Unlock() + return s.calls +} + type memoryStore struct { mu sync.Mutex rows map[string][]ModelRow diff --git a/internal/testutil/acpmock/cmd/acpmock-driver/main.go b/internal/testutil/acpmock/cmd/acpmock-driver/main.go index 42c91d0d8..ee8db99fe 100644 --- a/internal/testutil/acpmock/cmd/acpmock-driver/main.go +++ b/internal/testutil/acpmock/cmd/acpmock-driver/main.go @@ -35,6 +35,7 @@ type sessionState struct { type mockAgent struct { conn *acpsdk.AgentSideConnection agent acpmock.AgentFixture + configOptions []acpsdk.SessionConfigOption diagnosticsPath string lifecycleCtx context.Context cancelLifecycle context.CancelFunc @@ -73,6 +74,7 @@ func main() { agent := &mockAgent{ agent: agentFixture, + configOptions: sessionConfigOptionsFromFixture(agentFixture.ConfigOptions), diagnosticsPath: strings.TrimSpace(args.DiagnosticsPath), lifecycleCtx: lifecycleCtx, cancelLifecycle: 
cancelLifecycle, @@ -158,7 +160,10 @@ func (a *mockAgent) NewSession(_ context.Context, params acpsdk.NewSessionReques if err := a.writeSessionDiagnostics("session_new", sessionID, params.McpServers); err != nil { return acpsdk.NewSessionResponse{}, err } - return acpsdk.NewSessionResponse{SessionId: acpsdk.SessionId(sessionID)}, nil + return acpsdk.NewSessionResponse{ + SessionId: acpsdk.SessionId(sessionID), + ConfigOptions: a.cloneConfigOptions(), + }, nil } func (a *mockAgent) LoadSession( @@ -174,7 +179,7 @@ func (a *mockAgent) LoadSession( if err := a.writeSessionDiagnostics("session_load", sessionID, params.McpServers); err != nil { return acpsdk.LoadSessionResponse{}, err } - return acpsdk.LoadSessionResponse{}, nil + return acpsdk.LoadSessionResponse{ConfigOptions: a.cloneConfigOptions()}, nil } func (a *mockAgent) writeSessionDiagnostics( @@ -201,10 +206,21 @@ func (a *mockAgent) SetSessionMode( } func (a *mockAgent) SetSessionConfigOption( - context.Context, - acpsdk.SetSessionConfigOptionRequest, + _ context.Context, + request acpsdk.SetSessionConfigOptionRequest, ) (acpsdk.SetSessionConfigOptionResponse, error) { - return acpsdk.SetSessionConfigOptionResponse{ConfigOptions: []acpsdk.SessionConfigOption{}}, nil + if request.ValueId == nil { + return acpsdk.SetSessionConfigOptionResponse{}, errors.New( + "acpmock-driver: only value-id session config options are supported", + ) + } + if err := a.setConfigOptionValue( + string(request.ValueId.ConfigId), + string(request.ValueId.Value), + ); err != nil { + return acpsdk.SetSessionConfigOptionResponse{}, err + } + return acpsdk.SetSessionConfigOptionResponse{ConfigOptions: a.cloneConfigOptions()}, nil } func (a *mockAgent) UnstableSetSessionModel( @@ -214,6 +230,104 @@ func (a *mockAgent) UnstableSetSessionModel( return acpsdk.UnstableSetSessionModelResponse{}, nil } +func sessionConfigOptionsFromFixture( + options []acpmock.SessionConfigOptionFixture, +) []acpsdk.SessionConfigOption { + if len(options) == 0 
{ + return nil + } + result := make([]acpsdk.SessionConfigOption, 0, len(options)) + for _, option := range options { + values := make(acpsdk.SessionConfigSelectOptionsUngrouped, 0, len(option.Values)) + for _, value := range option.Values { + label := strings.TrimSpace(value.Label) + if label == "" { + label = strings.TrimSpace(value.Value) + } + values = append(values, acpsdk.SessionConfigSelectOption{ + Name: label, + Value: acpsdk.SessionConfigValueId(strings.TrimSpace(value.Value)), + }) + } + result = append(result, acpsdk.SessionConfigOption{ + Select: &acpsdk.SessionConfigOptionSelect{ + Id: acpsdk.SessionConfigId(strings.TrimSpace(option.ID)), + Name: strings.TrimSpace(option.Name), + CurrentValue: acpsdk.SessionConfigValueId(strings.TrimSpace(option.Current)), + Options: acpsdk.SessionConfigSelectOptions{ + Ungrouped: &values, + }, + Type: "select", + }, + }) + } + return result +} + +func (a *mockAgent) cloneConfigOptions() []acpsdk.SessionConfigOption { + a.mu.Lock() + defer a.mu.Unlock() + return cloneSessionConfigOptions(a.configOptions) +} + +func (a *mockAgent) setConfigOptionValue(configID string, value string) error { + trimmedConfigID := strings.TrimSpace(configID) + trimmedValue := strings.TrimSpace(value) + if trimmedConfigID == "" { + return errors.New("acpmock-driver: session config option id is required") + } + if trimmedValue == "" { + return errors.New("acpmock-driver: session config option value is required") + } + + a.mu.Lock() + defer a.mu.Unlock() + for idx := range a.configOptions { + option := a.configOptions[idx].Select + if option == nil || string(option.Id) != trimmedConfigID { + continue + } + if option.Options.Ungrouped == nil { + return fmt.Errorf("acpmock-driver: config option %q has no selectable values", trimmedConfigID) + } + for _, candidate := range *option.Options.Ungrouped { + if string(candidate.Value) == trimmedValue { + option.CurrentValue = acpsdk.SessionConfigValueId(trimmedValue) + return nil + } + } + return 
fmt.Errorf( + "acpmock-driver: config option %q value %q is not available", + trimmedConfigID, + trimmedValue, + ) + } + return fmt.Errorf("acpmock-driver: config option %q is not available", trimmedConfigID) +} + +func cloneSessionConfigOptions(options []acpsdk.SessionConfigOption) []acpsdk.SessionConfigOption { + if len(options) == 0 { + return nil + } + cloned := make([]acpsdk.SessionConfigOption, 0, len(options)) + for _, option := range options { + if option.Select != nil { + selectCopy := *option.Select + if option.Select.Options.Ungrouped != nil { + values := append(acpsdk.SessionConfigSelectOptionsUngrouped(nil), (*option.Select.Options.Ungrouped)...) + selectCopy.Options.Ungrouped = &values + } + cloned = append(cloned, acpsdk.SessionConfigOption{Select: &selectCopy}) + continue + } + if option.Boolean != nil { + booleanCopy := *option.Boolean + cloned = append(cloned, acpsdk.SessionConfigOption{Boolean: &booleanCopy}) + } + } + return cloned +} + func (a *mockAgent) Prompt(ctx context.Context, params acpsdk.PromptRequest) (acpsdk.PromptResponse, error) { sessionID := strings.TrimSpace(string(params.SessionId)) if sessionID == "" { diff --git a/internal/testutil/acpmock/cmd/acpmock-driver/main_test.go b/internal/testutil/acpmock/cmd/acpmock-driver/main_test.go index 544440d7a..39732fd71 100644 --- a/internal/testutil/acpmock/cmd/acpmock-driver/main_test.go +++ b/internal/testutil/acpmock/cmd/acpmock-driver/main_test.go @@ -1,6 +1,7 @@ package main import ( + "context" "strings" "testing" @@ -12,89 +13,154 @@ import ( func TestExtractPromptTextPreservesAugmentedPromptDiagnostics(t *testing.T) { t.Parallel() - prompt := "Session instructions\n\n" + - "User request:\n\n" + - "{}\n\n" + - "Relevant durable memory for this turn:\n" + - "- Auth [workspace]\n\n" + - "User message:\n" + - "hello alpha" - blocks := []acpsdk.ContentBlock{ - acpsdk.TextBlock("ignored"), - acpsdk.TextBlock(prompt), - } - - if got, want := extractPromptText(blocks), prompt; got != want 
{ - t.Fatalf("extractPromptText() = %q, want %q", got, want) - } + t.Run("Should preserve augmented prompt diagnostics", func(t *testing.T) { + t.Parallel() + + prompt := "Session instructions\n\n" + + "User request:\n\n" + + "{}\n\n" + + "Relevant durable memory for this turn:\n" + + "- Auth [workspace]\n\n" + + "User message:\n" + + "hello alpha" + blocks := []acpsdk.ContentBlock{ + acpsdk.TextBlock("ignored"), + acpsdk.TextBlock(prompt), + } + + if got, want := extractPromptText(blocks), prompt; got != want { + t.Fatalf("extractPromptText() = %q, want %q", got, want) + } + }) } func TestExtractPromptTextPreservesAugmentedPromptWithoutNestedMessageMarker(t *testing.T) { t.Parallel() - prompt := "Session instructions\n\n" + - "User request:\n\n" + - "{}\n\n" + - "hello alpha" - blocks := []acpsdk.ContentBlock{ - acpsdk.TextBlock(prompt), - } - - if got, want := extractPromptText(blocks), prompt; got != want { - t.Fatalf("extractPromptText() = %q, want %q", got, want) - } + t.Run("Should preserve augmented prompt without nested message marker", func(t *testing.T) { + t.Parallel() + + prompt := "Session instructions\n\n" + + "User request:\n\n" + + "{}\n\n" + + "hello alpha" + blocks := []acpsdk.ContentBlock{ + acpsdk.TextBlock(prompt), + } + + if got, want := extractPromptText(blocks), prompt; got != want { + t.Fatalf("extractPromptText() = %q, want %q", got, want) + } + }) } func TestMockAgentSelectTurnDoesNotCountUnmatchedPrompts(t *testing.T) { t.Parallel() - agent := &mockAgent{ - agent: acpmock.AgentFixture{ - Name: "alpha", - Turns: []acpmock.TurnFixture{ - { - Name: "first", - Match: acpmock.TurnMatch{ - TurnSource: acp.PromptTurnSourceUser, - UserText: "first prompt", - Occurrence: 1, + t.Run("Should not count unmatched prompts", func(t *testing.T) { + t.Parallel() + + agent := &mockAgent{ + agent: acpmock.AgentFixture{ + Name: "alpha", + Turns: []acpmock.TurnFixture{ + { + Name: "first", + Match: acpmock.TurnMatch{ + TurnSource: acp.PromptTurnSourceUser, + 
UserText: "first prompt", + Occurrence: 1, + }, + }, + { + Name: "second", + Match: acpmock.TurnMatch{ + TurnSource: acp.PromptTurnSourceUser, + UserText: "second prompt", + Occurrence: 2, + }, }, }, + }, + sessions: map[string]*sessionState{}, + } + meta := acp.PromptMeta{TurnSource: acp.PromptTurnSourceUser} + + first, occurrence, err := agent.selectTurn("acp-session-1", "first prompt", meta) + if err != nil { + t.Fatalf("selectTurn(first) error = %v", err) + } + if first.Name != "first" || occurrence != 1 { + t.Fatalf("selectTurn(first) = (%q, %d), want (first, 1)", first.Name, occurrence) + } + + _, occurrence, err = agent.selectTurn("acp-session-1", "extractor internal prompt", meta) + if err == nil || !strings.Contains(err.Error(), "no turn matched") { + t.Fatalf("selectTurn(unmatched) error = %v, want no-match error", err) + } + if occurrence != 2 { + t.Fatalf("selectTurn(unmatched) occurrence = %d, want next occurrence 2", occurrence) + } + + second, occurrence, err := agent.selectTurn("acp-session-1", "second prompt", meta) + if err != nil { + t.Fatalf("selectTurn(second) error = %v", err) + } + if second.Name != "second" || occurrence != 2 { + t.Fatalf("selectTurn(second) = (%q, %d), want (second, 2)", second.Name, occurrence) + } + }) +} + +func TestMockAgentSessionConfigOptions(t *testing.T) { + t.Parallel() + + t.Run("Should update current select values", func(t *testing.T) { + t.Parallel() + + agent := &mockAgent{ + configOptions: sessionConfigOptionsFromFixture([]acpmock.SessionConfigOptionFixture{ { - Name: "second", - Match: acpmock.TurnMatch{ - TurnSource: acp.PromptTurnSourceUser, - UserText: "second prompt", - Occurrence: 2, + ID: "model", + Name: "Model", + Current: "qa-browser-model", + Values: []acpmock.SessionConfigOptionValueFixture{ + {Value: "qa-browser-model", Label: "QA Browser Model"}, + {Value: "qa-browser-model-alt", Label: "QA Browser Model Alt"}, }, }, + }), + } + + response, err := agent.SetSessionConfigOption( + 
context.Background(), + acpsdk.SetSessionConfigOptionRequest{ + ValueId: &acpsdk.SetSessionConfigOptionValueId{ + ConfigId: acpsdk.SessionConfigId("model"), + Value: acpsdk.SessionConfigValueId("qa-browser-model-alt"), + }, + }, + ) + if err != nil { + t.Fatalf("SetSessionConfigOption() error = %v", err) + } + if got, want := response.ConfigOptions[0].Select.CurrentValue, acpsdk.SessionConfigValueId( + "qa-browser-model-alt", + ); got != want { + t.Fatalf("CurrentValue = %q, want %q", got, want) + } + + _, err = agent.SetSessionConfigOption( + context.Background(), + acpsdk.SetSessionConfigOptionRequest{ + ValueId: &acpsdk.SetSessionConfigOptionValueId{ + ConfigId: acpsdk.SessionConfigId("model"), + Value: acpsdk.SessionConfigValueId("missing-model"), + }, }, - }, - sessions: map[string]*sessionState{}, - } - meta := acp.PromptMeta{TurnSource: acp.PromptTurnSourceUser} - - first, occurrence, err := agent.selectTurn("acp-session-1", "first prompt", meta) - if err != nil { - t.Fatalf("selectTurn(first) error = %v", err) - } - if first.Name != "first" || occurrence != 1 { - t.Fatalf("selectTurn(first) = (%q, %d), want (first, 1)", first.Name, occurrence) - } - - _, occurrence, err = agent.selectTurn("acp-session-1", "extractor internal prompt", meta) - if err == nil || !strings.Contains(err.Error(), "no turn matched") { - t.Fatalf("selectTurn(unmatched) error = %v, want no-match error", err) - } - if occurrence != 2 { - t.Fatalf("selectTurn(unmatched) occurrence = %d, want next occurrence 2", occurrence) - } - - second, occurrence, err := agent.selectTurn("acp-session-1", "second prompt", meta) - if err != nil { - t.Fatalf("selectTurn(second) error = %v", err) - } - if second.Name != "second" || occurrence != 2 { - t.Fatalf("selectTurn(second) = (%q, %d), want (second, 2)", second.Name, occurrence) - } + ) + if err == nil || !strings.Contains(err.Error(), "is not available") { + t.Fatalf("SetSessionConfigOption(missing) error = %v, want unavailable value", err) + } + 
}) } diff --git a/internal/testutil/acpmock/fixture.go b/internal/testutil/acpmock/fixture.go index 9f420ad20..ed0c008b3 100644 --- a/internal/testutil/acpmock/fixture.go +++ b/internal/testutil/acpmock/fixture.go @@ -49,12 +49,27 @@ type Fixture struct { // AgentFixture describes one named ACP mock agent inside a fixture file. type AgentFixture struct { - Name string `json:"name"` - Provider string `json:"provider"` - Model string `json:"model,omitempty"` - Permissions string `json:"permissions,omitempty"` - Prompt string `json:"prompt,omitempty"` - Turns []TurnFixture `json:"turns"` + Name string `json:"name"` + Provider string `json:"provider"` + Model string `json:"model,omitempty"` + Permissions string `json:"permissions,omitempty"` + Prompt string `json:"prompt,omitempty"` + ConfigOptions []SessionConfigOptionFixture `json:"config_options,omitempty"` + Turns []TurnFixture `json:"turns"` +} + +// SessionConfigOptionFixture describes one deterministic ACP session config select option. +type SessionConfigOptionFixture struct { + ID string `json:"id"` + Name string `json:"name"` + Current string `json:"current"` + Values []SessionConfigOptionValueFixture `json:"values"` +} + +// SessionConfigOptionValueFixture describes one selectable ACP config option value. +type SessionConfigOptionValueFixture struct { + Value string `json:"value"` + Label string `json:"label,omitempty"` } // TurnFixture describes one deterministic prompt turn for an agent. 
@@ -252,6 +267,11 @@ func (a AgentFixture) Validate(path string) error { if len(a.Turns) == 0 { return fmt.Errorf("acpmock: %s.turns must contain at least one turn", path) } + for idx, option := range a.ConfigOptions { + if err := option.Validate(fmt.Sprintf("%s.config_options[%d]", path, idx)); err != nil { + return err + } + } for idx, turn := range a.Turns { if err := turn.Validate(fmt.Sprintf("%s.turns[%d]", path, idx)); err != nil { return err @@ -260,6 +280,44 @@ func (a AgentFixture) Validate(path string) error { return nil } +// Validate ensures one session config option is deterministic and selectable. +func (o SessionConfigOptionFixture) Validate(path string) error { + id := strings.TrimSpace(o.ID) + if id == "" { + return fmt.Errorf("acpmock: %s.id is required", path) + } + name := strings.TrimSpace(o.Name) + if name == "" { + return fmt.Errorf("acpmock: %s.name is required", path) + } + current := strings.TrimSpace(o.Current) + if current == "" { + return fmt.Errorf("acpmock: %s.current is required", path) + } + if len(o.Values) == 0 { + return fmt.Errorf("acpmock: %s.values must contain at least one value", path) + } + seen := make(map[string]struct{}, len(o.Values)) + currentFound := false + for idx, value := range o.Values { + trimmed := strings.TrimSpace(value.Value) + if trimmed == "" { + return fmt.Errorf("acpmock: %s.values[%d].value is required", path, idx) + } + if _, exists := seen[trimmed]; exists { + return fmt.Errorf("acpmock: %s.values[%d].value duplicates %q", path, idx, trimmed) + } + seen[trimmed] = struct{}{} + if trimmed == current { + currentFound = true + } + } + if !currentFound { + return fmt.Errorf("acpmock: %s.current %q must be listed in values", path, current) + } + return nil +} + // Validate ensures the turn fixture is usable. 
func (t TurnFixture) Validate(path string) error { if err := t.Match.Validate(path + ".match"); err != nil { diff --git a/internal/testutil/acpmock/testdata/browser_session_lifecycle_fixture.json b/internal/testutil/acpmock/testdata/browser_session_lifecycle_fixture.json index bc0efa505..fc93743ea 100644 --- a/internal/testutil/acpmock/testdata/browser_session_lifecycle_fixture.json +++ b/internal/testutil/acpmock/testdata/browser_session_lifecycle_fixture.json @@ -6,6 +6,42 @@ "provider": "claude", "permissions": "approve-reads", "prompt": "You are the browser lifecycle test agent.", + "config_options": [ + { + "id": "model", + "name": "Model", + "current": "qa-browser-model", + "values": [ + { + "value": "qa-browser-model", + "label": "QA Browser Model" + }, + { + "value": "qa-browser-model-alt", + "label": "QA Browser Model Alt" + } + ] + }, + { + "id": "reasoning_effort", + "name": "Reasoning effort", + "current": "medium", + "values": [ + { + "value": "low", + "label": "Low" + }, + { + "value": "medium", + "label": "Medium" + }, + { + "value": "high", + "label": "High" + } + ] + } + ], "turns": [ { "name": "browser-session-lifecycle", diff --git a/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx b/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx index 80ed61c57..9e11a52b8 100644 --- a/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx +++ b/packages/site/content/runtime/cli-reference/agent/heartbeat/index.mdx @@ -29,16 +29,15 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh agent heartbeat delete](/runtime/cli-reference/agent/heartbeat/delete) | Delete HEARTBEAT.md through managed authoring | -| [agh agent heartbeat history](/runtime/cli-reference/agent/heartbeat/history) | List managed Heartbeat authoring 
revisions | -| [agh agent heartbeat inspect](/runtime/cli-reference/agent/heartbeat/inspect) | Inspect one agent's resolved Heartbeat policy | +| Command | Description | +| ------------------------------------------------------------------------------- | -------------------------------------------------------------- | +| [agh agent heartbeat delete](/runtime/cli-reference/agent/heartbeat/delete) | Delete HEARTBEAT.md through managed authoring | +| [agh agent heartbeat history](/runtime/cli-reference/agent/heartbeat/history) | List managed Heartbeat authoring revisions | +| [agh agent heartbeat inspect](/runtime/cli-reference/agent/heartbeat/inspect) | Inspect one agent's resolved Heartbeat policy | | [agh agent heartbeat rollback](/runtime/cli-reference/agent/heartbeat/rollback) | Rollback HEARTBEAT.md to a managed revision or snapshot digest | -| [agh agent heartbeat status](/runtime/cli-reference/agent/heartbeat/status) | Read Heartbeat policy status and wake eligibility | -| [agh agent heartbeat validate](/runtime/cli-reference/agent/heartbeat/validate) | Validate a proposed Heartbeat policy body | -| [agh agent heartbeat wake](/runtime/cli-reference/agent/heartbeat/wake) | Request one manual advisory Heartbeat wake | -| [agh agent heartbeat write](/runtime/cli-reference/agent/heartbeat/write) | Create or replace HEARTBEAT.md through managed authoring | +| [agh agent heartbeat status](/runtime/cli-reference/agent/heartbeat/status) | Read Heartbeat policy status and wake eligibility | +| [agh agent heartbeat validate](/runtime/cli-reference/agent/heartbeat/validate) | Validate a proposed Heartbeat policy body | +| [agh agent heartbeat wake](/runtime/cli-reference/agent/heartbeat/wake) | Request one manual advisory Heartbeat wake | +| [agh agent heartbeat write](/runtime/cli-reference/agent/heartbeat/write) | Create or replace HEARTBEAT.md through managed authoring | diff --git a/packages/site/content/runtime/cli-reference/agent/index.mdx 
b/packages/site/content/runtime/cli-reference/agent/index.mdx index 75bffed50..e4a911d2e 100644 --- a/packages/site/content/runtime/cli-reference/agent/index.mdx +++ b/packages/site/content/runtime/cli-reference/agent/index.mdx @@ -29,12 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| ------------------------------------------------------------- | ------------------------------------------- | | [agh agent heartbeat](/runtime/cli-reference/agent/heartbeat) | Inspect and manage agent HEARTBEAT.md files | -| [agh agent info](/runtime/cli-reference/agent/info) | Show one agent definition | -| [agh agent list](/runtime/cli-reference/agent/list) | List installed agent definitions | -| [agh agent soul](/runtime/cli-reference/agent/soul) | Inspect and manage agent SOUL.md files | +| [agh agent info](/runtime/cli-reference/agent/info) | Show one agent definition | +| [agh agent list](/runtime/cli-reference/agent/list) | List installed agent definitions | +| [agh agent soul](/runtime/cli-reference/agent/soul) | Inspect and manage agent SOUL.md files | diff --git a/packages/site/content/runtime/cli-reference/agent/soul/index.mdx b/packages/site/content/runtime/cli-reference/agent/soul/index.mdx index 0d44fe4d5..2a8bc8582 100644 --- a/packages/site/content/runtime/cli-reference/agent/soul/index.mdx +++ b/packages/site/content/runtime/cli-reference/agent/soul/index.mdx @@ -29,14 +29,13 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh agent soul delete](/runtime/cli-reference/agent/soul/delete) | Delete SOUL.md through managed authoring | -| [agh agent soul 
history](/runtime/cli-reference/agent/soul/history) | List managed Soul authoring revisions | -| [agh agent soul inspect](/runtime/cli-reference/agent/soul/inspect) | Inspect one agent's resolved Soul | -| [agh agent soul rollback](/runtime/cli-reference/agent/soul/rollback) | Rollback SOUL.md to a managed revision | +| Command | Description | +| --------------------------------------------------------------------- | ---------------------------------------------------- | +| [agh agent soul delete](/runtime/cli-reference/agent/soul/delete) | Delete SOUL.md through managed authoring | +| [agh agent soul history](/runtime/cli-reference/agent/soul/history) | List managed Soul authoring revisions | +| [agh agent soul inspect](/runtime/cli-reference/agent/soul/inspect) | Inspect one agent's resolved Soul | +| [agh agent soul rollback](/runtime/cli-reference/agent/soul/rollback) | Rollback SOUL.md to a managed revision | | [agh agent soul validate](/runtime/cli-reference/agent/soul/validate) | Validate a proposed Soul body or the current SOUL.md | -| [agh agent soul write](/runtime/cli-reference/agent/soul/write) | Create or replace SOUL.md through managed authoring | +| [agh agent soul write](/runtime/cli-reference/agent/soul/write) | Create or replace SOUL.md through managed authoring | diff --git a/packages/site/content/runtime/cli-reference/agh.mdx b/packages/site/content/runtime/cli-reference/agh.mdx index e1ffdf91a..43c5bec12 100644 --- a/packages/site/content/runtime/cli-reference/agh.mdx +++ b/packages/site/content/runtime/cli-reference/agh.mdx @@ -47,35 +47,35 @@ agh -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh agent](/runtime/cli-reference/agent) | Inspect AGH agent definitions | -| [agh automation](/runtime/cli-reference/automation) | Manage automation jobs, triggers, and runs | -| [agh bridge](/runtime/cli-reference/bridge) | Manage bridge instances | -| [agh bundle](/runtime/cli-reference/bundle) | Manage extension 
bundle presets | -| [agh ch](/runtime/cli-reference/ch) | Use agent-facing coordination channels | -| [agh completion](/runtime/cli-reference/completion) | Generate the autocompletion script for the specified shell | -| [agh config](/runtime/cli-reference/config) | Inspect and mutate AGH configuration | -| [agh daemon](/runtime/cli-reference/daemon) | Manage the AGH daemon | -| [agh extension](/runtime/cli-reference/extension) | Manage AGH extensions | -| [agh hooks](/runtime/cli-reference/hooks) | Inspect configured and executed hooks | -| [agh install](/runtime/cli-reference/install) | Bootstrap AGH and create the default general agent | -| [agh mcp](/runtime/cli-reference/mcp) | Manage MCP integrations | -| [agh me](/runtime/cli-reference/me) | Inspect the current AGH-managed agent session | -| [agh memory](/runtime/cli-reference/memory) | Show, write, search, and operate Memory v2 durable context | -| [agh network](/runtime/cli-reference/network) | Operate the daemon-owned network runtime | -| [agh observe](/runtime/cli-reference/observe) | Query global observability state | -| [agh provider](/runtime/cli-reference/provider) | Inspect and manage provider authentication | -| [agh resource](/runtime/cli-reference/resource) | Manage desired-state resources | -| [agh session](/runtime/cli-reference/session) | Manage AGH sessions | -| [agh skill](/runtime/cli-reference/skill) | Manage local AgentSkills | -| [agh spawn](/runtime/cli-reference/spawn) | Spawn a bounded child agent session | -| [agh task](/runtime/cli-reference/task) | Manage tasks and task runs | -| [agh tool](/runtime/cli-reference/tool) | Inspect and invoke registry tools | -| [agh toolsets](/runtime/cli-reference/toolsets) | Inspect registry toolsets | -| [agh uninstall](/runtime/cli-reference/uninstall) | Stop AGH and remove runtime launch artifacts | -| [agh update](/runtime/cli-reference/update) | Check for and apply the latest stable AGH release | -| [agh vault](/runtime/cli-reference/vault) | 
Manage encrypted daemon vault metadata and write-only secrets | -| [agh version](/runtime/cli-reference/version) | Print the AGH version | -| [agh whoami](/runtime/cli-reference/whoami) | Print the current AGH agent identity from environment variables | -| [agh workspace](/runtime/cli-reference/workspace) | Manage registered workspaces | +| Command | Description | +| --------------------------------------------------- | --------------------------------------------------------------- | +| [agh agent](/runtime/cli-reference/agent) | Inspect AGH agent definitions | +| [agh automation](/runtime/cli-reference/automation) | Manage automation jobs, triggers, and runs | +| [agh bridge](/runtime/cli-reference/bridge) | Manage bridge instances | +| [agh bundle](/runtime/cli-reference/bundle) | Manage extension bundle presets | +| [agh ch](/runtime/cli-reference/ch) | Use agent-facing coordination channels | +| [agh completion](/runtime/cli-reference/completion) | Generate the autocompletion script for the specified shell | +| [agh config](/runtime/cli-reference/config) | Inspect and mutate AGH configuration | +| [agh daemon](/runtime/cli-reference/daemon) | Manage the AGH daemon | +| [agh extension](/runtime/cli-reference/extension) | Manage AGH extensions | +| [agh hooks](/runtime/cli-reference/hooks) | Inspect configured and executed hooks | +| [agh install](/runtime/cli-reference/install) | Bootstrap AGH and create the default general agent | +| [agh mcp](/runtime/cli-reference/mcp) | Manage MCP integrations | +| [agh me](/runtime/cli-reference/me) | Inspect the current AGH-managed agent session | +| [agh memory](/runtime/cli-reference/memory) | Show, write, search, and operate Memory v2 durable context | +| [agh network](/runtime/cli-reference/network) | Operate the daemon-owned network runtime | +| [agh observe](/runtime/cli-reference/observe) | Query global observability state | +| [agh provider](/runtime/cli-reference/provider) | Inspect and manage provider 
authentication | +| [agh resource](/runtime/cli-reference/resource) | Manage desired-state resources | +| [agh session](/runtime/cli-reference/session) | Manage AGH sessions | +| [agh skill](/runtime/cli-reference/skill) | Manage local AgentSkills | +| [agh spawn](/runtime/cli-reference/spawn) | Spawn a bounded child agent session | +| [agh task](/runtime/cli-reference/task) | Manage tasks and task runs | +| [agh tool](/runtime/cli-reference/tool) | Inspect and invoke registry tools | +| [agh toolsets](/runtime/cli-reference/toolsets) | Inspect registry toolsets | +| [agh uninstall](/runtime/cli-reference/uninstall) | Stop AGH and remove runtime launch artifacts | +| [agh update](/runtime/cli-reference/update) | Check for and apply the latest stable AGH release | +| [agh vault](/runtime/cli-reference/vault) | Manage encrypted daemon vault metadata and write-only secrets | +| [agh version](/runtime/cli-reference/version) | Print the AGH version | +| [agh whoami](/runtime/cli-reference/whoami) | Print the current AGH agent identity from environment variables | +| [agh workspace](/runtime/cli-reference/workspace) | Manage registered workspaces | diff --git a/packages/site/content/runtime/cli-reference/automation/index.mdx b/packages/site/content/runtime/cli-reference/automation/index.mdx index 857974d0c..251207b59 100644 --- a/packages/site/content/runtime/cli-reference/automation/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/index.mdx @@ -41,8 +41,8 @@ agh automation -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh automation jobs](/runtime/cli-reference/automation/jobs) | Manage automation jobs | -| [agh automation runs](/runtime/cli-reference/automation/runs) | Inspect automation run history | -| [agh automation triggers](/runtime/cli-reference/automation/triggers) | Manage automation triggers | +| Command | Description | +| --------------------------------------------------------------------- | 
------------------------------ | +| [agh automation jobs](/runtime/cli-reference/automation/jobs) | Manage automation jobs | +| [agh automation runs](/runtime/cli-reference/automation/runs) | Inspect automation run history | +| [agh automation triggers](/runtime/cli-reference/automation/triggers) | Manage automation triggers | diff --git a/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx b/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx index e1474db78..5795e889a 100644 --- a/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/jobs/index.mdx @@ -45,11 +45,11 @@ agh automation jobs -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh automation jobs create](/runtime/cli-reference/automation/jobs/create) | Create an automation job | -| [agh automation jobs delete](/runtime/cli-reference/automation/jobs/delete) | Delete an automation job | -| [agh automation jobs get](/runtime/cli-reference/automation/jobs/get) | Show one automation job | +| Command | Description | +| ----------------------------------------------------------------------------- | --------------------------------------- | +| [agh automation jobs create](/runtime/cli-reference/automation/jobs/create) | Create an automation job | +| [agh automation jobs delete](/runtime/cli-reference/automation/jobs/delete) | Delete an automation job | +| [agh automation jobs get](/runtime/cli-reference/automation/jobs/get) | Show one automation job | | [agh automation jobs history](/runtime/cli-reference/automation/jobs/history) | Show run history for one automation job | -| [agh automation jobs trigger](/runtime/cli-reference/automation/jobs/trigger) | Force an immediate automation job run | -| [agh automation jobs update](/runtime/cli-reference/automation/jobs/update) | Update an automation job | +| [agh automation jobs 
trigger](/runtime/cli-reference/automation/jobs/trigger) | Force an immediate automation job run | +| [agh automation jobs update](/runtime/cli-reference/automation/jobs/update) | Update an automation job | diff --git a/packages/site/content/runtime/cli-reference/automation/runs/index.mdx b/packages/site/content/runtime/cli-reference/automation/runs/index.mdx index 031f6db87..d7708eb72 100644 --- a/packages/site/content/runtime/cli-reference/automation/runs/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/runs/index.mdx @@ -47,6 +47,6 @@ agh automation runs -o json ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| --------------------------------------------------------------------- | ----------------------- | | [agh automation runs get](/runtime/cli-reference/automation/runs/get) | Show one automation run | diff --git a/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx b/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx index ed17c0e3a..a35ae3c35 100644 --- a/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx +++ b/packages/site/content/runtime/cli-reference/automation/triggers/index.mdx @@ -46,10 +46,10 @@ agh automation triggers -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh automation triggers create](/runtime/cli-reference/automation/triggers/create) | Create an automation trigger | -| [agh automation triggers delete](/runtime/cli-reference/automation/triggers/delete) | Delete an automation trigger | -| [agh automation triggers get](/runtime/cli-reference/automation/triggers/get) | Show one automation trigger | +| Command | Description | +| ------------------------------------------------------------------------------------- | ------------------------------------------- | +| [agh automation triggers create](/runtime/cli-reference/automation/triggers/create) | Create an automation 
trigger | +| [agh automation triggers delete](/runtime/cli-reference/automation/triggers/delete) | Delete an automation trigger | +| [agh automation triggers get](/runtime/cli-reference/automation/triggers/get) | Show one automation trigger | | [agh automation triggers history](/runtime/cli-reference/automation/triggers/history) | Show run history for one automation trigger | -| [agh automation triggers update](/runtime/cli-reference/automation/triggers/update) | Update an automation trigger | +| [agh automation triggers update](/runtime/cli-reference/automation/triggers/update) | Update an automation trigger | diff --git a/packages/site/content/runtime/cli-reference/bridge/index.mdx b/packages/site/content/runtime/cli-reference/bridge/index.mdx index 201b4ba96..aea68d9d3 100644 --- a/packages/site/content/runtime/cli-reference/bridge/index.mdx +++ b/packages/site/content/runtime/cli-reference/bridge/index.mdx @@ -29,18 +29,17 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh bridge create](/runtime/cli-reference/bridge/create) | Create a bridge instance | -| [agh bridge disable](/runtime/cli-reference/bridge/disable) | Disable a bridge instance | -| [agh bridge enable](/runtime/cli-reference/bridge/enable) | Enable a bridge instance | -| [agh bridge get](/runtime/cli-reference/bridge/get) | Show one bridge instance | -| [agh bridge list](/runtime/cli-reference/bridge/list) | List bridge instances | -| [agh bridge restart](/runtime/cli-reference/bridge/restart) | Restart a bridge instance | -| [agh bridge routes](/runtime/cli-reference/bridge/routes) | Inspect routes for one bridge instance | -| [agh bridge secret-bindings](/runtime/cli-reference/bridge/secret-bindings) | Manage bridge secret bindings | -| [agh bridge 
test-delivery](/runtime/cli-reference/bridge/test-delivery) | Resolve a typed outbound delivery target for one bridge instance | -| [agh bridge update](/runtime/cli-reference/bridge/update) | Update mutable bridge fields | +| Command | Description | +| --------------------------------------------------------------------------- | ---------------------------------------------------------------- | +| [agh bridge create](/runtime/cli-reference/bridge/create) | Create a bridge instance | +| [agh bridge disable](/runtime/cli-reference/bridge/disable) | Disable a bridge instance | +| [agh bridge enable](/runtime/cli-reference/bridge/enable) | Enable a bridge instance | +| [agh bridge get](/runtime/cli-reference/bridge/get) | Show one bridge instance | +| [agh bridge list](/runtime/cli-reference/bridge/list) | List bridge instances | +| [agh bridge restart](/runtime/cli-reference/bridge/restart) | Restart a bridge instance | +| [agh bridge routes](/runtime/cli-reference/bridge/routes) | Inspect routes for one bridge instance | +| [agh bridge secret-bindings](/runtime/cli-reference/bridge/secret-bindings) | Manage bridge secret bindings | +| [agh bridge test-delivery](/runtime/cli-reference/bridge/test-delivery) | Resolve a typed outbound delivery target for one bridge instance | +| [agh bridge update](/runtime/cli-reference/bridge/update) | Update mutable bridge fields | diff --git a/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx b/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx index 74be516cb..11ac3b267 100644 --- a/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx +++ b/packages/site/content/runtime/cli-reference/bridge/secret-bindings/index.mdx @@ -41,8 +41,8 @@ agh bridge secret-bindings -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh bridge secret-bindings delete](/runtime/cli-reference/bridge/secret-bindings/delete) | Delete one bridge secret 
binding | -| [agh bridge secret-bindings list](/runtime/cli-reference/bridge/secret-bindings/list) | List secret bindings for one bridge instance | -| [agh bridge secret-bindings put](/runtime/cli-reference/bridge/secret-bindings/put) | Create or update one bridge secret binding | +| Command | Description | +| ----------------------------------------------------------------------------------------- | -------------------------------------------- | +| [agh bridge secret-bindings delete](/runtime/cli-reference/bridge/secret-bindings/delete) | Delete one bridge secret binding | +| [agh bridge secret-bindings list](/runtime/cli-reference/bridge/secret-bindings/list) | List secret bindings for one bridge instance | +| [agh bridge secret-bindings put](/runtime/cli-reference/bridge/secret-bindings/put) | Create or update one bridge secret binding | diff --git a/packages/site/content/runtime/cli-reference/bundle/index.mdx b/packages/site/content/runtime/cli-reference/bundle/index.mdx index fa1a7c3c6..125a6a9cf 100644 --- a/packages/site/content/runtime/cli-reference/bundle/index.mdx +++ b/packages/site/content/runtime/cli-reference/bundle/index.mdx @@ -29,16 +29,15 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh bundle activate](/runtime/cli-reference/bundle/activate) | Activate a bundle preset | -| [agh bundle catalog](/runtime/cli-reference/bundle/catalog) | List available extension bundle presets | -| [agh bundle deactivate](/runtime/cli-reference/bundle/deactivate) | Deactivate a bundle preset and remove owned resources | -| [agh bundle get](/runtime/cli-reference/bundle/get) | Show one bundle activation | -| [agh bundle list](/runtime/cli-reference/bundle/list) | List active bundle presets | -| [agh bundle network-settings](/runtime/cli-reference/bundle/network-settings) | 
Show bundle-derived network settings | -| [agh bundle preview](/runtime/cli-reference/bundle/preview) | Preview a bundle activation without writing resources | -| [agh bundle update](/runtime/cli-reference/bundle/update) | Update bundle activation overlays | +| Command | Description | +| ----------------------------------------------------------------------------- | ----------------------------------------------------- | +| [agh bundle activate](/runtime/cli-reference/bundle/activate) | Activate a bundle preset | +| [agh bundle catalog](/runtime/cli-reference/bundle/catalog) | List available extension bundle presets | +| [agh bundle deactivate](/runtime/cli-reference/bundle/deactivate) | Deactivate a bundle preset and remove owned resources | +| [agh bundle get](/runtime/cli-reference/bundle/get) | Show one bundle activation | +| [agh bundle list](/runtime/cli-reference/bundle/list) | List active bundle presets | +| [agh bundle network-settings](/runtime/cli-reference/bundle/network-settings) | Show bundle-derived network settings | +| [agh bundle preview](/runtime/cli-reference/bundle/preview) | Preview a bundle activation without writing resources | +| [agh bundle update](/runtime/cli-reference/bundle/update) | Update bundle activation overlays | diff --git a/packages/site/content/runtime/cli-reference/ch/index.mdx b/packages/site/content/runtime/cli-reference/ch/index.mdx index 6104c3204..bb0b63084 100644 --- a/packages/site/content/runtime/cli-reference/ch/index.mdx +++ b/packages/site/content/runtime/cli-reference/ch/index.mdx @@ -47,9 +47,9 @@ agh ch list -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh ch list](/runtime/cli-reference/ch/list) | List coordination channels visible to the current agent session | -| [agh ch recv](/runtime/cli-reference/ch/recv) | Receive queued coordination messages for a channel | -| [agh ch reply](/runtime/cli-reference/ch/reply) | Reply to a received coordination message | -| [agh ch 
send](/runtime/cli-reference/ch/send) | Send one task-run coordination message | +| Command | Description | +| ----------------------------------------------- | --------------------------------------------------------------- | +| [agh ch list](/runtime/cli-reference/ch/list) | List coordination channels visible to the current agent session | +| [agh ch recv](/runtime/cli-reference/ch/recv) | Receive queued coordination messages for a channel | +| [agh ch reply](/runtime/cli-reference/ch/reply) | Reply to a received coordination message | +| [agh ch send](/runtime/cli-reference/ch/send) | Send one task-run coordination message | diff --git a/packages/site/content/runtime/cli-reference/completion/bash.mdx b/packages/site/content/runtime/cli-reference/completion/bash.mdx index bc1c5b248..459e57e1f 100644 --- a/packages/site/content/runtime/cli-reference/completion/bash.mdx +++ b/packages/site/content/runtime/cli-reference/completion/bash.mdx @@ -20,6 +20,7 @@ To load completions in your current shell session: source <(agh completion bash) ``` + To load completions for every new session, execute once: #### Linux: @@ -28,14 +29,15 @@ To load completions for every new session, execute once: agh completion bash > /etc/bash_completion.d/agh ``` + #### macOS: ``` agh completion bash > $(brew --prefix)/etc/bash_completion.d/agh ``` -You will need to start a new shell for this setup to take effect. +You will need to start a new shell for this setup to take effect. 
``` agh completion bash diff --git a/packages/site/content/runtime/cli-reference/completion/fish.mdx b/packages/site/content/runtime/cli-reference/completion/fish.mdx index 0e238c948..7cd2fed41 100644 --- a/packages/site/content/runtime/cli-reference/completion/fish.mdx +++ b/packages/site/content/runtime/cli-reference/completion/fish.mdx @@ -17,14 +17,15 @@ To load completions in your current shell session: agh completion fish | source ``` + To load completions for every new session, execute once: ``` agh completion fish > ~/.config/fish/completions/agh.fish ``` -You will need to start a new shell for this setup to take effect. +You will need to start a new shell for this setup to take effect. ``` agh completion fish [flags] diff --git a/packages/site/content/runtime/cli-reference/completion/index.mdx b/packages/site/content/runtime/cli-reference/completion/index.mdx index 610062738..f7e3562a1 100644 --- a/packages/site/content/runtime/cli-reference/completion/index.mdx +++ b/packages/site/content/runtime/cli-reference/completion/index.mdx @@ -12,7 +12,6 @@ Generate the autocompletion script for the specified shell Generate the autocompletion script for agh for the specified shell. See each sub-command's help for details on how to use the generated script. 
- ### Options ``` @@ -35,12 +34,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh completion bash](/runtime/cli-reference/completion/bash) | Generate the autocompletion script for bash | -| [agh completion fish](/runtime/cli-reference/completion/fish) | Generate the autocompletion script for fish | +| Command | Description | +| ------------------------------------------------------------------------- | ------------------------------------------------- | +| [agh completion bash](/runtime/cli-reference/completion/bash) | Generate the autocompletion script for bash | +| [agh completion fish](/runtime/cli-reference/completion/fish) | Generate the autocompletion script for fish | | [agh completion powershell](/runtime/cli-reference/completion/powershell) | Generate the autocompletion script for powershell | -| [agh completion zsh](/runtime/cli-reference/completion/zsh) | Generate the autocompletion script for zsh | +| [agh completion zsh](/runtime/cli-reference/completion/zsh) | Generate the autocompletion script for zsh | diff --git a/packages/site/content/runtime/cli-reference/completion/powershell.mdx b/packages/site/content/runtime/cli-reference/completion/powershell.mdx index 7a8991c28..8c3bf965d 100644 --- a/packages/site/content/runtime/cli-reference/completion/powershell.mdx +++ b/packages/site/content/runtime/cli-reference/completion/powershell.mdx @@ -17,10 +17,10 @@ To load completions in your current shell session: agh completion powershell | Out-String | Invoke-Expression ``` + To load completions for every new session, add the output of the above command to your powershell profile. 
- ``` agh completion powershell [flags] ``` diff --git a/packages/site/content/runtime/cli-reference/completion/zsh.mdx b/packages/site/content/runtime/cli-reference/completion/zsh.mdx index 783e3da2b..1ce4c4700 100644 --- a/packages/site/content/runtime/cli-reference/completion/zsh.mdx +++ b/packages/site/content/runtime/cli-reference/completion/zsh.mdx @@ -12,18 +12,20 @@ Generate the autocompletion script for zsh Generate the autocompletion script for the zsh shell. If shell completion is not already enabled in your environment you will need -to enable it. You can execute the following once: +to enable it. You can execute the following once: ``` echo "autoload -U compinit; compinit" >> ~/.zshrc ``` + To load completions in your current shell session: ``` source <(agh completion zsh) ``` + To load completions for every new session, execute once: #### Linux: @@ -32,14 +34,15 @@ To load completions for every new session, execute once: agh completion zsh > "${fpath[1]}/_agh" ``` + #### macOS: ``` agh completion zsh > $(brew --prefix)/share/zsh/site-functions/_agh ``` -You will need to start a new shell for this setup to take effect. +You will need to start a new shell for this setup to take effect. 
``` agh completion zsh [flags] diff --git a/packages/site/content/runtime/cli-reference/config/index.mdx b/packages/site/content/runtime/cli-reference/config/index.mdx index 388b83c35..d7992a043 100644 --- a/packages/site/content/runtime/cli-reference/config/index.mdx +++ b/packages/site/content/runtime/cli-reference/config/index.mdx @@ -29,16 +29,15 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh config check](/runtime/cli-reference/config/check) | Alias for config validate | -| [agh config edit](/runtime/cli-reference/config/edit) | Open the selected config overlay in $VISUAL or $EDITOR | -| [agh config get](/runtime/cli-reference/config/get) | Get one redacted effective config value | -| [agh config list](/runtime/cli-reference/config/list) | List redacted effective config values | -| [agh config path](/runtime/cli-reference/config/path) | Show resolved AGH config paths | -| [agh config set](/runtime/cli-reference/config/set) | Set one config value through the validated config writer | -| [agh config show](/runtime/cli-reference/config/show) | Show the redacted effective config | -| [agh config validate](/runtime/cli-reference/config/validate) | Validate AGH configuration | +| Command | Description | +| ------------------------------------------------------------- | -------------------------------------------------------- | +| [agh config check](/runtime/cli-reference/config/check) | Alias for config validate | +| [agh config edit](/runtime/cli-reference/config/edit) | Open the selected config overlay in $VISUAL or $EDITOR | +| [agh config get](/runtime/cli-reference/config/get) | Get one redacted effective config value | +| [agh config list](/runtime/cli-reference/config/list) | List redacted effective config values | +| [agh config 
path](/runtime/cli-reference/config/path) | Show resolved AGH config paths | +| [agh config set](/runtime/cli-reference/config/set) | Set one config value through the validated config writer | +| [agh config show](/runtime/cli-reference/config/show) | Show the redacted effective config | +| [agh config validate](/runtime/cli-reference/config/validate) | Validate AGH configuration | diff --git a/packages/site/content/runtime/cli-reference/daemon/index.mdx b/packages/site/content/runtime/cli-reference/daemon/index.mdx index e58057d73..fd9e886fe 100644 --- a/packages/site/content/runtime/cli-reference/daemon/index.mdx +++ b/packages/site/content/runtime/cli-reference/daemon/index.mdx @@ -29,11 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh daemon start](/runtime/cli-reference/daemon/start) | Start the AGH daemon | -| [agh daemon status](/runtime/cli-reference/daemon/status) | Show daemon status | -| [agh daemon stop](/runtime/cli-reference/daemon/stop) | Stop the AGH daemon | +| Command | Description | +| --------------------------------------------------------- | -------------------- | +| [agh daemon start](/runtime/cli-reference/daemon/start) | Start the AGH daemon | +| [agh daemon status](/runtime/cli-reference/daemon/status) | Show daemon status | +| [agh daemon stop](/runtime/cli-reference/daemon/stop) | Stop the AGH daemon | diff --git a/packages/site/content/runtime/cli-reference/extension/index.mdx b/packages/site/content/runtime/cli-reference/extension/index.mdx index c1495841d..bc3dd7a59 100644 --- a/packages/site/content/runtime/cli-reference/extension/index.mdx +++ b/packages/site/content/runtime/cli-reference/extension/index.mdx @@ -29,16 +29,15 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON 
record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh extension disable](/runtime/cli-reference/extension/disable) | Disable an installed extension | -| [agh extension enable](/runtime/cli-reference/extension/enable) | Enable an installed extension | +| Command | Description | +| ----------------------------------------------------------------- | --------------------------------------------------------- | +| [agh extension disable](/runtime/cli-reference/extension/disable) | Disable an installed extension | +| [agh extension enable](/runtime/cli-reference/extension/enable) | Enable an installed extension | | [agh extension install](/runtime/cli-reference/extension/install) | Install a local extension or download one from a registry | -| [agh extension list](/runtime/cli-reference/extension/list) | List installed extensions | -| [agh extension remove](/runtime/cli-reference/extension/remove) | Remove an installed extension from disk and the registry | -| [agh extension search](/runtime/cli-reference/extension/search) | Search remote extension registries | -| [agh extension status](/runtime/cli-reference/extension/status) | Show extension runtime status | -| [agh extension update](/runtime/cli-reference/extension/update) | Check for or install updates for marketplace extensions | +| [agh extension list](/runtime/cli-reference/extension/list) | List installed extensions | +| [agh extension remove](/runtime/cli-reference/extension/remove) | Remove an installed extension from disk and the registry | +| [agh extension search](/runtime/cli-reference/extension/search) | Search remote extension registries | +| [agh extension status](/runtime/cli-reference/extension/status) | Show extension runtime status | +| [agh extension update](/runtime/cli-reference/extension/update) | Check for or install updates for marketplace extensions | diff --git 
a/packages/site/content/runtime/cli-reference/hooks/index.mdx b/packages/site/content/runtime/cli-reference/hooks/index.mdx index c7e2b67e8..f9cd7728c 100644 --- a/packages/site/content/runtime/cli-reference/hooks/index.mdx +++ b/packages/site/content/runtime/cli-reference/hooks/index.mdx @@ -29,12 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh hooks events](/runtime/cli-reference/hooks/events) | List supported hook events | -| [agh hooks info](/runtime/cli-reference/hooks/info) | Show detailed information for one or more hooks by name | -| [agh hooks list](/runtime/cli-reference/hooks/list) | List resolved hooks in pipeline order | -| [agh hooks runs](/runtime/cli-reference/hooks/runs) | Show persisted hook execution history | +| Command | Description | +| ------------------------------------------------------- | ------------------------------------------------------- | +| [agh hooks events](/runtime/cli-reference/hooks/events) | List supported hook events | +| [agh hooks info](/runtime/cli-reference/hooks/info) | Show detailed information for one or more hooks by name | +| [agh hooks list](/runtime/cli-reference/hooks/list) | List resolved hooks in pipeline order | +| [agh hooks runs](/runtime/cli-reference/hooks/runs) | Show persisted hook execution history | diff --git a/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx b/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx index c90338fa0..b69e3e3d9 100644 --- a/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx +++ b/packages/site/content/runtime/cli-reference/mcp/auth/index.mdx @@ -29,11 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## 
Subcommands -| Command | Description | -| ------- | ----------- | -| [agh mcp auth login](/runtime/cli-reference/mcp/auth/login) | Run OAuth login for a remote MCP server | +| Command | Description | +| ------------------------------------------------------------- | --------------------------------------- | +| [agh mcp auth login](/runtime/cli-reference/mcp/auth/login) | Run OAuth login for a remote MCP server | | [agh mcp auth logout](/runtime/cli-reference/mcp/auth/logout) | Revoke or delete remote MCP auth tokens | -| [agh mcp auth status](/runtime/cli-reference/mcp/auth/status) | Show redacted remote MCP auth status | +| [agh mcp auth status](/runtime/cli-reference/mcp/auth/status) | Show redacted remote MCP auth status | diff --git a/packages/site/content/runtime/cli-reference/mcp/index.mdx b/packages/site/content/runtime/cli-reference/mcp/index.mdx index 7163a2a18..a9f70ee85 100644 --- a/packages/site/content/runtime/cli-reference/mcp/index.mdx +++ b/packages/site/content/runtime/cli-reference/mcp/index.mdx @@ -29,9 +29,8 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| ----------------------------------------------- | ------------------------------- | | [agh mcp auth](/runtime/cli-reference/mcp/auth) | Authenticate remote MCP servers | diff --git a/packages/site/content/runtime/cli-reference/me/index.mdx b/packages/site/content/runtime/cli-reference/me/index.mdx index 834e4db45..01502917a 100644 --- a/packages/site/content/runtime/cli-reference/me/index.mdx +++ b/packages/site/content/runtime/cli-reference/me/index.mdx @@ -51,6 +51,6 @@ agh me -o json ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| --------------------------------------------------- | 
------------------------------------------------------------------- | | [agh me context](/runtime/cli-reference/me/context) | Inspect the bounded situation context for the current agent session | diff --git a/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx b/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx index 0b6d4f77b..17f82854d 100644 --- a/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/adhoc/index.mdx @@ -29,10 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| ----------------------------------------------------------------- | -------------------------- | | [agh memory adhoc list](/runtime/cli-reference/memory/adhoc/list) | Reserved Memory v2 command | | [agh memory adhoc show](/runtime/cli-reference/memory/adhoc/show) | Reserved Memory v2 command | diff --git a/packages/site/content/runtime/cli-reference/memory/daily/index.mdx b/packages/site/content/runtime/cli-reference/memory/daily/index.mdx index 7702e7b35..4551ea22b 100644 --- a/packages/site/content/runtime/cli-reference/memory/daily/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/daily/index.mdx @@ -29,13 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh memory daily archive](/runtime/cli-reference/memory/daily/archive) | Reserved Memory v2 command | -| [agh memory daily ls](/runtime/cli-reference/memory/daily/ls) | List Memory v2 daily operation logs | -| [agh memory daily purge](/runtime/cli-reference/memory/daily/purge) | Reserved Memory v2 
command | -| [agh memory daily restore](/runtime/cli-reference/memory/daily/restore) | Reserved Memory v2 command | -| [agh memory daily show](/runtime/cli-reference/memory/daily/show) | Reserved Memory v2 command | +| Command | Description | +| ----------------------------------------------------------------------- | ----------------------------------- | +| [agh memory daily archive](/runtime/cli-reference/memory/daily/archive) | Reserved Memory v2 command | +| [agh memory daily ls](/runtime/cli-reference/memory/daily/ls) | List Memory v2 daily operation logs | +| [agh memory daily purge](/runtime/cli-reference/memory/daily/purge) | Reserved Memory v2 command | +| [agh memory daily restore](/runtime/cli-reference/memory/daily/restore) | Reserved Memory v2 command | +| [agh memory daily show](/runtime/cli-reference/memory/daily/show) | Reserved Memory v2 command | diff --git a/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx b/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx index 6de535bbd..842278ef4 100644 --- a/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/decisions/index.mdx @@ -29,11 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh memory decisions list](/runtime/cli-reference/memory/decisions/list) | List Memory v2 controller decisions | +| Command | Description | +| ----------------------------------------------------------------------------- | ---------------------------------------- | +| [agh memory decisions list](/runtime/cli-reference/memory/decisions/list) | List Memory v2 controller decisions | | [agh memory decisions revert](/runtime/cli-reference/memory/decisions/revert) | Revert one Memory v2 controller decision | -| [agh 
memory decisions show](/runtime/cli-reference/memory/decisions/show) | Show one Memory v2 controller decision | +| [agh memory decisions show](/runtime/cli-reference/memory/decisions/show) | Show one Memory v2 controller decision | diff --git a/packages/site/content/runtime/cli-reference/memory/dream/index.mdx b/packages/site/content/runtime/cli-reference/memory/dream/index.mdx index 3da7d4919..205c1533f 100644 --- a/packages/site/content/runtime/cli-reference/memory/dream/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/dream/index.mdx @@ -29,12 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh memory dream retry](/runtime/cli-reference/memory/dream/retry) | Retry one failed Memory v2 dreaming run | -| [agh memory dream show](/runtime/cli-reference/memory/dream/show) | Show one Memory v2 dreaming run | -| [agh memory dream status](/runtime/cli-reference/memory/dream/status) | Show Memory v2 dreaming runtime status | -| [agh memory dream trigger](/runtime/cli-reference/memory/dream/trigger) | Trigger Memory v2 dreaming | +| Command | Description | +| ----------------------------------------------------------------------- | --------------------------------------- | +| [agh memory dream retry](/runtime/cli-reference/memory/dream/retry) | Retry one failed Memory v2 dreaming run | +| [agh memory dream show](/runtime/cli-reference/memory/dream/show) | Show one Memory v2 dreaming run | +| [agh memory dream status](/runtime/cli-reference/memory/dream/status) | Show Memory v2 dreaming runtime status | +| [agh memory dream trigger](/runtime/cli-reference/memory/dream/trigger) | Trigger Memory v2 dreaming | diff --git a/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx 
b/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx index 8ec17c122..d15fc6d32 100644 --- a/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/extractor/index.mdx @@ -29,13 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh memory extractor disable](/runtime/cli-reference/memory/extractor/disable) | Reserved Memory v2 command | -| [agh memory extractor drain](/runtime/cli-reference/memory/extractor/drain) | Drain Memory v2 extractor work | +| Command | Description | +| ----------------------------------------------------------------------------------------- | -------------------------------------------- | +| [agh memory extractor disable](/runtime/cli-reference/memory/extractor/disable) | Reserved Memory v2 command | +| [agh memory extractor drain](/runtime/cli-reference/memory/extractor/drain) | Drain Memory v2 extractor work | | [agh memory extractor list-pending](/runtime/cli-reference/memory/extractor/list-pending) | List Memory v2 extractor pending/DLQ records | -| [agh memory extractor replay](/runtime/cli-reference/memory/extractor/replay) | Replay Memory v2 extractor work | -| [agh memory extractor status](/runtime/cli-reference/memory/extractor/status) | Show Memory v2 extractor runtime status | +| [agh memory extractor replay](/runtime/cli-reference/memory/extractor/replay) | Replay Memory v2 extractor work | +| [agh memory extractor status](/runtime/cli-reference/memory/extractor/status) | Show Memory v2 extractor runtime status | diff --git a/packages/site/content/runtime/cli-reference/memory/index.mdx b/packages/site/content/runtime/cli-reference/memory/index.mdx index c0f138225..1b00cfae6 100644 --- 
a/packages/site/content/runtime/cli-reference/memory/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/index.mdx @@ -29,28 +29,27 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh memory adhoc](/runtime/cli-reference/memory/adhoc) | Inspect ad-hoc Memory v2 notes | -| [agh memory daily](/runtime/cli-reference/memory/daily) | Inspect Memory v2 daily operation logs | -| [agh memory decisions](/runtime/cli-reference/memory/decisions) | Inspect and revert Memory v2 controller decisions | -| [agh memory delete](/runtime/cli-reference/memory/delete) | Delete a Memory v2 entry through the controller | -| [agh memory dream](/runtime/cli-reference/memory/dream) | Operate Memory v2 dreaming runs | -| [agh memory edit](/runtime/cli-reference/memory/edit) | Edit a Memory v2 entry through the controller | -| [agh memory extractor](/runtime/cli-reference/memory/extractor) | Operate Memory v2 extractor runtime | -| [agh memory health](/runtime/cli-reference/memory/health) | Show Memory v2 health | -| [agh memory history](/runtime/cli-reference/memory/history) | Show redaction-safe Memory v2 operation history | -| [agh memory list](/runtime/cli-reference/memory/list) | List Memory v2 entries | -| [agh memory promote](/runtime/cli-reference/memory/promote) | Promote a memory entry across Memory v2 scopes | -| [agh memory provider](/runtime/cli-reference/memory/provider) | Operate Memory v2 providers | -| [agh memory recall](/runtime/cli-reference/memory/recall) | Inspect Memory v2 recall traces | -| [agh memory reindex](/runtime/cli-reference/memory/reindex) | Rebuild the derived Memory v2 search catalog | -| [agh memory reload](/runtime/cli-reference/memory/reload) | Invalidate frozen memory snapshots for future session boots | -| [agh memory 
reset](/runtime/cli-reference/memory/reset) | Reset derived Memory v2 state through the daemon | -| [agh memory scope-show](/runtime/cli-reference/memory/scope-show) | Show resolved Memory v2 precedence for a selector | -| [agh memory search](/runtime/cli-reference/memory/search) | Search deterministic Memory v2 recall | -| [agh memory show](/runtime/cli-reference/memory/show) | Show one Memory v2 entry | -| [agh memory write](/runtime/cli-reference/memory/write) | Create a Memory v2 entry through the controller | +| Command | Description | +| ----------------------------------------------------------------- | ----------------------------------------------------------- | +| [agh memory adhoc](/runtime/cli-reference/memory/adhoc) | Inspect ad-hoc Memory v2 notes | +| [agh memory daily](/runtime/cli-reference/memory/daily) | Inspect Memory v2 daily operation logs | +| [agh memory decisions](/runtime/cli-reference/memory/decisions) | Inspect and revert Memory v2 controller decisions | +| [agh memory delete](/runtime/cli-reference/memory/delete) | Delete a Memory v2 entry through the controller | +| [agh memory dream](/runtime/cli-reference/memory/dream) | Operate Memory v2 dreaming runs | +| [agh memory edit](/runtime/cli-reference/memory/edit) | Edit a Memory v2 entry through the controller | +| [agh memory extractor](/runtime/cli-reference/memory/extractor) | Operate Memory v2 extractor runtime | +| [agh memory health](/runtime/cli-reference/memory/health) | Show Memory v2 health | +| [agh memory history](/runtime/cli-reference/memory/history) | Show redaction-safe Memory v2 operation history | +| [agh memory list](/runtime/cli-reference/memory/list) | List Memory v2 entries | +| [agh memory promote](/runtime/cli-reference/memory/promote) | Promote a memory entry across Memory v2 scopes | +| [agh memory provider](/runtime/cli-reference/memory/provider) | Operate Memory v2 providers | +| [agh memory recall](/runtime/cli-reference/memory/recall) | Inspect Memory v2 
recall traces | +| [agh memory reindex](/runtime/cli-reference/memory/reindex) | Rebuild the derived Memory v2 search catalog | +| [agh memory reload](/runtime/cli-reference/memory/reload) | Invalidate frozen memory snapshots for future session boots | +| [agh memory reset](/runtime/cli-reference/memory/reset) | Reset derived Memory v2 state through the daemon | +| [agh memory scope-show](/runtime/cli-reference/memory/scope-show) | Show resolved Memory v2 precedence for a selector | +| [agh memory search](/runtime/cli-reference/memory/search) | Search deterministic Memory v2 recall | +| [agh memory show](/runtime/cli-reference/memory/show) | Show one Memory v2 entry | +| [agh memory write](/runtime/cli-reference/memory/write) | Create a Memory v2 entry through the controller | diff --git a/packages/site/content/runtime/cli-reference/memory/provider/index.mdx b/packages/site/content/runtime/cli-reference/memory/provider/index.mdx index 97bb2b35d..0d109bef6 100644 --- a/packages/site/content/runtime/cli-reference/memory/provider/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/provider/index.mdx @@ -29,11 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh memory provider disable](/runtime/cli-reference/memory/provider/disable) | Disable one Memory v2 provider | -| [agh memory provider enable](/runtime/cli-reference/memory/provider/enable) | Enable and select one Memory v2 provider | -| [agh memory provider list](/runtime/cli-reference/memory/provider/list) | List registered Memory v2 providers | +| Command | Description | +| ----------------------------------------------------------------------------- | ---------------------------------------- | +| [agh memory provider disable](/runtime/cli-reference/memory/provider/disable) | Disable one Memory v2 
provider | +| [agh memory provider enable](/runtime/cli-reference/memory/provider/enable) | Enable and select one Memory v2 provider | +| [agh memory provider list](/runtime/cli-reference/memory/provider/list) | List registered Memory v2 providers | diff --git a/packages/site/content/runtime/cli-reference/memory/recall/index.mdx b/packages/site/content/runtime/cli-reference/memory/recall/index.mdx index ae5afcbf3..5c3188ce0 100644 --- a/packages/site/content/runtime/cli-reference/memory/recall/index.mdx +++ b/packages/site/content/runtime/cli-reference/memory/recall/index.mdx @@ -29,9 +29,8 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| --------------------------------------------------------------------- | ------------------------------------ | | [agh memory recall trace](/runtime/cli-reference/memory/recall/trace) | Show one redaction-safe recall trace | diff --git a/packages/site/content/runtime/cli-reference/network/directs/index.mdx b/packages/site/content/runtime/cli-reference/network/directs/index.mdx index 5cb5684f4..8643f897e 100644 --- a/packages/site/content/runtime/cli-reference/network/directs/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/directs/index.mdx @@ -29,12 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh network directs list](/runtime/cli-reference/network/directs/list) | List direct rooms in a channel | -| [agh network directs messages](/runtime/cli-reference/network/directs/messages) | List messages in one direct room | -| [agh network directs resolve](/runtime/cli-reference/network/directs/resolve) 
| Create or return the deterministic direct room for two peers | -| [agh network directs show](/runtime/cli-reference/network/directs/show) | Show one direct room | +| Command | Description | +| ------------------------------------------------------------------------------- | ------------------------------------------------------------ | +| [agh network directs list](/runtime/cli-reference/network/directs/list) | List direct rooms in a channel | +| [agh network directs messages](/runtime/cli-reference/network/directs/messages) | List messages in one direct room | +| [agh network directs resolve](/runtime/cli-reference/network/directs/resolve) | Create or return the deterministic direct room for two peers | +| [agh network directs show](/runtime/cli-reference/network/directs/show) | Show one direct room | diff --git a/packages/site/content/runtime/cli-reference/network/index.mdx b/packages/site/content/runtime/cli-reference/network/index.mdx index ac6fe1b8d..0813df9f1 100644 --- a/packages/site/content/runtime/cli-reference/network/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/index.mdx @@ -29,16 +29,15 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh network channels](/runtime/cli-reference/network/channels) | List active runtime channels | -| [agh network directs](/runtime/cli-reference/network/directs) | Inspect restricted direct rooms | -| [agh network inbox](/runtime/cli-reference/network/inbox) | Show queued inbound messages for one session | -| [agh network peers](/runtime/cli-reference/network/peers) | List visible local and remote peers | -| [agh network send](/runtime/cli-reference/network/send) | Send one envelope through the daemon-owned network runtime | -| [agh network status](/runtime/cli-reference/network/status) | Show network 
runtime status and queue metrics | -| [agh network threads](/runtime/cli-reference/network/threads) | Inspect public network threads | -| [agh network work](/runtime/cli-reference/network/work) | Inspect lifecycle-bearing network work | +| Command | Description | +| --------------------------------------------------------------- | ---------------------------------------------------------- | +| [agh network channels](/runtime/cli-reference/network/channels) | List active runtime channels | +| [agh network directs](/runtime/cli-reference/network/directs) | Inspect restricted direct rooms | +| [agh network inbox](/runtime/cli-reference/network/inbox) | Show queued inbound messages for one session | +| [agh network peers](/runtime/cli-reference/network/peers) | List visible local and remote peers | +| [agh network send](/runtime/cli-reference/network/send) | Send one envelope through the daemon-owned network runtime | +| [agh network status](/runtime/cli-reference/network/status) | Show network runtime status and queue metrics | +| [agh network threads](/runtime/cli-reference/network/threads) | Inspect public network threads | +| [agh network work](/runtime/cli-reference/network/work) | Inspect lifecycle-bearing network work | diff --git a/packages/site/content/runtime/cli-reference/network/threads/index.mdx b/packages/site/content/runtime/cli-reference/network/threads/index.mdx index 41fcfa52f..bc4cddd0e 100644 --- a/packages/site/content/runtime/cli-reference/network/threads/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/threads/index.mdx @@ -29,11 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh network threads list](/runtime/cli-reference/network/threads/list) | List public threads in a channel | +| Command | Description | +| 
------------------------------------------------------------------------------- | ---------------------------------- | +| [agh network threads list](/runtime/cli-reference/network/threads/list) | List public threads in a channel | | [agh network threads messages](/runtime/cli-reference/network/threads/messages) | List messages in one public thread | -| [agh network threads show](/runtime/cli-reference/network/threads/show) | Show one public thread | +| [agh network threads show](/runtime/cli-reference/network/threads/show) | Show one public thread | diff --git a/packages/site/content/runtime/cli-reference/network/work/index.mdx b/packages/site/content/runtime/cli-reference/network/work/index.mdx index 23bc9bd06..6db599f08 100644 --- a/packages/site/content/runtime/cli-reference/network/work/index.mdx +++ b/packages/site/content/runtime/cli-reference/network/work/index.mdx @@ -29,10 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| --------------------------------------------------------------------- | -------------------------- | | [agh network work lookup](/runtime/cli-reference/network/work/lookup) | Show one network work item | | [agh network work status](/runtime/cli-reference/network/work/status) | Show one network work item | diff --git a/packages/site/content/runtime/cli-reference/observe/index.mdx b/packages/site/content/runtime/cli-reference/observe/index.mdx index 2be4091c0..23875b096 100644 --- a/packages/site/content/runtime/cli-reference/observe/index.mdx +++ b/packages/site/content/runtime/cli-reference/observe/index.mdx @@ -29,10 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | 
Description | -| ------- | ----------- | +| Command | Description | +| ----------------------------------------------------------- | --------------------------------------- | | [agh observe events](/runtime/cli-reference/observe/events) | Read cross-session observability events | -| [agh observe health](/runtime/cli-reference/observe/health) | Show observability health | +| [agh observe health](/runtime/cli-reference/observe/health) | Show observability health | diff --git a/packages/site/content/runtime/cli-reference/provider/auth/index.mdx b/packages/site/content/runtime/cli-reference/provider/auth/index.mdx index cc9075979..f90c388d9 100644 --- a/packages/site/content/runtime/cli-reference/provider/auth/index.mdx +++ b/packages/site/content/runtime/cli-reference/provider/auth/index.mdx @@ -29,10 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh provider auth login](/runtime/cli-reference/provider/auth/login) | Run the provider native login command | -| [agh provider auth status](/runtime/cli-reference/provider/auth/status) | Show provider authentication status | +| Command | Description | +| ----------------------------------------------------------------------- | ------------------------------------- | +| [agh provider auth login](/runtime/cli-reference/provider/auth/login) | Run the provider native login command | +| [agh provider auth status](/runtime/cli-reference/provider/auth/status) | Show provider authentication status | diff --git a/packages/site/content/runtime/cli-reference/provider/index.mdx b/packages/site/content/runtime/cli-reference/provider/index.mdx index d48a33452..fe3632993 100644 --- a/packages/site/content/runtime/cli-reference/provider/index.mdx +++ b/packages/site/content/runtime/cli-reference/provider/index.mdx @@ -29,10 +29,9 @@ Every 
AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh provider auth](/runtime/cli-reference/provider/auth) | Inspect native CLI and bound-secret provider authentication | -| [agh provider models](/runtime/cli-reference/provider/models) | Inspect and refresh the provider model catalog | +| Command | Description | +| ------------------------------------------------------------- | ----------------------------------------------------------- | +| [agh provider auth](/runtime/cli-reference/provider/auth) | Inspect native CLI and bound-secret provider authentication | +| [agh provider models](/runtime/cli-reference/provider/models) | Inspect and refresh the provider model catalog | diff --git a/packages/site/content/runtime/cli-reference/provider/models/index.mdx b/packages/site/content/runtime/cli-reference/provider/models/index.mdx index 345d14560..bb1a45486 100644 --- a/packages/site/content/runtime/cli-reference/provider/models/index.mdx +++ b/packages/site/content/runtime/cli-reference/provider/models/index.mdx @@ -29,11 +29,10 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh provider models list](/runtime/cli-reference/provider/models/list) | List provider model catalog entries | -| [agh provider models refresh](/runtime/cli-reference/provider/models/refresh) | Refresh provider model catalog sources | -| [agh provider models status](/runtime/cli-reference/provider/models/status) | Show provider model catalog source status | +| Command | Description | +| ----------------------------------------------------------------------------- | ----------------------------------------- | +| [agh provider 
models list](/runtime/cli-reference/provider/models/list) | List provider model catalog entries | +| [agh provider models refresh](/runtime/cli-reference/provider/models/refresh) | Refresh provider model catalog sources | +| [agh provider models status](/runtime/cli-reference/provider/models/status) | Show provider model catalog source status | diff --git a/packages/site/content/runtime/cli-reference/resource/index.mdx b/packages/site/content/runtime/cli-reference/resource/index.mdx index d0c23baf4..8984e84ea 100644 --- a/packages/site/content/runtime/cli-reference/resource/index.mdx +++ b/packages/site/content/runtime/cli-reference/resource/index.mdx @@ -41,9 +41,9 @@ agh resource -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh resource delete](/runtime/cli-reference/resource/delete) | Delete one desired-state resource | -| [agh resource get](/runtime/cli-reference/resource/get) | Show one desired-state resource | -| [agh resource list](/runtime/cli-reference/resource/list) | List desired-state resources | -| [agh resource put](/runtime/cli-reference/resource/put) | Create or update one desired-state resource | +| Command | Description | +| ------------------------------------------------------------- | ------------------------------------------- | +| [agh resource delete](/runtime/cli-reference/resource/delete) | Delete one desired-state resource | +| [agh resource get](/runtime/cli-reference/resource/get) | Show one desired-state resource | +| [agh resource list](/runtime/cli-reference/resource/list) | List desired-state resources | +| [agh resource put](/runtime/cli-reference/resource/put) | Create or update one desired-state resource | diff --git a/packages/site/content/runtime/cli-reference/session/index.mdx b/packages/site/content/runtime/cli-reference/session/index.mdx index c999cc675..6e85ff786 100644 --- a/packages/site/content/runtime/cli-reference/session/index.mdx +++ 
b/packages/site/content/runtime/cli-reference/session/index.mdx @@ -29,22 +29,21 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh session approve](/runtime/cli-reference/session/approve) | Approve or reject a pending session permission request | -| [agh session events](/runtime/cli-reference/session/events) | Read session events | -| [agh session health](/runtime/cli-reference/session/health) | Read session health and wake eligibility | -| [agh session history](/runtime/cli-reference/session/history) | Show session history grouped by turn | +| Command | Description | +| ------------------------------------------------------------- | ---------------------------------------------------------- | +| [agh session approve](/runtime/cli-reference/session/approve) | Approve or reject a pending session permission request | +| [agh session events](/runtime/cli-reference/session/events) | Read session events | +| [agh session health](/runtime/cli-reference/session/health) | Read session health and wake eligibility | +| [agh session history](/runtime/cli-reference/session/history) | Show session history grouped by turn | | [agh session inspect](/runtime/cli-reference/session/inspect) | Inspect session health, wake audit, and policy correlation | -| [agh session list](/runtime/cli-reference/session/list) | List sessions | -| [agh session new](/runtime/cli-reference/session/new) | Create a new session | -| [agh session prompt](/runtime/cli-reference/session/prompt) | Send a prompt to a session | -| [agh session repair](/runtime/cli-reference/session/repair) | Inspect and repair an interrupted session transcript | -| [agh session resume](/runtime/cli-reference/session/resume) | Resume a stopped session | -| [agh session soul](/runtime/cli-reference/session/soul) | Manage session Soul 
snapshots | -| [agh session status](/runtime/cli-reference/session/status) | Show session status | -| [agh session stop](/runtime/cli-reference/session/stop) | Stop a session | -| [agh session wait](/runtime/cli-reference/session/wait) | Block until a session stops | +| [agh session list](/runtime/cli-reference/session/list) | List sessions | +| [agh session new](/runtime/cli-reference/session/new) | Create a new session | +| [agh session prompt](/runtime/cli-reference/session/prompt) | Send a prompt to a session | +| [agh session repair](/runtime/cli-reference/session/repair) | Inspect and repair an interrupted session transcript | +| [agh session resume](/runtime/cli-reference/session/resume) | Resume a stopped session | +| [agh session soul](/runtime/cli-reference/session/soul) | Manage session Soul snapshots | +| [agh session status](/runtime/cli-reference/session/status) | Show session status | +| [agh session stop](/runtime/cli-reference/session/stop) | Stop a session | +| [agh session wait](/runtime/cli-reference/session/wait) | Block until a session stops | diff --git a/packages/site/content/runtime/cli-reference/session/soul/index.mdx b/packages/site/content/runtime/cli-reference/session/soul/index.mdx index ea96d3311..f53ed2612 100644 --- a/packages/site/content/runtime/cli-reference/session/soul/index.mdx +++ b/packages/site/content/runtime/cli-reference/session/soul/index.mdx @@ -29,9 +29,8 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| ----------------------------------------------------------------------- | --------------------------------------- | | [agh session soul refresh](/runtime/cli-reference/session/soul/refresh) | Refresh an idle session's Soul snapshot | diff --git 
a/packages/site/content/runtime/cli-reference/skill/index.mdx b/packages/site/content/runtime/cli-reference/skill/index.mdx index 7e7a94200..fabc463e2 100644 --- a/packages/site/content/runtime/cli-reference/skill/index.mdx +++ b/packages/site/content/runtime/cli-reference/skill/index.mdx @@ -29,18 +29,17 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh skill create](/runtime/cli-reference/skill/create) | Scaffold a new workspace skill | -| [agh skill disable](/runtime/cli-reference/skill/disable) | Disable a daemon-managed skill | -| [agh skill enable](/runtime/cli-reference/skill/enable) | Enable a daemon-managed skill | -| [agh skill info](/runtime/cli-reference/skill/info) | Show detailed metadata for one skill | -| [agh skill install](/runtime/cli-reference/skill/install) | Install a marketplace skill | -| [agh skill list](/runtime/cli-reference/skill/list) | List locally available skills | -| [agh skill remove](/runtime/cli-reference/skill/remove) | Remove an installed marketplace skill | -| [agh skill search](/runtime/cli-reference/skill/search) | Search marketplace skills | -| [agh skill update](/runtime/cli-reference/skill/update) | Check for or install updates for marketplace skills | -| [agh skill view](/runtime/cli-reference/skill/view) | Read a skill or one of its resource files | +| Command | Description | +| --------------------------------------------------------- | --------------------------------------------------- | +| [agh skill create](/runtime/cli-reference/skill/create) | Scaffold a new workspace skill | +| [agh skill disable](/runtime/cli-reference/skill/disable) | Disable a daemon-managed skill | +| [agh skill enable](/runtime/cli-reference/skill/enable) | Enable a daemon-managed skill | +| [agh skill info](/runtime/cli-reference/skill/info) | 
Show detailed metadata for one skill | +| [agh skill install](/runtime/cli-reference/skill/install) | Install a marketplace skill | +| [agh skill list](/runtime/cli-reference/skill/list) | List locally available skills | +| [agh skill remove](/runtime/cli-reference/skill/remove) | Remove an installed marketplace skill | +| [agh skill search](/runtime/cli-reference/skill/search) | Search marketplace skills | +| [agh skill update](/runtime/cli-reference/skill/update) | Check for or install updates for marketplace skills | +| [agh skill view](/runtime/cli-reference/skill/view) | Read a skill or one of its resource files | diff --git a/packages/site/content/runtime/cli-reference/task/child/index.mdx b/packages/site/content/runtime/cli-reference/task/child/index.mdx index 117da3c23..824d97c4f 100644 --- a/packages/site/content/runtime/cli-reference/task/child/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/child/index.mdx @@ -41,6 +41,6 @@ agh task child -o json ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| ----------------------------------------------------------------- | ------------------------------------ | | [agh task child create](/runtime/cli-reference/task/child/create) | Create a child task beneath a parent | diff --git a/packages/site/content/runtime/cli-reference/task/dependency/index.mdx b/packages/site/content/runtime/cli-reference/task/dependency/index.mdx index 4dd17e7ae..a0a7d5565 100644 --- a/packages/site/content/runtime/cli-reference/task/dependency/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/dependency/index.mdx @@ -41,7 +41,7 @@ agh task dependency -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh task dependency add](/runtime/cli-reference/task/dependency/add) | Add a dependency edge to a task | +| Command | Description | +| --------------------------------------------------------------------------- | 
------------------------------------ | +| [agh task dependency add](/runtime/cli-reference/task/dependency/add) | Add a dependency edge to a task | | [agh task dependency remove](/runtime/cli-reference/task/dependency/remove) | Remove a dependency edge from a task | diff --git a/packages/site/content/runtime/cli-reference/task/index.mdx b/packages/site/content/runtime/cli-reference/task/index.mdx index 8b9e3b7ea..adcaea8f7 100644 --- a/packages/site/content/runtime/cli-reference/task/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/index.mdx @@ -54,26 +54,26 @@ agh task -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh task approve](/runtime/cli-reference/task/approve) | Approve a task and enqueue its first run | -| [agh task cancel](/runtime/cli-reference/task/cancel) | Cancel a task tree | -| [agh task child](/runtime/cli-reference/task/child) | Manage child tasks | -| [agh task complete](/runtime/cli-reference/task/complete) | Complete a claimed task run for the current agent session | -| [agh task create](/runtime/cli-reference/task/create) | Create a task | -| [agh task delete](/runtime/cli-reference/task/delete) | Delete a task | -| [agh task dependency](/runtime/cli-reference/task/dependency) | Manage task dependencies | -| [agh task fail](/runtime/cli-reference/task/fail) | Fail a claimed task run for the current agent session | -| [agh task get](/runtime/cli-reference/task/get) | Show one task with related detail | -| [agh task heartbeat](/runtime/cli-reference/task/heartbeat) | Extend a claimed task run lease for the current agent session | -| [agh task list](/runtime/cli-reference/task/list) | List tasks | -| [agh task next](/runtime/cli-reference/task/next) | Claim the next task run for the current agent session | -| [agh task notification](/runtime/cli-reference/task/notification) | Manage task terminal notifications | -| [agh task profile](/runtime/cli-reference/task/profile) | Manage task execution 
profiles | -| [agh task publish](/runtime/cli-reference/task/publish) | Publish a draft task and enqueue its first run | -| [agh task reject](/runtime/cli-reference/task/reject) | Reject a pending approval task | -| [agh task release](/runtime/cli-reference/task/release) | Release a claimed task run for the current agent session | -| [agh task review](/runtime/cli-reference/task/review) | Manage task-run reviews | -| [agh task run](/runtime/cli-reference/task/run) | Manage task runs | -| [agh task start](/runtime/cli-reference/task/start) | Enqueue a run for an executable task | -| [agh task update](/runtime/cli-reference/task/update) | Update mutable task fields | +| Command | Description | +| ----------------------------------------------------------------- | ------------------------------------------------------------- | +| [agh task approve](/runtime/cli-reference/task/approve) | Approve a task and enqueue its first run | +| [agh task cancel](/runtime/cli-reference/task/cancel) | Cancel a task tree | +| [agh task child](/runtime/cli-reference/task/child) | Manage child tasks | +| [agh task complete](/runtime/cli-reference/task/complete) | Complete a claimed task run for the current agent session | +| [agh task create](/runtime/cli-reference/task/create) | Create a task | +| [agh task delete](/runtime/cli-reference/task/delete) | Delete a task | +| [agh task dependency](/runtime/cli-reference/task/dependency) | Manage task dependencies | +| [agh task fail](/runtime/cli-reference/task/fail) | Fail a claimed task run for the current agent session | +| [agh task get](/runtime/cli-reference/task/get) | Show one task with related detail | +| [agh task heartbeat](/runtime/cli-reference/task/heartbeat) | Extend a claimed task run lease for the current agent session | +| [agh task list](/runtime/cli-reference/task/list) | List tasks | +| [agh task next](/runtime/cli-reference/task/next) | Claim the next task run for the current agent session | +| [agh task 
notification](/runtime/cli-reference/task/notification) | Manage task terminal notifications | +| [agh task profile](/runtime/cli-reference/task/profile) | Manage task execution profiles | +| [agh task publish](/runtime/cli-reference/task/publish) | Publish a draft task and enqueue its first run | +| [agh task reject](/runtime/cli-reference/task/reject) | Reject a pending approval task | +| [agh task release](/runtime/cli-reference/task/release) | Release a claimed task run for the current agent session | +| [agh task review](/runtime/cli-reference/task/review) | Manage task-run reviews | +| [agh task run](/runtime/cli-reference/task/run) | Manage task runs | +| [agh task start](/runtime/cli-reference/task/start) | Enqueue a run for an executable task | +| [agh task update](/runtime/cli-reference/task/update) | Update mutable task fields | diff --git a/packages/site/content/runtime/cli-reference/task/notification/index.mdx b/packages/site/content/runtime/cli-reference/task/notification/index.mdx index fb26a0edf..0e821741b 100644 --- a/packages/site/content/runtime/cli-reference/task/notification/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/notification/index.mdx @@ -41,9 +41,9 @@ agh task notification -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh task notification delete](/runtime/cli-reference/task/notification/delete) | Delete one bridge terminal notification subscription | -| [agh task notification list](/runtime/cli-reference/task/notification/list) | List bridge terminal notification subscriptions for one task | -| [agh task notification show](/runtime/cli-reference/task/notification/show) | Show one bridge terminal notification subscription | -| [agh task notification subscribe](/runtime/cli-reference/task/notification/subscribe) | Subscribe a bridge target to task terminal notifications | +| Command | Description | +| 
------------------------------------------------------------------------------------- | ------------------------------------------------------------ | +| [agh task notification delete](/runtime/cli-reference/task/notification/delete) | Delete one bridge terminal notification subscription | +| [agh task notification list](/runtime/cli-reference/task/notification/list) | List bridge terminal notification subscriptions for one task | +| [agh task notification show](/runtime/cli-reference/task/notification/show) | Show one bridge terminal notification subscription | +| [agh task notification subscribe](/runtime/cli-reference/task/notification/subscribe) | Subscribe a bridge target to task terminal notifications | diff --git a/packages/site/content/runtime/cli-reference/task/profile/index.mdx b/packages/site/content/runtime/cli-reference/task/profile/index.mdx index 2cb28099f..1cb11b6a6 100644 --- a/packages/site/content/runtime/cli-reference/task/profile/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/profile/index.mdx @@ -41,8 +41,8 @@ agh task profile -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh task profile delete](/runtime/cli-reference/task/profile/delete) | Delete one task execution profile | -| [agh task profile inspect](/runtime/cli-reference/task/profile/inspect) | Show one task execution profile | -| [agh task profile update](/runtime/cli-reference/task/profile/update) | Replace one task execution profile | +| Command | Description | +| ----------------------------------------------------------------------- | ---------------------------------- | +| [agh task profile delete](/runtime/cli-reference/task/profile/delete) | Delete one task execution profile | +| [agh task profile inspect](/runtime/cli-reference/task/profile/inspect) | Show one task execution profile | +| [agh task profile update](/runtime/cli-reference/task/profile/update) | Replace one task execution profile | diff --git 
a/packages/site/content/runtime/cli-reference/task/review/index.mdx b/packages/site/content/runtime/cli-reference/task/review/index.mdx index 42eda9650..5db3f783c 100644 --- a/packages/site/content/runtime/cli-reference/task/review/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/review/index.mdx @@ -41,9 +41,9 @@ agh task review -o json ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh task review list](/runtime/cli-reference/task/review/list) | List task-run reviews | -| [agh task review request](/runtime/cli-reference/task/review/request) | Request review for a task run | -| [agh task review show](/runtime/cli-reference/task/review/show) | Show one task-run review | -| [agh task review submit](/runtime/cli-reference/task/review/submit) | Submit one task-run review verdict | +| Command | Description | +| --------------------------------------------------------------------- | ---------------------------------- | +| [agh task review list](/runtime/cli-reference/task/review/list) | List task-run reviews | +| [agh task review request](/runtime/cli-reference/task/review/request) | Request review for a task run | +| [agh task review show](/runtime/cli-reference/task/review/show) | Show one task-run review | +| [agh task review submit](/runtime/cli-reference/task/review/submit) | Submit one task-run review verdict | diff --git a/packages/site/content/runtime/cli-reference/task/run/index.mdx b/packages/site/content/runtime/cli-reference/task/run/index.mdx index 75158b601..486e72762 100644 --- a/packages/site/content/runtime/cli-reference/task/run/index.mdx +++ b/packages/site/content/runtime/cli-reference/task/run/index.mdx @@ -41,13 +41,13 @@ agh task run -o json ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| ----------------------------------------------------------------------------- | ------------------------------------------------------- | | [agh task run 
attach-session](/runtime/cli-reference/task/run/attach-session) | Attach an existing session to a claimed or starting run | -| [agh task run cancel](/runtime/cli-reference/task/run/cancel) | Cancel a task run | -| [agh task run claim](/runtime/cli-reference/task/run/claim) | Claim a queued task run | -| [agh task run complete](/runtime/cli-reference/task/run/complete) | Complete a running task run | -| [agh task run enqueue](/runtime/cli-reference/task/run/enqueue) | Enqueue a task run | -| [agh task run fail](/runtime/cli-reference/task/run/fail) | Fail a task run | -| [agh task run list](/runtime/cli-reference/task/run/list) | List runs for a task | -| [agh task run start](/runtime/cli-reference/task/run/start) | Start a claimed task run | +| [agh task run cancel](/runtime/cli-reference/task/run/cancel) | Cancel a task run | +| [agh task run claim](/runtime/cli-reference/task/run/claim) | Claim a queued task run | +| [agh task run complete](/runtime/cli-reference/task/run/complete) | Complete a running task run | +| [agh task run enqueue](/runtime/cli-reference/task/run/enqueue) | Enqueue a task run | +| [agh task run fail](/runtime/cli-reference/task/run/fail) | Fail a task run | +| [agh task run list](/runtime/cli-reference/task/run/list) | List runs for a task | +| [agh task run start](/runtime/cli-reference/task/run/start) | Start a claimed task run | diff --git a/packages/site/content/runtime/cli-reference/tool/index.mdx b/packages/site/content/runtime/cli-reference/tool/index.mdx index d4b7c42e9..a4de83ae2 100644 --- a/packages/site/content/runtime/cli-reference/tool/index.mdx +++ b/packages/site/content/runtime/cli-reference/tool/index.mdx @@ -29,13 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| 
------------------------------------------------------- | ------------------------------------------------------ | | [agh tool approve](/runtime/cli-reference/tool/approve) | Mint a one-shot approval token for one tool invocation | -| [agh tool info](/runtime/cli-reference/tool/info) | Show one registry tool descriptor and diagnostics | -| [agh tool invoke](/runtime/cli-reference/tool/invoke) | Invoke one registry tool through daemon policy | -| [agh tool list](/runtime/cli-reference/tool/list) | List operator-visible registry tools | -| [agh tool search](/runtime/cli-reference/tool/search) | Search operator-visible registry tools | +| [agh tool info](/runtime/cli-reference/tool/info) | Show one registry tool descriptor and diagnostics | +| [agh tool invoke](/runtime/cli-reference/tool/invoke) | Invoke one registry tool through daemon policy | +| [agh tool list](/runtime/cli-reference/tool/list) | List operator-visible registry tools | +| [agh tool search](/runtime/cli-reference/tool/search) | Search operator-visible registry tools | diff --git a/packages/site/content/runtime/cli-reference/toolsets/index.mdx b/packages/site/content/runtime/cli-reference/toolsets/index.mdx index 8316a83e0..281984342 100644 --- a/packages/site/content/runtime/cli-reference/toolsets/index.mdx +++ b/packages/site/content/runtime/cli-reference/toolsets/index.mdx @@ -29,10 +29,9 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | +| Command | Description | +| --------------------------------------------------------- | ----------------------------------- | | [agh toolsets info](/runtime/cli-reference/toolsets/info) | Show one registry toolset expansion | -| [agh toolsets list](/runtime/cli-reference/toolsets/list) | List registry toolsets | +| [agh toolsets list](/runtime/cli-reference/toolsets/list) | 
List registry toolsets | diff --git a/packages/site/content/runtime/cli-reference/vault/index.mdx b/packages/site/content/runtime/cli-reference/vault/index.mdx index 093ef4af3..3a97a8150 100644 --- a/packages/site/content/runtime/cli-reference/vault/index.mdx +++ b/packages/site/content/runtime/cli-reference/vault/index.mdx @@ -29,12 +29,11 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## Subcommands -| Command | Description | -| ------- | ----------- | -| [agh vault delete](/runtime/cli-reference/vault/delete) | Delete one vault secret | -| [agh vault get](/runtime/cli-reference/vault/get) | Show redacted metadata for one vault secret | -| [agh vault list](/runtime/cli-reference/vault/list) | List redacted vault secret metadata | -| [agh vault put](/runtime/cli-reference/vault/put) | Store one write-only vault secret from stdin | +| Command | Description | +| ------------------------------------------------------- | -------------------------------------------- | +| [agh vault delete](/runtime/cli-reference/vault/delete) | Delete one vault secret | +| [agh vault get](/runtime/cli-reference/vault/get) | Show redacted metadata for one vault secret | +| [agh vault list](/runtime/cli-reference/vault/list) | List redacted vault secret metadata | +| [agh vault put](/runtime/cli-reference/vault/put) | Store one write-only vault secret from stdin | diff --git a/packages/site/content/runtime/cli-reference/workspace/index.mdx b/packages/site/content/runtime/cli-reference/workspace/index.mdx index 0d35b0f0e..ba5b2cde0 100644 --- a/packages/site/content/runtime/cli-reference/workspace/index.mdx +++ b/packages/site/content/runtime/cli-reference/workspace/index.mdx @@ -29,13 +29,12 @@ Every AGH command supports `-o, --output`: - `jsonl` for wait or streaming commands that emit one JSON record per line - `toon` for compact agent-readable summaries - ## 
Subcommands -| Command | Description | -| ------- | ----------- | -| [agh workspace add](/runtime/cli-reference/workspace/add) | Register a workspace | -| [agh workspace edit](/runtime/cli-reference/workspace/edit) | Edit a registered workspace | -| [agh workspace info](/runtime/cli-reference/workspace/info) | Show one workspace with resolved details | -| [agh workspace list](/runtime/cli-reference/workspace/list) | List registered workspaces | -| [agh workspace remove](/runtime/cli-reference/workspace/remove) | Remove a workspace registration | +| Command | Description | +| --------------------------------------------------------------- | ---------------------------------------- | +| [agh workspace add](/runtime/cli-reference/workspace/add) | Register a workspace | +| [agh workspace edit](/runtime/cli-reference/workspace/edit) | Edit a registered workspace | +| [agh workspace info](/runtime/cli-reference/workspace/info) | Show one workspace with resolved details | +| [agh workspace list](/runtime/cli-reference/workspace/list) | List registered workspaces | +| [agh workspace remove](/runtime/cli-reference/workspace/remove) | Remove a workspace registration | diff --git a/web/e2e/__tests__/session-provider-override.spec.ts b/web/e2e/__tests__/session-provider-override.spec.ts index 23f645765..0bae123a3 100644 --- a/web/e2e/__tests__/session-provider-override.spec.ts +++ b/web/e2e/__tests__/session-provider-override.spec.ts @@ -61,7 +61,7 @@ test.use({ }, }); -test("operator can create a provider-override session and gets an inline resume failure when that provider disappears", async ({ +test("operator can create a provider/model override session and gets an inline resume failure when that provider disappears", async ({ appPage, browserArtifacts, runtime, @@ -114,6 +114,16 @@ test("operator can create a provider-override session and gets an inline resume await appPage.setViewportSize({ width: 1280, height: 800 }); await 
appPage.getByTestId("session-create-provider-select").selectOption(overrideProvider); + await appPage.getByTestId("session-create-model-select").click(); + await expect(appPage.getByTestId("model-command-item-qa-browser-model")).toBeVisible(); + await appPage.getByTestId("model-command-item-qa-browser-model").click(); + await expect(appPage.getByTestId("session-create-model-select")).toContainText( + "qa-browser-model" + ); + await expect(appPage.getByTestId("session-create-reasoning-default")).toContainText("medium"); + await appPage.getByTestId("session-create-reasoning-select").click(); + await expect(appPage.getByTestId("reasoning-command-item-high")).toBeVisible(); + await appPage.getByTestId("reasoning-command-item-high").click(); const createRequestPromise = appPage.waitForRequest( request => request.method() === "POST" && request.url().endsWith("/api/sessions") @@ -128,12 +138,16 @@ test("operator can create a provider-override session and gets an inline resume const createResponse = await createResponsePromise; const createRequestBody = createRequest.postDataJSON() as { agent_name?: string; + model?: string; provider?: string; + reasoning_effort?: string; workspace?: string; }; expect(createRequestBody).toMatchObject({ agent_name: browserLifecycleAgent, + model: "qa-browser-model", provider: overrideProvider, + reasoning_effort: "high", workspace: workspace.id, }); expect(createResponse.ok()).toBeTruthy(); @@ -285,6 +299,12 @@ async function writeWorkspaceConfig(input: { `command = "${escapeTomlString(input.overrideCommand)}"`, `[providers.${overrideProvider}.models]`, `default = "qa-browser-model"`, + `[[providers.${overrideProvider}.models.curated]]`, + `id = "qa-browser-model"`, + `display_name = "QA Browser Model"`, + `supports_reasoning = true`, + `reasoning_efforts = ["low", "medium", "high"]`, + `default_reasoning_effort = "medium"`, `[[providers.${overrideProvider}.credential_slots]]`, `name = "api_key"`, `target_env = "QA_BROWSER_API_KEY"`, diff 
--git a/web/e2e/fixtures/__tests__/runtime-seed.test.ts b/web/e2e/fixtures/__tests__/runtime-seed.test.ts index 41debb174..ff1f54119 100644 --- a/web/e2e/fixtures/__tests__/runtime-seed.test.ts +++ b/web/e2e/fixtures/__tests__/runtime-seed.test.ts @@ -1118,7 +1118,17 @@ describe("browser runtime seed helpers", () => { name: "browser-provider", settings: { command: "browser-provider", - models: { default: "gpt-5.4", curated: [{ id: "gpt-5.4" }] }, + models: { + default: "gpt-5.4", + curated: [ + { + id: "gpt-5.4", + supports_reasoning: true, + reasoning_efforts: ["low", "medium", "high"], + default_reasoning_effort: "medium", + }, + ], + }, }, }, ], @@ -1167,6 +1177,28 @@ describe("browser runtime seed helpers", () => { "/api/settings/providers/browser-provider", expect.objectContaining({ method: "PUT" }) ); + const providerRequest = requestJSON.mock.calls.find( + ([pathname]) => pathname === "/api/settings/providers/browser-provider" + ); + if (!providerRequest) { + throw new Error("settings provider seed did not issue provider PUT request"); + } + const providerInit = providerRequest[1] as RequestInit; + const providerBody = JSON.parse(String(providerInit.body)); + expect(providerBody.settings.models).toMatchObject({ + default: "gpt-5.4", + curated: [ + { + id: "gpt-5.4", + supports_reasoning: true, + reasoning_efforts: ["low", "medium", "high"], + default_reasoning_effort: "medium", + }, + ], + }); + expect(JSON.stringify(providerBody)).not.toContain("default_model"); + expect(JSON.stringify(providerBody)).not.toContain("supported_models"); + expect(JSON.stringify(providerBody)).not.toContain("supports_reasoning_effort"); expect(requestJSON).toHaveBeenCalledWith( "/api/settings/hooks/browser-turn-end", expect.objectContaining({ method: "PUT" }) From 2debf0cf809f84314d39eaded01277ea038f6959 Mon Sep 17 00:00:00 2001 From: Pedro Nauck Date: Thu, 7 May 2026 10:33:19 -0300 Subject: [PATCH 12/13] test: add provider model catalog QA program Adds the release-grade QA 
artifacts for the provider model catalog program (Tasks 01-11) under .compozy/tasks/provider-model-catalog/qa/: coverage matrix mapping every TechSpec safety invariant, ADR, task, public surface and failure mode to concrete cases; master test plan with charter, environment, entry/exit criteria, scenario contract and verification commands; tiered regression suite; 33 concrete test cases (SMOKE-001 + TC-FUNC-001..015 + TC-INT-001..006 + TC-PERF-001..002 + TC-SEC-001..002 + TC-UI-001..003 + TC-REG-001..002 + TC-SCEN-001..002); bug template; verification report template. Task 13 can execute the plan from an isolated agh-qa-bootstrap lab without inventing scenarios. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../qa/issues/BUG-NNN-template.md | 72 +++++++ .../qa/test-cases/SMOKE-001.md | 57 ++++++ .../qa/test-cases/TC-FUNC-001.md | 84 ++++++++ .../qa/test-cases/TC-FUNC-002.md | 52 +++++ .../qa/test-cases/TC-FUNC-003.md | 48 +++++ .../qa/test-cases/TC-FUNC-004.md | 58 ++++++ .../qa/test-cases/TC-FUNC-005.md | 57 ++++++ .../qa/test-cases/TC-FUNC-006.md | 49 +++++ .../qa/test-cases/TC-FUNC-007.md | 51 +++++ .../qa/test-cases/TC-FUNC-008.md | 51 +++++ .../qa/test-cases/TC-FUNC-009.md | 46 +++++ .../qa/test-cases/TC-FUNC-010.md | 50 +++++ .../qa/test-cases/TC-FUNC-011.md | 50 +++++ .../qa/test-cases/TC-FUNC-012.md | 44 ++++ .../qa/test-cases/TC-FUNC-013.md | 44 ++++ .../qa/test-cases/TC-FUNC-014.md | 48 +++++ .../qa/test-cases/TC-FUNC-015.md | 48 +++++ .../qa/test-cases/TC-INT-001.md | 52 +++++ .../qa/test-cases/TC-INT-002.md | 48 +++++ .../qa/test-cases/TC-INT-003.md | 45 +++++ .../qa/test-cases/TC-INT-004.md | 49 +++++ .../qa/test-cases/TC-INT-005.md | 48 +++++ .../qa/test-cases/TC-INT-006.md | 44 ++++ .../qa/test-cases/TC-PERF-001.md | 50 +++++ .../qa/test-cases/TC-PERF-002.md | 48 +++++ .../qa/test-cases/TC-REG-001.md | 45 +++++ .../qa/test-cases/TC-REG-002.md | 42 ++++ .../qa/test-cases/TC-SCEN-001.md | 83 ++++++++ .../qa/test-cases/TC-SCEN-002.md | 83 ++++++++ 
.../qa/test-cases/TC-SEC-001.md | 43 ++++ .../qa/test-cases/TC-SEC-002.md | 50 +++++ .../qa/test-cases/TC-UI-001.md | 57 ++++++ .../qa/test-cases/TC-UI-002.md | 43 ++++ .../qa/test-cases/TC-UI-003.md | 51 +++++ .../qa/test-plans/00-coverage-matrix.md | 124 ++++++++++++ .../provider-model-catalog-regression.md | 79 ++++++++ .../provider-model-catalog-test-plan.md | 190 ++++++++++++++++++ .../qa/verification-report-template.md | 110 ++++++++++ 38 files changed, 2293 insertions(+) create mode 100644 .compozy/tasks/provider-model-catalog/qa/issues/BUG-NNN-template.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/SMOKE-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-003.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-004.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-005.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-006.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-007.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-008.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-009.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-010.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-011.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-012.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-013.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-014.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-015.md create mode 100644 
.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-003.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-004.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-005.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-006.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-001.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-002.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-003.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-plans/00-coverage-matrix.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-regression.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-test-plan.md create mode 100644 .compozy/tasks/provider-model-catalog/qa/verification-report-template.md diff --git a/.compozy/tasks/provider-model-catalog/qa/issues/BUG-NNN-template.md b/.compozy/tasks/provider-model-catalog/qa/issues/BUG-NNN-template.md new file mode 100644 
index 000000000..68767cf75 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/issues/BUG-NNN-template.md @@ -0,0 +1,72 @@ +# BUG-NNN: + +**Severity:** Critical | High | Medium | Low +**Priority:** P0 | P1 | P2 | P3 +**Type:** Functional | UI | Performance | Security | Data | Crash +**Status:** Open +**Discovered During:** TC-FUNC-NNN | TC-INT-NNN | TC-PERF-NNN | TC-SEC-NNN | TC-UI-NNN | TC-REG-NNN | TC-SCEN-NNN +**Reporter:** +**Created:** YYYY-MM-DD +**Last Updated:** YYYY-MM-DD + +## Environment + +- **Build:** +- **OS:** +- **Browser:** (only for UI bugs) +- **URL / Endpoint:** +- **Bootstrap manifest:** +- **Lab root / runtime home / ports:** +- **Live provider/LLM:** + +## Summary + + + +## Behavioral Impact + +- **Operator/User Goal:** +- **Agent Behavior:** +- **Business Outcome:** +- **Cross-Surface State:** + +## Reproduction + +```bash +# Verbatim commands (paths from bootstrap manifest) +``` + +Observed before fix: + +- + +## Expected + + + +## Root Cause + + + +## Fix + + + +## Verification + +- +- +- + +## Impact + +- **Users Affected:** +- **Frequency:** +- **Workaround:** + +## Related + +- Test Case: +- TechSpec Invariant: +- ADR: +- Logs / artifacts: diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/SMOKE-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/SMOKE-001.md new file mode 100644 index 000000000..80b8da370 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/SMOKE-001.md @@ -0,0 +1,57 @@ +# SMOKE-001: Provider Model Catalog Smoke Readiness + +**Priority:** P0 +**Type:** Smoke +**Status:** Not Run +**Estimated Time:** 15 minutes +**Created:** 2026-05-07 +**Last Updated:** 2026-05-07 + +--- + +## Objective + +Confirm the isolated QA lab is healthy enough to execute release-grade catalog scenarios. Smoke is **entry criteria only**; passing this case proves nothing about feature behavior. + +## Preconditions + +- [ ] `agh-qa-bootstrap` produced `bootstrap-manifest.json` for the run. 
+- [ ] Unique `AGH_HOME`, ports, and tmux socket allocated. +- [ ] `AGH_WEB_API_PROXY_TARGET` exported from manifest. +- [ ] No production code changes pending beyond Task 12 / Task 13 QA artifacts. + +## Test Steps + +1. **Verify daemon binary builds.** + - Command: `make build`. + - **Expected:** Exit 0; binary present at `bin/agh`. +2. **Verify codegen contracts are clean.** + - Command: `make codegen-check`. + - **Expected:** No drift in `openapi/agh.json` or `web/src/generated/agh-openapi.d.ts`. +3. **Verify Bun typecheck and unit tests.** + - Command: `make bun-typecheck && make bun-test`. + - **Expected:** All workspaces pass; vitest catches no regression. +4. **Verify focused Go gates compile and pass.** + - Command: `go test -race -count=1 ./internal/config ./internal/store/globaldb ./internal/modelcatalog/... ./internal/acp ./internal/api/... ./internal/cli ./internal/extension/...`. + - **Expected:** Exit 0. +5. **Boot the daemon and request status.** + - Command (in lab): `agh daemon start --foreground &` then `agh provider models status -o json`. + - **Expected:** JSON payload includes `sources` array with `idle` or `succeeded` `refresh_state`. + +## Audit Coverage + +- Smoke entry only. Does **not** satisfy any release-grade audit minimum. + +## Pass Criteria + +- All five steps exit 0. +- Daemon responds within 5s. + +## Failure Criteria + +- Any step exits non-zero. +- Daemon hangs or returns OS-level error. + +## Notes + +If smoke fails, halt the QA run and report the failing step in `qa/verification-report.md` before any TC-FUNC/INT execution. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-001.md new file mode 100644 index 000000000..51a6a6813 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-001.md @@ -0,0 +1,84 @@ +# TC-FUNC-001: Provider Config Hard-Cut - Old Keys Rejected + +**Priority:** P0 +**Type:** Functional +**Module:** `internal/config` +**Requirement:** ADR-002, TechSpec Delete Targets, Task 01. +**Status:** Not Run +**Created:** 2026-05-07 +**Last Updated:** 2026-05-07 + +## Objective + +Verify that any `config.toml` containing the deleted flat provider model fields fails validation with deterministic, path-scoped errors and that no compatibility fallback rehydrates the values. + +## Preconditions + +- [ ] Fresh isolated `AGH_HOME` (no prior config cache). +- [ ] Daemon binary built from current branch. + +## Test Steps + +1. **Write `config.toml` with the deleted `default_model` key.** + - Input: + ```toml + [providers.codex] + command = "/bin/true" + default_model = "gpt-5.4" + ``` + - **Expected:** `agh config validate` (and daemon boot) returns an error referencing path `providers.codex.default_model` and explicitly stating the key is removed. +2. **Replace with deleted `supported_models` key.** + - Input: + ```toml + [providers.codex] + command = "/bin/true" + supported_models = ["gpt-5.4"] + ``` + - **Expected:** Error references `providers.codex.supported_models`. +3. **Replace with deleted `supports_reasoning_effort` key.** + - Input: + ```toml + [providers.codex] + command = "/bin/true" + supports_reasoning_effort = true + ``` + - **Expected:** Error references `providers.codex.supports_reasoning_effort`. +4. 
**Confirm new nested shape parses cleanly.** + - Input: + ```toml + [providers.codex] + command = "/bin/true" + [providers.codex.models] + default = "gpt-5.4" + [[providers.codex.models.curated]] + id = "gpt-5.4" + supports_reasoning = true + reasoning_efforts = ["minimal", "low", "medium", "high", "xhigh"] + default_reasoning_effort = "medium" + ``` + - **Expected:** Validation succeeds; daemon starts; `agh provider models list codex -o json` returns rows tagged with `source_id="config"` and priority `120`. + +## Negative / Boundary Tests + +- Empty curated array with valid `default` → must succeed (manual default model is valid, SI-6). +- `default = ""` → must fail with explicit path `providers.codex.models.default`. +- Curated model `id` blank → must fail. +- `default_reasoning_effort = "extreme"` not in `reasoning_efforts` → must fail. + +## Audit Coverage + +- C6 task tree (Task 01). +- C8 cross-surface truth: rendered `agh config show` and persisted SQLite catalog row both reflect new shape. +- TechSpec Safety Invariants: SI-6 (manual entry valid), SI-8 (only `internal/modelcatalog.Store` writes catalog rows). + +## Pass Criteria + +- Steps 1-3 fail with the documented error path; no silent hydrate of legacy fields. +- Step 4 produces catalog rows attributed to the `config` source. +- `agh config show` does not emit any of the deleted keys. + +## Failure Criteria + +- Any deleted key parses without error. +- Error path lacks the offending key name. +- Catalog row attributes the data to a source other than `config` (priority 120). 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-002.md new file mode 100644 index 000000000..84a32f55e --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-002.md @@ -0,0 +1,52 @@ +# TC-FUNC-002: Provider Config - Curated Validation Rules + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/config` +**Requirement:** TechSpec Config Lifecycle. +**Status:** Not Run + +## Objective + +Verify the nested `[providers.<name>.models]` block enforces the documented validation rules and accepts manual default models. + +## Preconditions + +- [ ] Fresh `AGH_HOME`. +- [ ] Daemon binary built from current branch. + +## Test Steps + +1. **Manual default model outside curated list is accepted.** + - Input: `[providers.codex.models] default = "manual-gpt-9000"` with empty `curated`. + - **Expected:** Validation succeeds; `agh provider models list codex -o json` includes `manual-gpt-9000` only when sources later report it; manual selection at session creation succeeds. +2. **Duplicate curated `id` is rejected.** + - Input: two `[[providers.codex.models.curated]]` entries with `id = "gpt-5.4"`. + - **Expected:** Error references both occurrences. +3. **Blank reasoning effort is rejected.** + - Input: `reasoning_efforts = ["high", ""]`. + - **Expected:** Error references the empty entry. +4. **`default_reasoning_effort` must be present in `reasoning_efforts`.** + - Input: `reasoning_efforts = ["low", "medium"]`, `default_reasoning_effort = "high"`. + - **Expected:** Error references the curated entry's effort path. +5. **`[model_catalog.sources.models_dev]` defaults populate.** + - Input: omit the section entirely. + - **Expected:** `agh config show` resolves `enabled=true`, `endpoint="https://models.dev/api.json"`, `ttl="24h"`, `timeout="10s"`. +6.
**`models.discovery.command` and `.endpoint` are mutually exclusive when both set without adapter override.** + - Input: `[providers.openclaw.models.discovery] command = "x" endpoint = "https://"`. + - **Expected:** Error states only one of the two is allowed unless the provider adapter documents both. + +## Audit Coverage + +- C6 task tree (Task 01, Task 03 sources, Task 05 daemon wiring). +- SI-6 (manual model entry remains valid). + +## Pass Criteria + +- All validation cases produce the documented error or success. +- Defaults appear when omitted. + +## Failure Criteria + +- Any blank/duplicate/invalid combination is silently accepted. +- Defaults differ from the TechSpec values. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-003.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-003.md new file mode 100644 index 000000000..4a567b0c7 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-003.md @@ -0,0 +1,48 @@ +# TC-FUNC-003: Builtin Source Converts Defaults to Priority-10 Rows + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/modelcatalog` (`builtin` source) +**Requirement:** TechSpec Source Implementations. +**Status:** Not Run + +## Objective + +Verify the `builtin` source emits source rows with priority 10, supports offline first-run, and never wins against config or live sources. + +## Preconditions + +- [ ] Fresh `AGH_HOME` with no overrides for built-in providers. +- [ ] Network disabled (no `models.dev`, no live discovery). + +## Test Steps + +1. **Boot daemon offline.** + - Command: `agh daemon start --foreground` with `AGH_DISABLE_OUTBOUND=1` (or stubbed transport). + - **Expected:** Daemon starts; no errors logged that block startup. +2. **List catalog for a built-in provider (e.g. `codex`).** + - Command: `agh provider models list codex -o json`. + - **Expected:** Models present with `sources[0].source_id="builtin"` and `priority=10`; `availability_state="unknown"`. +3. 
**Add a config curated model that overrides display name.** + - Update `config.toml` with curated metadata for the same `model_id`. + - **Expected:** Merged projection shows the config-source `display_name` because priority 120 > 10; builtin row remains addressable as a separate source via `--source builtin`. +4. **Disable the builtin source via internal API.** + - Programmatically remove builtin source registration in tests. + - **Expected:** Catalog falls back to remaining sources without panicking; no orphan rows remain in `model_catalog_rows` for the removed source after replace. + +## Audit Coverage + +- C6 task tree (Task 03). +- SI-13 (partial-source success). + +## Pass Criteria + +- Builtin rows appear at priority 10. +- Config wins on conflict; builtin survives as second source. +- Removing builtin source does not corrupt rows. + +## Failure Criteria + +- Builtin priority differs from 10. +- Builtin overrides higher-priority sources. +- Daemon panics or fails to boot offline. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-004.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-004.md new file mode 100644 index 000000000..86be1e886 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-004.md @@ -0,0 +1,58 @@ +# TC-FUNC-004: Catalog Merge Determinism (Priority + Freshness + Source-ID Tie-Break) + +**Priority:** P0 +**Type:** Functional +**Module:** `internal/modelcatalog` merge +**Requirement:** TechSpec Proposed Design / Architectural Boundaries. +**Status:** Not Run + +## Objective + +Verify that the merge projection is deterministic and follows the documented priority order, freshness tie-break, and source-id tie-break, with lower-priority sources filling missing fields. + +## Preconditions + +- [ ] Catalog seeded via test harness with crafted source rows for one provider/model. +- [ ] All rows written through `internal/modelcatalog.Store.ReplaceSourceRows`. + +## Test Steps + +1. 
**Higher-priority source wins conflicting non-empty field.** + - Seed: `config` (priority 120) `display_name="Config Name"`, `models_dev` (priority 50) `display_name="DevName"`. + - **Expected:** Projected `display_name="Config Name"`. +2. **Lower-priority source fills missing field.** + - Seed: `config` row sets only `default_reasoning_effort`; `models_dev` row sets `cost_input_per_million`. + - **Expected:** Projected model exposes both fields. +3. **Freshness tie-break.** + - Seed: two rows with identical priority but different `refreshed_at`. + - **Expected:** Fresher row wins. +4. **Source-id tie-break.** + - Seed: two rows with identical priority and `refreshed_at`. + - **Expected:** Ascending `source_id` wins. +5. **Sources array sorted deterministically.** + - **Expected:** `sources` ordered `(priority DESC, refreshed_at DESC, source_id ASC)`. +6. **Projection top-level sorted by `(provider_id ASC, model_id ASC)`.** + - **Expected:** Stable across repeated calls. +7. **Availability state derivation.** + - Seed: live row `available=true stale=false` + models_dev row. + - **Expected:** `availability_state="available_live"`. + - Replace: live row `available=true stale=true` → `available_stale`. + - Replace: live row `available=false stale=true` → `unavailable_stale`. + - Remove live/extension row → `unknown`. +8. **`models.dev` and `builtin` never elevate availability above `unknown`.** + - Seed only `models_dev` + `builtin`. + - **Expected:** `availability_state="unknown"` and `available=null`. + +## Audit Coverage + +- C6 task tree (Task 03). +- SI-5 (`models.dev` not authority), SI-13 (partial success). + +## Pass Criteria + +- Every assertion holds across two consecutive runs (determinism). + +## Failure Criteria + +- Any tie-break diverges from the documented order. +- `models.dev`/`builtin` ever yield `available=true` directly. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-005.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-005.md new file mode 100644 index 000000000..494c497f6 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-005.md @@ -0,0 +1,57 @@ +# TC-FUNC-005: `models.dev` Source - TTL, Disable, Legacy Aliases + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/modelcatalog/modelsdev.go` +**Requirement:** TechSpec `Models.dev Source` + Config Lifecycle. +**Status:** Not Run + +## Objective + +Verify the `models.dev` source honors the configurable TTL, endpoint, timeout, and disable switch; tolerates current and legacy schema aliases; and never proves account-level availability. + +## Preconditions + +- [ ] `httptest`-based stub server mirroring `models.dev/api.json`. +- [ ] Config writes `[model_catalog.sources.models_dev]` with stub endpoint. + +## Test Steps + +1. **Current-schema parse.** + - Stub returns canonical fields (`reasoning`, `tool_call`, `limit.context`, `cost.input`, `cost.output`). + - **Expected:** Rows include `supports_reasoning`, `supports_tools`, `context_window`, `cost_*` populated. +2. **Legacy-schema parse.** + - Stub returns `supportsReasoning`, `supports_reasoning`, `supportsTools`, `supports_tools`, `contextWindow`, `maxInputTokens`, `maxOutputTokens`, `pricing.input`, `pricing.output`. + - **Expected:** All fields parse identically; tolerant aliases tested. +3. **TTL respected.** + - Trigger refresh; immediately call list with `Refresh=false`. + - **Expected:** Cached rows returned without HTTP call within TTL. +4. **Disable switch.** + - Set `[model_catalog.sources.models_dev] enabled = false`. + - **Expected:** Source status `refresh_state="idle"`, no outbound HTTP, rows absent for the source. +5. **Override endpoint and timeout.** + - Set `endpoint = "http://127.0.0.1:0/api.json"`, `timeout = "1ms"`. 
+ - **Expected:** Source status records timeout error; redacted `last_error`; prior stale rows preserved. +6. **No account availability.** + - Stub returns models for `codex` provider with `available=true` field. + - **Expected:** Projection ignores `available` from `models.dev` (kind keeps `available=null`); availability remains `unknown` unless live/extension says otherwise. +7. **Provider-scoped status row.** + - Stub spans 3 AGH providers; refresh once. + - **Expected:** `model_catalog_sources` has 3 rows (one per provider) for `models_dev`; no blank-provider sentinel row. + +## Audit Coverage + +- C6 task tree (Task 03 + Task 05 wiring). +- SI-5, SI-13. + +## Pass Criteria + +- All schema variants parse. +- TTL/disable/override honored. +- Provider-scoped status rows preserved. + +## Failure Criteria + +- Any legacy alias fails parse. +- Disabled source still calls HTTP. +- Account availability inferred from `models.dev`. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-006.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-006.md new file mode 100644 index 000000000..943d2f1ca --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-006.md @@ -0,0 +1,49 @@ +# TC-FUNC-006: Stale Fallback Preserves Prior Successful Rows + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/modelcatalog` +**Requirement:** TechSpec Safety Invariants SI-4. +**Status:** Not Run + +## Objective + +Verify that when a source refresh fails after at least one prior successful refresh, AGH preserves the previously stored rows, marks them stale, and surfaces the redacted `last_error` through projection and status. + +## Preconditions + +- [ ] Catalog seeded via successful refresh of a stub source. +- [ ] Stub source can be flipped to fail on demand. + +## Test Steps + +1. **Successful refresh.** + - Trigger refresh; assert `model_catalog_rows` has rows for the source with `stale=0`. 
+ - **Expected:** Source status `last_success_at` populated; rows readable via projection. +2. **Force failure on next refresh.** + - Stub returns 5xx; trigger `agh provider models refresh codex --source models_dev`. + - **Expected:** Source status records `refresh_state="failed"`, `last_error` redacted; previous rows now flagged `stale=1`. +3. **List after failure returns stale rows with markers.** + - Command: `agh provider models list codex --include-stale -o json`. + - **Expected:** Rows present with `stale=true`; `availability_state` either `available_stale` or `unavailable_stale` if previous live row existed; `unknown` otherwise. +4. **Without `--include-stale`, projection still includes stale rows but flags them.** + - **Expected:** Default behavior surfaces stale rows tagged `stale=true` (TechSpec keeps stale rows usable as fallback). +5. **Daemon restart preserves stale rows.** + - Restart daemon; reissue list. + - **Expected:** Same rows present, still flagged stale; no row loss. + +## Audit Coverage + +- C6 task tree (Task 03, Task 05). +- SI-4, SI-13. + +## Pass Criteria + +- Stale rows persist across refresh failure and daemon restart. +- `last_error` redacted (no API key / OAuth / env secret string). + +## Failure Criteria + +- Failure clears prior rows. +- Status loses `last_success_at`. +- Stale rows missing the `stale` flag. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-007.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-007.md new file mode 100644 index 000000000..23ffd6bcb --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-007.md @@ -0,0 +1,51 @@ +# TC-FUNC-007: Partial Source Success vs All-Source Failure + +**Priority:** P0 +**Type:** Functional +**Module:** `internal/modelcatalog.Service.ListModels` +**Requirement:** TechSpec Safety Invariants SI-13. 
+**Status:** Not Run + +## Objective + +Verify that: +- A list call succeeds when at least one source delivers usable rows or a stale cache exists. +- A list call fails (deterministic error) only when every usable source fails AND no stale cache exists. + +## Preconditions + +- [ ] Catalog with multiple sources registered for one provider. +- [ ] Stub control over each source's success/failure. + +## Test Steps + +1. **Partial success.** + - Force `models.dev` 5xx; let `builtin` return rows. + - Command: `agh provider models list codex -o json`. + - **Expected:** Exit 0; rows from `builtin` returned; status reports `models_dev` as failed; `last_error` redacted. +2. **All-source failure with stale cache.** + - Run a successful refresh first; then force every source to fail. + - **Expected:** List returns stale rows with `stale=true`; no error to operator. +3. **All-source failure with no stale cache.** + - Wipe SQLite catalog tables (test harness only); force every source to fail. + - **Expected:** List returns deterministic error referencing the failed sources; CLI exit non-zero with structured JSON error in `-o json` mode. +4. **Refresh during all-source failure remains coalesced.** + - Issue two concurrent refreshes for the same provider. + - **Expected:** One subprocess/network attempt per source; status batch returned identically to both callers. + +## Audit Coverage + +- C6 task tree (Task 03, Task 05). +- SI-4, SI-13. + +## Pass Criteria + +- Steps 1-2 succeed without error. +- Step 3 fails with structured error and non-zero exit. +- Step 4 shows single underlying call. + +## Failure Criteria + +- Partial failure reported as global failure. +- All-source failure with stale cache returns error. +- Coalescing breaks under concurrent refresh. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-008.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-008.md new file mode 100644 index 000000000..3e44a25bd --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-008.md @@ -0,0 +1,51 @@ +# TC-FUNC-008: Live Provider Source Timeout + Effective Auth/Home/Env + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/modelcatalog/live_sources.go` +**Requirement:** TechSpec Live Provider Sources, SI-3. +**Status:** Not Run + +## Objective + +Verify each registered live provider source is timeout-bound, uses the provider's effective auth/home/env policy, never inherits the request context's deadline implicitly, and records source status (not session blockers) on failure. + +## Preconditions + +- [ ] Stub or fake provider subprocess and HTTP endpoints. +- [ ] Provider config with `home_policy`, `env_policy`, `auth_mode` set per provider. +- [ ] Daemon base env injected for live discovery. + +## Test Steps + +1. **Timeout enforcement.** + - Stub server delays 30s; provider discovery timeout 1s. + - **Expected:** Source status records `failed` with redacted timeout message; no panic; coalescing serializes per provider (TC-PERF-001 covers concurrent storms). +2. **Provider home policy honored.** + - Set `home_policy=isolated`; spawn live discovery subprocess. + - **Expected:** Subprocess `HOME` matches provider isolated home; daemon does not leak operator `HOME`. +3. **Auth status command non-zero.** + - Stub `auth_status_command` returns exit 2. + - **Expected:** Source status `failed`; daemon does not raise an operator error; manual model entry still works. +4. **Provider secret resolver exposes redacted env.** + - Resolver injects `OPENAI_API_KEY=secret-xyzzy`. + - **Expected:** Source error log entries do not contain `secret-xyzzy`; refer to TC-SEC-001 for cross-surface redaction. +5. 
**Source IDs are `provider_live:<provider_id>` with priority 110.** + - **Expected:** SQLite rows match the documented IDs and priority (Task 04 invariant). + +## Audit Coverage + +- C6 task tree (Task 04). +- SI-1 (no session blocker), SI-3, SI-9. + +## Pass Criteria + +- Timeouts enforced. +- Effective home/env honored. +- Source IDs and priority match Task 04 contract. + +## Failure Criteria + +- Subprocess inherits operator `HOME`. +- Source error contains raw secret material. +- Timeout exceeds configured timeout. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-009.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-009.md new file mode 100644 index 000000000..f76b2ffff --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-009.md @@ -0,0 +1,46 @@ +# TC-FUNC-009: Live Discovery Never Touches ACP Sessions + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/modelcatalog` live sources + `internal/acp` +**Requirement:** TechSpec Safety Invariants SI-2, ADR-001. +**Status:** Not Run + +## Objective + +Verify live provider discovery never calls `session/new`, `session/load`, `session/set_model`, or `session/set_config_option`, and that unavailable side-effect-free discovery paths surface as source-status failures, not session blockers. + +## Preconditions + +- [ ] ACP fake driver instrumented to assert it is not invoked from discovery code paths. +- [ ] Discovery sources registered for built-in providers and adapter-config providers (OpenClaw, Hermes, Pi). + +## Test Steps + +1. **Run a refresh storm against every provider.** + - Command: `for p in codex anthropic openrouter ollama opencode openclaw hermes pi; do agh provider models refresh $p; done`. + - **Expected:** ACP fake driver records zero invocations. +2. **Provider without `discovery.command`/`discovery.endpoint`.** + - Configure OpenClaw with `discovery.enabled=true` but no command/endpoint. 
+ - **Expected:** Source status `refresh_state="failed"`, `last_error` references missing discovery contract; session creation for that provider remains usable; manual model entry still valid. +3. **Provider discovery enabled with invalid HTTP endpoint.** + - Set `endpoint = "http://127.0.0.1:0"`. + - **Expected:** Source status `failed` with redacted error; ACP driver still untouched. +4. **Concurrent session creation while discovery refresh runs.** + - Trigger refresh and a session create simultaneously for the same provider. + - **Expected:** Session creation completes without waiting on discovery; ACP fake records only `session/new` from the session caller, not from discovery. + +## Audit Coverage + +- C6 task tree (Task 04, Task 06). +- SI-1, SI-2, SI-3. + +## Pass Criteria + +- Zero ACP `session/*` calls originate from discovery code. +- Missing discovery configuration produces source status, never blocks sessions. + +## Failure Criteria + +- Discovery code path invokes any ACP session method. +- Failure to discover blocks session creation. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-010.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-010.md new file mode 100644 index 000000000..c764540cf --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-010.md @@ -0,0 +1,50 @@ +# TC-FUNC-010: ACP `session/set_config_option` Precedence + +**Priority:** P0 +**Type:** Functional +**Module:** `internal/acp` (Driver.applySessionModel) +**Requirement:** TechSpec ACP Session Config Options, SI-7. +**Status:** Not Run + +## Objective + +Verify the upgraded SDK driver prefers `session/set_config_option` for model and reasoning effort and only falls back to `session/set_model` when no matching config option exists. Reasoning effort is never sent when no matching control is advertised. + +## Preconditions + +- [ ] `coder/acp-go-sdk@v0.12.2` upgraded. 
+- [ ] ACP fake driver fixtures expose `configOptions` for `model` and `reasoning_effort` (and the documented synonyms). + +## Test Steps + +1. **`session/new` advertises a `model` config option matching the requested model.** + - **Expected:** Driver issues `session/set_config_option` with `id="model"`, `value=<model_id>`; never invokes `session/set_model` for this case. +2. **`session/new` advertises a reasoning option.** + - **Expected:** Driver applies reasoning via `session/set_config_option`; legacy `set_model` not invoked. +3. **`config_option_update` event arrives mid-session.** + - **Expected:** Driver updates session state; HTTP/UDS session capability surfaces reflect new options on next read. +4. **No matching config option present, but legacy model state advertises the model.** + - **Expected:** Driver falls back to `session/set_model`; debug log notes fallback reason. +5. **Neither config option nor legacy model state.** + - **Expected:** Driver does not send any model mutation; reasoning effort is silently skipped (SI-7); session creation succeeds with default state. +6. **Conservative ID matching.** + - Stub option ID `model_v2` (not in known list). + - **Expected:** Driver does not assume it is a model option; treats as opaque; falls back as in step 5 if no exact `model` ID. + +## Audit Coverage + +- C6 task tree (Task 06). +- SI-7. + +## Pass Criteria + +- Steps 1-3 use `session/set_config_option`. +- Step 4 falls back to `session/set_model`. +- Step 5 sends no mutation. +- Step 6 never invents reasoning levels from `supports_reasoning=true`. + +## Failure Criteria + +- Driver invokes `session/set_model` when a matching config option exists. +- Reasoning effort fired without an advertised control. +- Unknown option IDs treated as model option. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-011.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-011.md new file mode 100644 index 000000000..8f9be6609 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-011.md @@ -0,0 +1,50 @@ +# TC-FUNC-011: Extension `model.source` Manifest + Row Validation + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/extension` +**Requirement:** ADR-003, TechSpec Extension Sources. +**Status:** Not Run + +## Objective + +Verify extension manifests can declare a `model.source` capability with a normalizable slug; non-normalizable slugs are rejected; `models/list` results pass through `internal/modelcatalog` validation; invalid rows are dropped with deterministic source-status errors. + +## Preconditions + +- [ ] Extension fixture with manifest declaring `model.source` capability for one provider. +- [ ] Daemon configured to register that extension. + +## Test Steps + +1. **Manifest accepts normalizable slug.** + - Manifest declares `name = "Acme Models"` mapped to slug `acme-models`. + - **Expected:** Daemon registers `source_id="extension:acme-models"`; manifest validation passes. +2. **Manifest rejects unmappable slug.** + - Manifest declares `name = "??"`. + - **Expected:** Validation fails with deterministic error referencing the manifest field. +3. **Extension returns valid rows.** + - `models/list` returns rows with provider/model IDs the extension declares. + - **Expected:** Rows persist; merge applies extension priority 100; status `succeeded`. +4. **Extension returns invalid rows.** + - Stub returns row with empty `model_id`. + - **Expected:** Row rejected; remaining valid rows persist; source status records redacted error referencing the offending field. +5. **Extension declares provider it has no grant for.** + - **Expected:** Source status reports `failed` with capability-missing error; no rows persisted. 
+ +## Audit Coverage + +- C6 task tree (Task 08). +- SI-8 (only `internal/modelcatalog.Store` writes rows), SI-9 (redaction). + +## Pass Criteria + +- Manifest validation matches Task 08 fixtures. +- Invalid rows do not pollute persisted catalog. +- Capability gate enforced. + +## Failure Criteria + +- Invalid manifest passes validation. +- Invalid row corrupts persisted state. +- Capability gate bypassed. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-012.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-012.md new file mode 100644 index 000000000..549943f84 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-012.md @@ -0,0 +1,44 @@ +# TC-FUNC-012: Extension Capability Missing / Revoked = Denial + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/extension/host_api_models.go` +**Requirement:** ADR-003. +**Status:** Not Run + +## Objective + +Verify extension Host API methods (`models/list`, `models/refresh`, `models/status`) honor capability grants and surface deterministic denial errors when grants are missing or revoked, without leaking daemon internals. + +## Preconditions + +- [ ] Extension fixture with grants toggleable per method. + +## Test Steps + +1. **All three grants present.** + - **Expected:** Host API succeeds; payload matches daemon-owned projection (not raw extension payload). +2. **`models/list` grant missing.** + - **Expected:** Host API returns deterministic capability error; no rows leaked. +3. **`models/refresh` grant missing.** + - **Expected:** Refresh denied; no source status changes; no subprocess executed. +4. **`models/status` grant missing.** + - **Expected:** Status request denied; no source status read. +5. **Grant revoked mid-run.** + - Trigger list, then revoke grant, then trigger again. + - **Expected:** Second call denied; no cached payload returned. + +## Audit Coverage + +- C6 task tree (Task 08). +- SI-8, SI-9. 
+ +## Pass Criteria + +- Capability gate enforced on every call. +- Errors deterministic. + +## Failure Criteria + +- Missing grant still returns rows or status. +- Error surface leaks daemon internals. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-013.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-013.md new file mode 100644 index 000000000..65b0f2432 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-013.md @@ -0,0 +1,44 @@ +# TC-FUNC-013: Source Error Redaction at Persistence + Projection + +**Priority:** P0 +**Type:** Functional +**Module:** `internal/modelcatalog/redact.go`, projection helpers. +**Requirement:** TechSpec SI-9. +**Status:** Not Run + +## Objective + +Verify source errors are redacted at both persistence time and at every public projection boundary so that secrets cannot leak through alternate surfaces. + +## Preconditions + +- [ ] Stub source whose error message contains an API key (`sk-test-1234567890abcdef`), an OAuth token (`Bearer secret.token`), and an env-shaped secret (`OPENAI_API_KEY=secret-xyzzy`). +- [ ] Daemon log capture available. + +## Test Steps + +1. **Trigger refresh failure with the seeded error string.** + - **Expected:** SQLite `model_catalog_sources.last_error` contains a redacted summary; raw secret strings absent. +2. **List status via HTTP / UDS / CLI / Host API.** + - **Expected:** `last_error` field redacted in every surface; payload byte-equal between HTTP and UDS for the same status row. +3. **List status via web app.** + - **Expected:** Web component renders the redacted string only; no secret visible in DOM, network response, or React Query cache (TC-UI-001 covers UI rendering). +4. **Daemon log capture.** + - **Expected:** Structured log entry omits secret strings; correlation keys (`refresh_request_id`, `provider_id`, `source_id`, `source_kind`) present. +5. 
**Inject error at projection time only (bypassing persistence redaction).** + - **Expected:** Projection helper still redacts before serialization (defense in depth at HTTP/UDS/Host API/SSE boundary). + +## Audit Coverage + +- C6 task tree (Task 11), C11 disruption probe. +- SI-9. + +## Pass Criteria + +- No surface emits raw secret material. +- Both persistence and projection redaction functions invoked. + +## Failure Criteria + +- Any surface (logs, status, API, web, Host API, SSE) reveals a secret. +- Projection skips redaction when persistence layer is bypassed. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-014.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-014.md new file mode 100644 index 000000000..8f697584e --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-014.md @@ -0,0 +1,48 @@ +# TC-FUNC-014: Refresh Deadline Detached From Request Context + +**Priority:** P1 +**Type:** Functional +**Module:** `internal/modelcatalog.Service.Refresh` +**Requirement:** TechSpec SI-11. +**Status:** Not Run + +## Objective + +Verify refresh work uses `context.WithoutCancel(ctx)` plus an explicit `context.WithDeadline`, so HTTP/UDS request cancellation does not abort refresh prematurely and refresh deadlines do not leak from the request context. + +## Preconditions + +- [ ] Refresh stub configured to take longer than the request timeout. +- [ ] Test harness with deterministic clock or sleep-based assertion. + +## Test Steps + +1. **Cancel the HTTP request mid-refresh.** + - Trigger `POST /api/providers/codex/models/refresh` with a 100ms client timeout while the source takes 2s. + - **Expected:** Client receives canceled response; daemon completes refresh through the configured deadline; `model_catalog_sources` records the refresh outcome. +2. **Configured deadline applied.** + - Configure a refresh deadline (default 60s in TechSpec; configurable per source). 
+ - **Expected:** Refresh completes within the configured deadline regardless of the request lifetime. +3. **Daemon shutdown joins outstanding refresh workers.** + - Initiate refresh; gracefully shut daemon. + - **Expected:** Daemon waits for refresh worker to finish (or hits configured shutdown timeout) before exit; no orphan goroutine; SQLite rows consistent. +4. **Repeated cancellation under storm.** + - Cancel 100 sequential refresh calls within 50ms. + - **Expected:** Coalescing prevents storm; one underlying refresh completes; status reflects single outcome. + +## Audit Coverage + +- C6 task tree (Task 05, Task 11). +- SI-11. + +## Pass Criteria + +- Refresh outcome recorded after request cancellation. +- Deadlines respected. +- Daemon shutdown clean. + +## Failure Criteria + +- Refresh aborts when request cancels. +- Deadlines inherited implicitly from request context. +- Goroutine leak after shutdown. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-015.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-015.md new file mode 100644 index 000000000..116cfd426 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-FUNC-015.md @@ -0,0 +1,48 @@ +# TC-FUNC-015: Generated Contracts and Docs Drift Gate + +**Priority:** P1 +**Type:** Functional +**Module:** Codegen + Docs +**Requirement:** TechSpec Web/Docs Impact, Task 10. +**Status:** Not Run + +## Objective + +Verify `make codegen` regenerates `openapi/agh.json`, `web/src/generated/agh-openapi.d.ts`, and CLI references; the docs vitest enforces hard-cut copy in `packages/site`. + +## Preconditions + +- [ ] Working tree clean except QA artifacts. +- [ ] `make` toolchain available. + +## Test Steps + +1. **Run codegen.** + - Command: `make codegen`. + - **Expected:** `git status` shows no diff (committed state already matches generated). +2. **Run codegen-check.** + - Command: `make codegen-check`. + - **Expected:** Exit 0. +3. 
**Run docs vitest.** + - Command: `cd packages/site && bun run test -- provider-model-catalog-docs`. + - **Expected:** Suite passes; no flat-field claims (`default_model`, `supported_models`, `supports_reasoning_effort`) in narrative copy outside the hard-cut warning. +4. **CLI docs regenerated.** + - Command: `make cli-docs`. + - **Expected:** `packages/site/content/runtime/cli/provider/models/{list,refresh,status}.mdx` reflects current cobra exports. +5. **Inspect MDX sources.** + - Command: `grep -R "default_model\|supported_models\|supports_reasoning_effort" packages/site/content/runtime`. + - **Expected:** No matches outside hard-cut warning copy. + +## Audit Coverage + +- C6 task tree (Task 10). + +## Pass Criteria + +- Codegen idempotent; docs vitest green. + +## Failure Criteria + +- Codegen produces diff. +- Docs vitest fails. +- Hard-cut residue in narrative copy. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-001.md new file mode 100644 index 000000000..834702b86 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-001.md @@ -0,0 +1,52 @@ +# TC-INT-001: Global Migration v23 - Fresh DB + Reopen-After-Restart + +**Priority:** P0 +**Type:** Integration +**Systems:** `internal/store/globaldb` schema, `internal/modelcatalog.Store`. +**Requirement:** TechSpec Data Model, SI-10, Task 02. +**Status:** Not Run + +## Objective + +Verify the migration registry creates `model_catalog_sources`, `model_catalog_rows`, `model_catalog_reasoning_efforts`, and the documented indexes on a fresh DB; that the `BEGIN IMMEDIATE` write transaction is honored; that reopening the DB after a daemon restart keeps the row identity stable; and that the migration registry append-only contract still passes after v23. + +## Preconditions + +- [ ] Test isolated `globaldb` instance. +- [ ] No prior migrations. + +## Test Steps + +1. 
**Fresh DB migration.** + - Run migrator end-to-end. + - **Expected:** `schema_migrations` ends at v23 with the documented `name`/`checksum` for the model catalog migration; previous v1-v22 unchanged. +2. **Tables and indexes exist.** + - Inspect SQLite schema. + - **Expected:** Three tables and the indexes `idx_model_catalog_rows_provider_model`, `idx_model_catalog_rows_source_provider`, `idx_model_catalog_sources_provider` exist; foreign-key cascade on reasoning efforts present. +3. **Insert + read round-trip.** + - Use `Store.ReplaceSourceRows` with one row including reasoning efforts and a stale flag. + - **Expected:** `ListRows`/`ListSourceStatus` returns identical data; reasoning efforts ordered by `rank`. +4. **Reopen after restart.** + - Close DB; reopen. + - **Expected:** Rows present; reasoning efforts still ordered; status row preserved. +5. **WAL/SHM companion handling.** + - Simulate stale `-wal`/`-shm` companions; reopen. + - **Expected:** Migrator recovers cleanly; no migration mismatch. +6. **Append-only contract guarded.** + - Modify migration v23 hash and reopen. + - **Expected:** Migrator fails fast with mismatch error; never silently rewrites history. + +## Audit Coverage + +- C6 task tree (Task 02), C8 cross-surface persistence truth. +- SI-8, SI-10. + +## Pass Criteria + +- All steps pass with deterministic data. + +## Failure Criteria + +- Schema differs from TechSpec. +- Append-only contract rewritable. +- Reopen loses rows. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-002.md new file mode 100644 index 000000000..7189a83f8 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-002.md @@ -0,0 +1,48 @@ +# TC-INT-002: HTTP/UDS Native Catalog Handlers Serve Daemon-Owned Projection + +**Priority:** P0 +**Type:** Integration +**Systems:** `internal/api/core`, `internal/api/httpapi`, `internal/api/udsapi`, `internal/modelcatalog`. 
+**Requirement:** TechSpec Public Interfaces, ADR-001, Task 07. +**Status:** Not Run + +## Objective + +Verify the native catalog HTTP and UDS routes (registered via the `/api/providers/*catalog_path` dispatcher) return identical daemon-owned payloads for list / refresh / status across both transports. + +## Preconditions + +- [ ] Daemon running with seeded catalog state from TC-INT-001 fixture. +- [ ] Bearer token configured for HTTP; UDS connected via local socket from bootstrap manifest. + +## Test Steps + +1. **HTTP `GET /api/providers/models`.** + - **Expected:** 200; payload `ProviderModelPayload` shape; deterministic sort order. +2. **HTTP `GET /api/providers/{provider_id}/models`.** + - **Expected:** Returns subset filtered by provider; same deterministic sort. +3. **HTTP `POST /api/providers/{provider_id}/models/refresh`.** + - **Expected:** Returns `[]SourceStatus` with `refresh_request_id`; status reflects new `last_refresh_at`. +4. **HTTP `GET /api/providers/models/status` and `/api/providers/{provider_id}/models/status`.** + - **Expected:** Status payload includes redacted `last_error`; rows match SQLite source rows. +5. **UDS parity.** + - Repeat each call via UDS client (`internal/cli/client_provider_models.go` exposes the parity surface). + - **Expected:** Same shape; UDS responses match HTTP byte-equally for steady-state list payloads (TC-INT-003 validates byte equality). +6. **Refresh failure path.** + - Force a source to fail; refresh again. + - **Expected:** HTTP and UDS responses both surface failed status with redacted error. + +## Audit Coverage + +- C5 channel coverage, C8 cross-surface truth. +- SI-4, SI-9, SI-13. + +## Pass Criteria + +- All routes respond with documented payloads on both transports. +- Refresh failures surface consistently. + +## Failure Criteria + +- Any route differs in shape between HTTP and UDS. +- Refresh failure exposes raw error. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-003.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-003.md new file mode 100644 index 000000000..b77888f9a --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-003.md @@ -0,0 +1,45 @@ +# TC-INT-003: HTTP/UDS Canonical JSON Byte Equality + CLI Parity + +**Priority:** P0 +**Type:** Integration +**Systems:** `internal/api/core` deterministic encoder, `internal/api/testutil/model_catalog_parity_test.go`. +**Requirement:** TechSpec Testing Approach, Task 11. +**Status:** Not Run + +## Objective + +Verify the native catalog payload bytes match exactly between HTTP and UDS for at least one deterministic catalog state, that CLI structured JSON output covers the same persisted state, and that the Host API projection is structurally equivalent. + +## Preconditions + +- [ ] Daemon seeded with deterministic catalog state. +- [ ] Bearer auth + UDS socket from bootstrap manifest. + +## Test Steps + +1. **Capture HTTP `GET /api/providers/models` response body bytes.** +2. **Capture UDS `GET /api/providers/models` response body bytes.** + - **Expected:** Byte equal after canonical sort. +3. **Capture CLI `agh provider models list -o json` output.** + - **Expected:** Structurally equivalent (same provider/model rows, sources, availability) after JSON normalization; CLI may add wrapper metadata but core list matches. +4. **Capture Host API `models/list` (extension capability granted) response.** + - **Expected:** Same provider/model rows; daemon-owned projection (not raw extension payload). +5. **Repeat for status (`GET /api/providers/models/status`) and refresh (one cycle).** + - **Expected:** Same parity holds. +6. **Modify state via Settings > Providers (TC-UI-001).** + - **Expected:** Subsequent CLI/HTTP/UDS/Host API calls reflect the change uniformly. + +## Audit Coverage + +- C5, C8. +- TC-INT-002 covers shape; TC-INT-003 enforces byte/structural identity. 
+ +## Pass Criteria + +- All four surfaces agree for at least the steady-state list payload. + +## Failure Criteria + +- Any drift between HTTP and UDS bytes. +- CLI loses fields. +- Host API exposes raw extension payload. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-004.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-004.md new file mode 100644 index 000000000..99092f352 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-004.md @@ -0,0 +1,49 @@ +# TC-INT-004: `/api/openai/v1/models` HTTP-Only Registration + Filter + +**Priority:** P0 +**Type:** Integration +**Systems:** `internal/api/httpapi/routes.go`, `internal/api/udsapi/routes.go`, `internal/api/core/model_catalog.go`. +**Requirement:** TechSpec OpenAI-Compatible Projection, Task 07. +**Status:** Not Run + +## Objective + +Verify `/api/openai/v1/models` is registered only on HTTP, returns the OpenAI-shaped projection with `agh` metadata, accepts `provider_id` filter, and is absent from UDS routes. + +## Preconditions + +- [ ] Daemon running with seeded catalog and bearer auth. + +## Test Steps + +1. **HTTP `GET /api/openai/v1/models`.** + - **Expected:** 200; body `{"object":"list","data":[...]}`; each item has `id`, `object="model"`, `created=0`, `owned_by=<provider_id>`, `agh.{provider_id, display_name, supports_tools, supports_reasoning, availability_state, reasoning_efforts, context_window, max_output_tokens, sources}`. +2. **`provider_id` filter.** + - Command: `GET /api/openai/v1/models?provider_id=codex`. + - **Expected:** Subset filtered; deterministic order. +3. **Unknown `provider_id`.** + - Command: `GET /api/openai/v1/models?provider_id=unknown-xyz`. + - **Expected:** 200 with empty `data` array; no error. +4. **UDS does not expose the route.** + - Command: hit UDS path `/api/openai/v1/models`. + - **Expected:** 404 (route not registered); UDS routes table only includes the native catalog dispatcher. +5. 
**Refresh route absent for OpenAI projection.** + - Command: HTTP `POST /api/openai/v1/models`. + - **Expected:** 404 / method not allowed; refresh remains exclusive to native catalog routes. +6. **Source identity exposed in `agh.sources`.** + - **Expected:** Array of `source_id` strings ordered consistently with native projection. + +## Audit Coverage + +- C5, C8. +- SI-9 (no secret in OpenAI payload). + +## Pass Criteria + +- All steps match documented behavior. + +## Failure Criteria + +- UDS exposes the OpenAI route. +- Filter ignored. +- `agh` metadata missing. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-005.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-005.md new file mode 100644 index 000000000..9a1b89de8 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-005.md @@ -0,0 +1,48 @@ +# TC-INT-005: Extension Source - Success and Denial Through Host API + +**Priority:** P0 +**Type:** Integration +**Systems:** `internal/extension`, `internal/modelcatalog`, Host API. +**Requirement:** ADR-003, TechSpec Extensibility Integration Plan, Task 08. +**Status:** Not Run + +## Objective + +Verify the extension `model.source` end-to-end path: AGH calls extension `models/list`, validates and persists rows, and surfaces the daemon-owned projection through Host API; capability denial is deterministic. + +## Preconditions + +- [ ] Extension fixture with `model.source` capability for provider `codex`. +- [ ] Capability grants toggleable. + +## Test Steps + +1. **Extension grant present, valid rows.** + - Trigger Host API `models/refresh` for `codex`. + - **Expected:** Extension subprocess invoked; AGH validates rows; SQLite catalog updated; status `succeeded`; `models/list` returns rows including extension priority 100. +2. **Extension returns invalid row.** + - **Expected:** Row dropped; source status records redacted error referencing the offending field; valid rows persist. +3. 
**Capability missing for `models/list`.** + - **Expected:** Deterministic capability error returned; no rows leaked. +4. **Capability missing for `models/refresh`.** + - **Expected:** No subprocess invoked; source status unchanged. +5. **Capability missing for `models/status`.** + - **Expected:** Deterministic capability error. +6. **Extension declares provider it has no grant for.** + - **Expected:** Refresh fails closed with capability error; valid grants for other providers unaffected. + +## Audit Coverage + +- C5, C6 (Task 08), C11 disruption probe. +- SI-8, SI-9. + +## Pass Criteria + +- Steps 1-2 produce correct catalog state. +- Steps 3-6 deterministically denied. + +## Failure Criteria + +- Denied call returns rows. +- Invalid extension row breaks persistence. +- Subprocess invoked without grant. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-006.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-006.md new file mode 100644 index 000000000..a7d96a6a3 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-INT-006.md @@ -0,0 +1,44 @@ +# TC-INT-006: ACP SDK v0.12.2 - Create / Load / Resume Coverage + +**Priority:** P0 +**Type:** Integration +**Systems:** `internal/acp` driver + ACP fake fixtures. +**Requirement:** TechSpec ACP Session Config Options, Task 06. +**Status:** Not Run + +## Objective + +Verify upgrade to `coder/acp-go-sdk@v0.12.2` keeps create/load/resume/mode behavior intact, exposes captured `configOptions`, and propagates `ACPCapsPayload.config_options` / `SessionConfigOptionPayload`. + +## Preconditions + +- [ ] ACP fake driver with fixtures for `session/new`, `session/load`, `config_option_update`, mode events. + +## Test Steps + +1. **`session/new` returns `configOptions`.** + - **Expected:** Driver records options; HTTP/UDS capability payload includes `config_options` with the documented shape. +2. 
**`session/load` reuses captured options.** + - **Expected:** No duplicate model mutations. +3. **`session/set_config_option` applied for model and reasoning.** + - **Expected:** Driver issues the call when matching IDs are advertised; legacy `session/set_model` only used as fallback (TC-FUNC-010 covers exact behavior). +4. **`config_option_update` event mid-session.** + - **Expected:** Capability payload updated on next read. +5. **Mode/cancellation/error fields renamed in v0.12.2.** + - **Expected:** Driver compiles and tests prove old behavior intact (existing create/load/resume coverage). +6. **Resume flow.** + - **Expected:** Resumed session retains `configOptions` from prior load; no new `session/set_*` calls if state matches. + +## Audit Coverage + +- C5, C6 (Task 06). + +## Pass Criteria + +- All ACP flows pass on the upgraded SDK. +- `config_options` surface populated. + +## Failure Criteria + +- ACP driver regresses on create/load/resume. +- `config_options` missing in payload. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-001.md new file mode 100644 index 000000000..aa7c5cd22 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-001.md @@ -0,0 +1,50 @@ +# TC-PERF-001: Refresh Concurrency - Per-Provider Serialization + Cross-Provider Parallelism + +**Priority:** P0 +**Type:** Performance +**Module:** `internal/modelcatalog.Service.Refresh`, refresh wrapper. +**Requirement:** TechSpec SI-12, Task 11. +**Status:** Not Run + +## Objective + +Verify per-provider refresh requests serialize before any subprocess or provider-home work, identical concurrent requests for one provider coalesce, refresh storms across providers proceed in parallel, and SQLite write contention is avoided (no `BUSY` errors). + +## Preconditions + +- [ ] Stub live sources for `codex`, `anthropic`, `gemini`, `openrouter`, `ollama` with measurable subprocess latency. 
+- [ ] Test harness counts subprocess invocations and SQLite write attempts. + +## Test Steps + +1. **N concurrent same-provider refreshes.** + - Issue 32 simultaneous `POST /api/providers/codex/models/refresh` requests. + - **Expected:** Exactly one subprocess invocation; all 32 callers receive the same status batch with the same `refresh_request_id`. +2. **N cross-provider refreshes.** + - Issue refreshes for all 5 providers concurrently. + - **Expected:** 5 underlying subprocess invocations run in parallel; total wall time approximates the slowest provider, not the sum. +3. **Mixed storm.** + - Issue 32 same-provider + 32 cross-provider concurrently. + - **Expected:** Same-provider coalesced; cross-provider parallel; no SQLite `BUSY` error escapes coalescing. +4. **Repeated coalescing returns identical statuses.** + - **Expected:** Two callers in the same coalesce window see byte-equal status payloads; refresh request id correlated. +5. **SQLite contention.** + - Drive 100 refreshes/second across 5 providers for 30s. + - **Expected:** Zero `SQLITE_BUSY` propagated; per-provider serialization holds; no row corruption. + +## Audit Coverage + +- C5, C6 (Task 11), C11 disruption probe. +- SI-12, SI-13. + +## Pass Criteria + +- Same-provider coalescing observed. +- Cross-provider parallelism observed. +- No `SQLITE_BUSY` escapes. + +## Failure Criteria + +- Multiple subprocess invocations per coalesced batch. +- Cross-provider serialized. +- SQLite errors observed. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-002.md new file mode 100644 index 000000000..a4844d13a --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-PERF-002.md @@ -0,0 +1,48 @@ +# TC-PERF-002: Detached Refresh Lifetime + Daemon Shutdown Join + +**Priority:** P0 +**Type:** Performance +**Module:** `internal/modelcatalog` refresh wrapper. +**Requirement:** TechSpec SI-11. 
+**Status:** Not Run + +## Objective + +Verify request-cancellation does not abort detached refresh work, the configured deadline binds refresh, daemon shutdown joins outstanding refresh workers, and no goroutine leaks remain. + +## Preconditions + +- [ ] Stub source with controllable latency. +- [ ] Goroutine leak detector or `runtime.NumGoroutine` snapshot harness. + +## Test Steps + +1. **Cancel mid-flight HTTP refresh.** + - Configure stub latency = 2s; client timeout = 100ms. + - **Expected:** Client gets canceled error; daemon completes refresh; SQLite reflects success; goroutine count returns to baseline. +2. **Override request context deadline.** + - Submit refresh under a context with 50ms deadline. + - **Expected:** Refresh ignores caller deadline; uses configured deadline. +3. **Daemon shutdown.** + - Trigger refresh; immediately call daemon shutdown. + - **Expected:** Shutdown waits for refresh to complete (or hits configured shutdown timeout); SQLite consistent; no orphan goroutine; `Close` on store happens after refresh worker join. +4. **Goroutine leak check.** + - After 100 cancellation cycles, snapshot `runtime.NumGoroutine`. + - **Expected:** No monotonic growth. + +## Audit Coverage + +- C11. +- SI-11, SI-12. + +## Pass Criteria + +- Refresh completes under cancellation. +- Daemon shuts down cleanly. +- No goroutine leak. + +## Failure Criteria + +- Refresh aborts when request cancels. +- Goroutine count grows. +- Daemon exits before refresh completes. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-001.md new file mode 100644 index 000000000..12bd116e4 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-001.md @@ -0,0 +1,45 @@ +# TC-REG-001: Hard-Cut Residue Repository Scan + +**Priority:** P1 +**Type:** Regression +**Surface:** Repository. +**Requirement:** ADR-002, Task 11.1. 
+**Status:** Not Run + +## Objective + +Verify no production code or generated artifact references `default_model`, `supported_models`, or `supports_reasoning_effort` outside the documented hard-cut warning copy and historical migration text. + +## Preconditions + +- [ ] Working tree clean except QA artifacts. + +## Test Steps + +1. **Repository grep.** + - Command: `grep -nE "default_model|supported_models|supports_reasoning_effort" -r --include="*.go" --include="*.ts" --include="*.tsx" --include="*.json" --include="*.toml" .` + - **Expected:** Only known allowlisted matches appear: + - `internal/modelcatalog/hardcut_residue_test.go` and related tests asserting the residue scan. + - `packages/site` warning copy (`provider-model-catalog-docs.test.ts`). + - QA artifacts under `.compozy/tasks/provider-model-catalog/qa/`. + - No production source under `internal/`, `web/src/`, `cmd/`, `openapi/`, or generated TS/openapi files contain the literal strings. +2. **Generated contracts.** + - Inspect `openapi/agh.json` and `web/src/generated/agh-openapi.d.ts`. + - **Expected:** No occurrences of the deleted fields. +3. **Web E2E fixtures.** + - Inspect `web/e2e/fixtures/`. + - **Expected:** No references to deleted keys. +4. **Site narrative copy.** + - **Expected:** Only hard-cut warning copy mentions the deleted keys; the docs vitest enforces this. + +## Audit Coverage + +- C6 (Task 11), C8. + +## Pass Criteria + +- Grep produces only allowlisted matches. + +## Failure Criteria + +- Any unexpected reference. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-002.md new file mode 100644 index 000000000..7a2078835 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-REG-002.md @@ -0,0 +1,42 @@ +# TC-REG-002: Generated Docs and CLI Reference Stay in Sync + +**Priority:** P1 +**Type:** Regression +**Surface:** `packages/site`, `make cli-docs`, `make codegen-check`. 
+**Requirement:** TechSpec Docs Impact, Task 10. +**Status:** Not Run + +## Objective + +Verify generated CLI docs, generated OpenAPI/TS types, and narrative MDX align with current production behavior. + +## Preconditions + +- [ ] Branch up-to-date. + +## Test Steps + +1. **Run `make cli-docs`.** + - **Expected:** No diff against committed `packages/site/content/runtime/cli/provider/models/{list,refresh,status}.mdx`. +2. **Run `make codegen-check`.** + - **Expected:** No diff in `openapi/agh.json` or `web/src/generated/agh-openapi.d.ts`. +3. **Run `cd packages/site && bun run test -- provider-model-catalog-docs`.** + - **Expected:** Suite passes; no flat-field claims outside warning copy. +4. **Open `packages/site/content/runtime/core/agents/model-catalog.mdx`.** + - **Expected:** Documents native HTTP/UDS catalog endpoints, `/api/openai/v1/models`, refresh lifetime/coalescing, extension `model.source`. Merge priority table reflects current source priorities (config 120 / live 110 / extension 100 / models_dev 50 / builtin 10). +5. **Open `packages/site/content/runtime/core/configuration/config-toml.mdx`.** + - **Expected:** `[model_catalog.sources.models_dev]`, `models.discovery`, and nested `[providers.<provider_id>.models]` documented; defaults and validation rules match TechSpec. + +## Audit Coverage + +- C6 (Task 10). + +## Pass Criteria + +- All gates green; no diff. + +## Failure Criteria + +- Codegen diff. +- Docs vitest fails. +- Narrative copy contradicts daemon behavior. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-001.md new file mode 100644 index 000000000..5da8d83ef --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-001.md @@ -0,0 +1,83 @@ +# TC-SCEN-001: Operator Edits Provider Catalog and Starts a Session + +**Priority:** P0 +**Type:** Real Scenario +**Status:** Not Run +**Estimated Time:** 25 minutes +**Created:** 2026-05-07 + +--- + +## Behavioral Scenario Charter + +- **Startup situation**: Operator runs an isolated AGH lab (unique `AGH_HOME`, ports, tmux socket) provisioned by `agh-qa-bootstrap`. At least one ACP-capable provider is configured with synthetic credentials; live discovery uses stub HTTP servers and fake subprocesses by default. +- **Operator intent**: Adjust the curated metadata for one provider, refresh its catalog, then start a new session against a chosen model with reasoning effort. +- **Expected business outcome**: Operator perceives a coherent catalog with deterministic source attribution; the new session creates against the chosen model and reasoning effort; selection persists across surfaces. +- **AGH surfaces used**: Web (Settings > Providers, new session dialog), HTTP (`/api/providers/...`), SQLite (`model_catalog_*` tables), ACP (`session/new`, `session/set_config_option`). +- **Real provider/LLM expectation**: ACP fake driver acts as the provider unless `MODELCATALOG_LIVE=1` is set; in that case one ACP-backed provider must produce a real `session/new` response. +- **Blocked live-provider boundary**: Default run uses fake ACP; `MODELCATALOG_LIVE=1` annex documents the real-provider boundary in the verification report. +- **Scenario contract minimums covered**: operator role, web channel, HTTP channel, SQLite truth, ACP control, manual entry, stale fallback observation. 
+ +## Actors and Agent Roles + +| Actor / Agent | Role | Expected Behavior | Evidence Source | +|---------------|------|-------------------|-----------------| +| Operator | Catalog editor + session creator | Edits curated metadata, refreshes catalog, starts session | Web screenshots + DOM snapshot | +| ACP fake driver | Provider | Returns `configOptions` for model + reasoning | ACP fixture transcript | +| Daemon | Catalog authority | Persists curated edit, refreshes, projects to surfaces | SQLite + HTTP responses | + +## Preconditions + +- [ ] Bootstrap manifest exists; `AGH_WEB_API_PROXY_TARGET` exported. +- [ ] Daemon running; web app reachable; ACP fake driver registered. +- [ ] Catalog seeded with `models.dev` + `builtin` rows for `codex`. + +## Journey Steps + +1. **Operator opens Settings > Providers.** + - Surface: Web. + - Input: Browser navigation to `/settings/providers` via `browser-use:browser` (fallback `agent-browser`). + - **Expected:** Provider cards render with source status; redacted `last_error` shown for any failed source; `default_model`/`supported_models`/`supports_reasoning_effort` strings absent in DOM and React Query cache. +2. **Operator adds a curated entry with reasoning efforts.** + - Surface: Web form. + - Input: `id="manual-gpt"`, `display_name="Manual GPT"`, `reasoning_efforts=["medium","high"]`, `default_reasoning_effort="medium"`. + - **Expected:** PUT request matches generated TS contract; daemon persists; SQLite `model_catalog_rows` has a `config` row at priority 120 with snapshot-preserved metadata; CLI / HTTP / UDS / Host API agree (TC-INT-003). +3. **Operator refreshes catalog.** + - Surface: Web refresh button. + - Input: Click refresh on `codex` card. + - **Expected:** UI shows pending state; on completion `last_refresh_at` updates; if a stub source is failing, `stale=true` flag visible with redacted error. +4. **Operator opens new session dialog and selects manual model.** + - Surface: Web dialog. 
+ - Input: select provider `codex`, model `manual-gpt`, reasoning `medium`. + - **Expected:** Dialog renders catalog rows from `useProviderModels`; manual entry valid; submission triggers `session/new` and `session/set_config_option` (TC-FUNC-010 invariant). +5. **Operator confirms session is live with chosen model.** + - Surface: Web active session panel. + - **Expected:** Session controls switch to ACP `configOptions`; chosen model + reasoning effort reflected; catalog metadata never overrides current option value (SI-7). +6. **Disruption probe - stale catalog while session lives.** + - Probe: stub `models.dev` 5xx; trigger refresh. + - **Expected:** Catalog rows flagged stale; running session unaffected; manual model selection still valid. + +## Required Evidence + +- Browser screenshots: settings page, dialog, active session controls. +- HTTP request/response logs (network panel exports). +- SQLite snapshots (rows + status) before and after edit. +- ACP fake driver transcript showing `session/new` + `session/set_config_option`. +- Daemon log capture with `refresh_request_id` correlation. + +## Audit Coverage + +- C4 (operator), C5 (Web + HTTP), C8 (cross-surface), C10 (artifact reuse: catalog row reused by TC-SCEN-002), C11 (stale probe), C14. + +## Pass Criteria + +- Operator goal achieved end-to-end without manual workaround. +- Catalog row visible across CLI/HTTP/UDS/Host API. +- ACP control matches TC-FUNC-010 invariants. +- Stale probe surfaces redacted error; session unaffected. + +## Failure Criteria + +- Settings form emits legacy fields. +- ACP control regresses to `session/set_model` despite advertised config option. +- Stale state hidden or session aborted. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-002.md new file mode 100644 index 000000000..550f72c76 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SCEN-002.md @@ -0,0 +1,83 @@ +# TC-SCEN-002: Agent Manages Catalog Through CLI/HTTP/UDS/Host API + +**Priority:** P0 +**Type:** Real Scenario +**Status:** Not Run +**Estimated Time:** 30 minutes +**Created:** 2026-05-07 + +--- + +## Behavioral Scenario Charter + +- **Startup situation**: Same isolated lab as TC-SCEN-001. Catalog state already includes the curated row produced in TC-SCEN-001 (artifact reuse). +- **Operator intent**: An external agent (script or another AGH agent) manages the catalog without web UI: list rows, refresh, inspect status, select a manual model, and start a session via API. +- **Expected business outcome**: The agent's CLI/HTTP/UDS/Host API operations are deterministic, structured, byte-equal between transports for steady-state list payloads, and reflect the same persisted state as the web UI. +- **AGH surfaces used**: CLI (`agh provider models {list|refresh|status}`), HTTP (`/api/providers/...`, `/api/openai/v1/models`), UDS (`/api/providers/...`), Host API (`models/list|refresh|status`). +- **Real provider/LLM expectation**: ACP fake driver creates the session; opt-in `MODELCATALOG_LIVE=1` annex covers a real provider. +- **Blocked live-provider boundary**: documented in the verification report when not running live. +- **Scenario contract minimums covered**: agent role, CLI/HTTP/UDS/Host API channels, refresh storm, redaction, deterministic JSON output, capability gating. 
+ +## Actors and Agent Roles + +| Actor / Agent | Role | Expected Behavior | Evidence Source | +|---------------|------|-------------------|-----------------| +| Remote agent | Catalog reader/operator | Drives CLI + HTTP + UDS + Host API | CLI transcripts | +| Daemon | Catalog authority | Serves identical projection | HTTP/UDS/Host API responses | +| Extension | `model.source` provider | Returns valid + invalid rows on demand | Extension subprocess transcript | + +## Preconditions + +- [ ] Bootstrap manifest exists; `AGH_HOME`, ports, sockets unique. +- [ ] Daemon running with extension fixture installed. +- [ ] TC-SCEN-001 catalog state present (`manual-gpt` curated row). + +## Journey Steps + +1. **Agent runs `agh provider models list -o json`.** + - Surface: CLI. + - Input: no flags. + - **Expected:** JSON includes `manual-gpt` row from TC-SCEN-001; sources sorted deterministically; output structurally equivalent to HTTP `GET /api/providers/models`. +2. **Agent triggers refresh storm.** + - Surface: CLI + HTTP concurrently for `codex`, `anthropic`, `gemini`. + - **Expected:** Same-provider coalescing observed (TC-PERF-001); cross-provider parallel; redacted errors only. +3. **Agent inspects status via UDS.** + - Surface: UDS client. + - **Expected:** Same byte-equal status payload as HTTP for steady-state list; payload includes `refresh_request_id` and `last_refresh_at`, with `last_error` redacted. +4. **Agent inspects OpenAI projection.** + - Surface: HTTP `GET /api/openai/v1/models?provider_id=codex`. + - **Expected:** OpenAI shape with `agh` metadata; UDS does NOT expose this route. +5. **Agent calls Host API `models/list` (with grant).** + - **Expected:** Daemon-owned projection; structurally equivalent to HTTP/CLI; raw extension payload not leaked. +6. **Agent revokes Host API grant and retries.** + - **Expected:** Deterministic capability error; no rows leaked. +7. 
**Agent creates a session via HTTP `POST /api/sessions` selecting manual model.** + - **Expected:** Session creation succeeds; ACP control uses `session/set_config_option`; session fixture confirms. +8. **Disruption probe - extension returns invalid row mid-storm.** + - **Expected:** Invalid row dropped; valid rows persist; redacted error surfaced; refresh request id correlated in logs. + +## Required Evidence + +- CLI transcripts with structured JSON output. +- HTTP/UDS response bodies (canonical sort) for byte-equality check. +- Host API response bodies before and after grant revoke. +- Daemon log entries with `refresh_request_id`, `provider_id`, `source_id`, `source_kind`, `extension_name` correlation keys. +- Extension subprocess transcript. + +## Audit Coverage + +- C4 (agent), C5 (CLI + HTTP + UDS + Host API), C8 (parity), C9 (provider boundary), C10 (artifact reuse from TC-SCEN-001), C11 (refresh storm + extension denial + invalid row), C14. + +## Pass Criteria + +- All four surfaces show identical persisted state. +- Refresh storm coalesced. +- Capability gate enforced. +- Manual model session created via API. + +## Failure Criteria + +- CLI/HTTP/UDS/Host API drift. +- OpenAI projection registered on UDS. +- Capability gate bypassed. +- Refresh storm causes SQLite `BUSY`. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-001.md new file mode 100644 index 000000000..db62b537d --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-001.md @@ -0,0 +1,43 @@ +# TC-SEC-001: No Secret Material Leaks Across Surfaces + +**Priority:** P0 +**Type:** Security +**OWASP Category:** A09 (logging) / A02 (cryptographic failures) +**Risk Level:** Critical +**Requirement:** TechSpec SI-9. 
+**Status:** Not Run + +## Objective + +Verify API keys, OAuth tokens, secret-shaped env vars, and provider credential material never appear in any source error, log, status payload, SSE event, web-visible payload, or Host API response. + +## Preconditions + +- [ ] Daemon running; structured logs captured. +- [ ] Source stubs configured to return errors that include an API key, an OAuth token, and env-shaped secrets. +- [ ] Provider env explicitly seeds `OPENAI_API_KEY=sk-test-1234567890abcdef`, `ANTHROPIC_API_KEY=sk-ant-secret`, `OAUTH_REFRESH_TOKEN=oauth.refresh.secret`. + +## Test Steps + +1. **Trigger refresh failures with seeded errors for `models.dev`, live providers, extension source.** +2. **Capture logs (stdout + structured), HTTP/UDS status responses, CLI output, Host API response, web `network` traffic from Settings > Providers, SSE events.** +3. **Grep all captured payloads for the seeded secret strings.** + - **Expected:** Zero matches. +4. **Reduce redaction helper to no-op (test harness override) and re-run.** + - **Expected:** Projection-time redaction still catches secrets; defense-in-depth confirmed. +5. **Restore redaction helper; introduce a new secret-looking string in error.** + - **Expected:** Redacted summary remains readable but obfuscates secret-shaped substrings. + +## Audit Coverage + +- C11 disruption probe, C14. +- SI-9. + +## Pass Criteria + +- No secret leak across any surface. + +## Failure Criteria + +- Secret string appears in any captured surface. +- Redaction can be toggled off from outside the redaction helper. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-002.md new file mode 100644 index 000000000..6b3d654fc --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-SEC-002.md @@ -0,0 +1,50 @@ +# TC-SEC-002: `/api/openai/v1/models` Auth + OpenAI-Shaped Errors + +**Priority:** P0 +**Type:** Security +**OWASP Category:** A01 (broken access control) +**Risk Level:** High +**Requirement:** TechSpec OpenAI-Compatible Projection. +**Status:** Not Run + +## Objective + +Verify the OpenAI projection enforces bearer auth like every `/api/*` route, returns OpenAI-shaped error envelope on auth failure, and remains absent from UDS where authentication semantics differ. + +## Preconditions + +- [ ] Daemon running with bearer auth enforced. +- [ ] Test client without token. + +## Test Steps + +1. **Unauthenticated HTTP request.** + - Command: `GET /api/openai/v1/models` without `Authorization`. + - **Expected:** 401 / 403 with OpenAI-shaped error envelope: `{"error":{"message":"...","type":"...","code":"..."}}`; AGH HTTP status code matches `/api/*` semantics; no catalog data leaked. +2. **Bad bearer token.** + - **Expected:** Same shape; rate limiting and CORS middleware applied if enabled. +3. **CORS preflight.** + - Send OPTIONS with allowed origin. + - **Expected:** CORS responds per `/api/*` policy. +4. **Authenticated `provider_id` filter for unknown provider.** + - **Expected:** 200 with empty `data`; no error. +5. **Method not supported for refresh.** + - Command: `POST /api/openai/v1/models`. + - **Expected:** 404/405 with OpenAI-shaped error if applicable. +6. **UDS does not expose the route.** + - Command: hit UDS path. + - **Expected:** 404; auth boundary respected. + +## Audit Coverage + +- C5, C9 boundary, C11. + +## Pass Criteria + +- All steps return documented behavior. + +## Failure Criteria + +- Unauthenticated call returns data. 
+- Error envelope diverges from OpenAI shape. +- UDS exposes the route. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-001.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-001.md new file mode 100644 index 000000000..9396364a4 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-001.md @@ -0,0 +1,57 @@ +# TC-UI-001: Settings > Providers - Source Status + Refresh + +**Priority:** P1 +**Type:** UI +**Surface:** `web/src/routes/_app/settings/providers.tsx`, `web/src/systems/model-catalog/` +**Requirement:** TechSpec Web, Task 09. +**Status:** Not Run + +## Objective + +Verify each provider card surfaces source status (id, kind, last refresh, next refresh, redacted last error, stale flag), exposes a refresh control, and reflects daemon-served catalog state including curated metadata snapshot preservation. + +## Preconditions + +- [ ] Daemon running with seeded catalog state. +- [ ] Web app served under `AGH_WEB_API_PROXY_TARGET` from bootstrap manifest. +- [ ] Browser via `browser-use:browser` or `agent-browser` fallback. + +## Test Steps + +1. **Open Settings > Providers.** + - **Expected:** Each provider card lists every catalog source with status; loading skeleton replaced by data; no console errors. +2. **Trigger refresh for one provider.** + - **Expected:** Refresh button enters pending state; on completion the card updates source rows, `last_refresh_at`, and `refresh_state`; no other provider is impacted. +3. **Force a source error and refresh.** + - **Expected:** Card shows redacted `last_error`; stale flag visible; manual entry control still available. +4. **Curated metadata snapshot preserved on save.** + - Edit curated entry; save settings. + - **Expected:** Catalog adapters use snapshot-preserved metadata so unrelated rows are not mutated; daemon `config` source rows reflect only the edited fields. +5. 
**Visual conformance.** + - **Expected:** Card uses `DESIGN.md` tokens (no shadows, warm-dark palette, signal palette for refresh state colors). Default state matches `Paper` artboards in `DESIGN.md`. No invented metrics shown. + +## Visual Specifications + +- Background: `oklch` warm-dark token from `DESIGN.md`. +- Refresh button states: idle (neutral), running (warning `#FFD60A`), success (`#30D158`), failure (`#FF453A`). +- Stale label uses warning palette. + +## Responsive Checks + +- Desktop 1280px, Tablet 768px, Mobile 375px - layout legible at each breakpoint. + +## Audit Coverage + +- C5, C8, C11. + +## Pass Criteria + +- Source status renders correctly with redacted errors. +- Refresh updates only target provider. +- Curated edit preserves snapshot. + +## Failure Criteria + +- Stale flag missing. +- Console error during refresh. +- Curated edit corrupts other models. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-002.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-002.md new file mode 100644 index 000000000..82bcea959 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-002.md @@ -0,0 +1,43 @@ +# TC-UI-002: Settings > Providers - Manual Entry + Curated Edit + +**Priority:** P1 +**Type:** UI +**Surface:** `web/src/routes/_app/settings/providers.tsx` +**Requirement:** TechSpec SI-6, Task 09. +**Status:** Not Run + +## Objective + +Verify the new settings form edits `models.default` and `models.curated`, allows manual model IDs (curated is not an allowlist), and emits payloads matching the new nested contract (no `default_model`, `supported_models`, or `supports_reasoning_effort`). + +## Preconditions + +- [ ] Daemon and web app running. + +## Test Steps + +1. **Add curated model with reasoning efforts.** + - **Expected:** Form accepts metadata; submits payload using `models.default`/`models.curated`; daemon persists; CLI/HTTP/UDS reflect change. +2. 
**Set default to a model NOT in curated list.** + - **Expected:** Form accepts; payload shows `models.default = "manual-id"`; manual model becomes selectable in session create dialog. +3. **Reject duplicate curated id.** + - **Expected:** Form shows inline validation error. +4. **Reject blank reasoning effort.** + - **Expected:** Inline validation error referencing the empty entry. +5. **Inspect payload network request.** + - **Expected:** No legacy keys; matches generated TS contract `web/src/generated/agh-openapi.d.ts`. + +## Audit Coverage + +- C5, C8. +- SI-6. + +## Pass Criteria + +- Form contract matches generated types. +- Manual entry accepted. + +## Failure Criteria + +- Legacy fields appear in payload. +- Manual default rejected. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-003.md b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-003.md new file mode 100644 index 000000000..3b5cf715d --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-cases/TC-UI-003.md @@ -0,0 +1,51 @@ +# TC-UI-003: New Session Dialog - Catalog vs ACP Config Options + +**Priority:** P1 +**Type:** UI +**Surface:** `web/src/systems/session/components/session-create-dialog.tsx`, `web/src/systems/session/hooks/use-session-create-dialog.ts`, `web/src/systems/model-catalog/lib/derive-active-session-options.ts`. +**Requirement:** TechSpec SI-7, Task 09. +**Status:** Not Run + +## Objective + +Verify the new session dialog uses the daemon catalog (not legacy `supported_models`) for pre-session model selection, supports manual entry, surfaces stale/error/empty states, and switches to ACP `configOptions` once the session is active. + +## Preconditions + +- [ ] Daemon with seeded catalog. +- [ ] Web app under bootstrap manifest proxy target. + +## Test Steps + +1. **Open new session dialog with seeded catalog.** + - **Expected:** Model picker lists rows from `useProviderModels`; sources/availability badges visible; manual entry input present. +2. 
**Catalog stale.** + - Force stale flag in seed. + - **Expected:** Stale models render with stale label; selection still allowed. +3. **Catalog empty.** + - Seed with zero rows. + - **Expected:** Empty state shown; manual entry remains valid; submitting manual model creates session successfully. +4. **Refresh in dialog.** + - **Expected:** `useRefreshProviderModels` triggers; loading state visible; rows update on completion. +5. **Switch to active session.** + - After session creates, open active session settings panel. + - **Expected:** Controls switch to ACP `configOptions` (model + reasoning) via `deriveActiveSessionOptions`; catalog metadata never overrides session option current value (SI-7). +6. **No legacy field reads.** + - Inspect React Query cache + network responses. + - **Expected:** No `supported_models` / `default_model` / `supports_reasoning_effort` references. + +## Audit Coverage + +- C5, C7, C11. +- SI-7. + +## Pass Criteria + +- Catalog drives picker; ACP overrides post-creation. +- Manual entry valid in all states. + +## Failure Criteria + +- Picker reads legacy field. +- ACP override missing. +- Manual entry blocked. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-plans/00-coverage-matrix.md b/.compozy/tasks/provider-model-catalog/qa/test-plans/00-coverage-matrix.md new file mode 100644 index 000000000..8aa31bd1b --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-plans/00-coverage-matrix.md @@ -0,0 +1,124 @@ +# Provider Model Catalog - QA Coverage Matrix + +This matrix maps every TechSpec safety invariant, ADR decision, and implementation task to the concrete test cases authored under `qa/test-cases/`. Task 13 must run every case listed here. A blank row is a Task 13 blocker. + +## Source Authorities + +- TechSpec: `.compozy/tasks/provider-model-catalog/_techspec.md` (Safety Invariants 1-13, Testing Approach, Observability). 
+- ADRs: `adrs/adr-001-daemon-owned-provider-model-catalog.md`, `adrs/adr-002-provider-model-config-hard-cut.md`, `adrs/adr-003-extension-model-source-contract.md`. +- Tasks: `task_01.md` through `task_11.md`. +- QA tail template: `.agents/skills/cy-tasks-tail-qa-pair/references/hermes-tail-template.md`. + +## TechSpec Safety Invariants + +| Invariant | Description | Test Cases | +|-----------|-------------|------------| +| SI-1 | Session creation never depends on successful network model discovery. | TC-SCEN-001, TC-FUNC-008, TC-INT-005 | +| SI-2 | Discovery must not create, load, mutate, or stop ACP sessions. | TC-FUNC-009, TC-INT-006 | +| SI-3 | Live discovery uses provider effective auth/home/env policy and explicit timeouts. | TC-FUNC-009, TC-FUNC-014 | +| SI-4 | Source refresh failure records source status and preserves prior stale rows. | TC-FUNC-006, TC-FUNC-013, TC-INT-002 | +| SI-5 | `models.dev` rows never prove account-level availability. | TC-FUNC-005, TC-INT-002 | +| SI-6 | `models.curated` is never an allowlist; manual model IDs remain valid. | TC-FUNC-002, TC-SCEN-002, TC-UI-002 | +| SI-7 | Active ACP `configOptions` override catalog metadata for that session only. | TC-FUNC-010, TC-INT-006, TC-UI-003 | +| SI-8 | Global catalog rows are only written through `internal/modelcatalog.Store`. | TC-FUNC-004, TC-INT-001 | +| SI-9 | No raw secrets, API keys, OAuth data, or credential material in source errors / logs / status / SSE / web / Host API. | TC-SEC-001, TC-FUNC-013, TC-INT-002 | +| SI-10 | SQLite schema changes append a new migration at the registry tail and pass fresh DB plus reopen-after-restart tests. | TC-INT-001 | +| SI-11 | HTTP/UDS request lifetime does not own background refresh; refresh uses `context.WithoutCancel(ctx)` + explicit deadline. | TC-FUNC-014, TC-PERF-002 | +| SI-12 | Live refresh work is serialized/coalesced per `provider_id` before touching `HOME`, native CLI auth state, cache files, or SQLite. 
| TC-PERF-001, TC-PERF-002 | +| SI-13 | Partial-source success is success; list fails only when every usable source fails and no stale cache exists. | TC-FUNC-007, TC-INT-002 | + +## ADR Decisions + +| ADR | Decision | Test Cases | +|-----|----------|------------| +| ADR-001 | Daemon-owned catalog with HTTP/UDS/CLI/Host API/web parity. | TC-INT-002, TC-INT-003, TC-INT-004, TC-SCEN-002 | +| ADR-002 | Hard cut of `default_model`/`supported_models`/`supports_reasoning_effort`. | TC-FUNC-001, TC-REG-001 | +| ADR-003 | Extension `model.source` capability + Host API `models/list\|refresh\|status`. | TC-FUNC-011, TC-FUNC-012, TC-INT-005 | + +## Task Coverage + +| Task | Title | Test Cases | +|------|-------|------------| +| 01 | Provider Config and Builtin Model Hard Cut | TC-FUNC-001, TC-FUNC-002, TC-REG-001 | +| 02 | Model Catalog Persistence | TC-INT-001 | +| 03 | Catalog Service and Catalog Sources | TC-FUNC-003, TC-FUNC-004, TC-FUNC-005, TC-FUNC-006, TC-FUNC-007 | +| 04 | Live Provider Discovery Sources | TC-FUNC-008, TC-FUNC-009 | +| 05 | Daemon Catalog Wiring | TC-INT-002, TC-PERF-001, TC-PERF-002 | +| 06 | ACP SDK Upgrade and Config Options | TC-FUNC-010, TC-INT-006, TC-UI-003 | +| 07 | HTTP, UDS, CLI, OpenAI Model Projection | TC-INT-002, TC-INT-003, TC-INT-004, TC-SEC-002 | +| 08 | Extension Model Source Contract | TC-FUNC-011, TC-FUNC-012, TC-INT-005 | +| 09 | Web Model Catalog Experience | TC-UI-001, TC-UI-002, TC-UI-003, TC-SCEN-001, TC-SCEN-002 | +| 10 | Generated Contracts and Runtime Docs | TC-FUNC-015, TC-REG-002 | +| 11 | Cross-Surface Regression Hardening | TC-FUNC-013, TC-FUNC-014, TC-PERF-001, TC-PERF-002 | + +## Public Surface Coverage + +| Surface | Endpoints / Commands | Test Cases | +|---------|----------------------|------------| +| HTTP native catalog | `GET /api/providers/models`, `GET /api/providers/{provider_id}/models`, `POST /api/providers/models/refresh`, `POST /api/providers/{provider_id}/models/refresh`, `GET /api/providers/models/status`, 
`GET /api/providers/{provider_id}/models/status` | TC-INT-002, TC-INT-003 | +| HTTP-only OpenAI projection | `GET /api/openai/v1/models`, `GET /api/openai/v1/models?provider_id=` | TC-INT-004, TC-SEC-002 | +| UDS native catalog | Same path family registered on UDS group, **never** the OpenAI projection. | TC-INT-002, TC-INT-003, TC-INT-004 | +| CLI | `agh provider models list [provider]`, `agh provider models refresh [provider]`, `agh provider models status [provider]`, with `--source`, `--refresh`, `--include-stale`, `-o json`. | TC-INT-002, TC-INT-003, TC-SCEN-002 | +| Extension Host API | `models/list`, `models/refresh`, `models/status` | TC-FUNC-011, TC-FUNC-012, TC-INT-005 | +| AGH -> extension | `models/list` request shape, capability gate. | TC-FUNC-011, TC-FUNC-012 | +| Web (Settings > Providers) | `web/src/routes/_app/settings/providers.tsx`, source status cards, refresh button, curated/default editor. | TC-UI-001, TC-UI-002 | +| Web (Session create dialog) | `web/src/systems/session/components/session-create-dialog.tsx`, model picker pulled from catalog, manual entry fallback. | TC-UI-003, TC-SCEN-001 | +| Web TanStack adapter | `web/src/systems/model-catalog/` query keys, hooks, adapter, `deriveActiveSessionOptions`. | TC-UI-003 | +| Generated contracts | `openapi/agh.json`, `web/src/generated/agh-openapi.d.ts`, extension TS types. | TC-FUNC-015, TC-REG-002 | +| Docs | `packages/site/content/runtime/core/agents/model-catalog.mdx`, `providers.mdx`, `config-toml.mdx`, `cli/provider/models/*.mdx`, extension authoring docs. | TC-FUNC-015 | +| `config.toml` | `[providers.<provider_id>.models]` (default, curated, discovery), `[model_catalog.sources.models_dev]`. | TC-FUNC-001, TC-FUNC-002, TC-INT-001 | +| Observability | Structured logs/events with `refresh_request_id`, `provider_id`, `source_id`, `source_kind`, `model_id`, `extension_name`. 
| TC-FUNC-013, TC-INT-005, TC-PERF-002 | +| Persistence | `model_catalog_sources`, `model_catalog_rows`, `model_catalog_reasoning_efforts` tables (global migration v23). | TC-INT-001 | + +## Failure-Mode Coverage + +| Failure / Edge Case | Cases | +|---------------------|-------| +| Old TOML keys present (`default_model`, `supported_models`, `supports_reasoning_effort`). | TC-FUNC-001, TC-REG-001 | +| Curated default not in curated list. | TC-FUNC-002 | +| Curated duplicate IDs / blank reasoning efforts / `default_reasoning_effort` not in list. | TC-FUNC-002 | +| `models.dev` HTTP 5xx, network timeout, JSON malformed, legacy field aliases. | TC-FUNC-005, TC-FUNC-006, TC-FUNC-013 | +| `models.dev` disabled via config. | TC-FUNC-005 | +| Live provider source timeout, subprocess failure, missing auth. | TC-FUNC-008, TC-FUNC-009 | +| Live provider source attempts ACP `session/new`/`set_*`. | TC-FUNC-009 | +| Stale source rows preserved across daemon restart. | TC-FUNC-006, TC-INT-001 | +| All sources fail, no stale cache exists. | TC-FUNC-007 | +| Source error contains API key / OAuth token / env secret. | TC-SEC-001, TC-FUNC-013 | +| Source error shape leaks beyond redaction at HTTP/UDS/Web/Host API. | TC-SEC-001, TC-INT-002 | +| Concurrent same-provider refresh. | TC-PERF-001 | +| Concurrent cross-provider refresh storm. | TC-PERF-001 | +| Repeated coalesced refresh returns same status batch. | TC-PERF-001 | +| Request cancellation during refresh detaches refresh lifetime. | TC-PERF-002, TC-FUNC-014 | +| SQLite `BUSY` write contention. | TC-PERF-001 | +| Extension capability missing or revoked. | TC-FUNC-012 | +| Extension manifest declares non-normalizable `model.source` slug. | TC-FUNC-011 | +| Extension `models/list` returns invalid rows. | TC-FUNC-011 | +| `/api/openai/v1/models` registered on UDS by mistake. | TC-INT-004 | +| `/api/openai/v1/models` unauthenticated request. | TC-SEC-002 | +| `/api/openai/v1/models?provider_id=unknown`. 
| TC-INT-004 | +| ACP `session/set_config_option` succeeds; `session/set_model` fallback only when config option absent. | TC-FUNC-010, TC-INT-006 | +| ACP session exposes no model option; reasoning never sent. | TC-FUNC-010 | +| Web: Settings > Providers refresh button surfaces stale state and last error. | TC-UI-001 | +| Web: New session dialog uses ACP `configOptions` after creation. | TC-UI-003 | +| Web: Manual model entry remains valid when curated empty. | TC-UI-002, TC-SCEN-001 | +| Generated docs / OpenAPI / TS types drift. | TC-FUNC-015, TC-REG-002 | + +## Real-Scenario Mapping (TC-SCEN) + +| TC-SCEN | Operator Journey | Surfaces | TechSpec Anchors | +|---------|-------------------|----------|-------------------| +| TC-SCEN-001 | Operator opens Settings > Providers, edits curated metadata, refreshes models, then creates a session and selects a model. | Web + HTTP + SQLite + ACP | SI-1, SI-6, SI-7 | +| TC-SCEN-002 | Agent driving CLI/HTTP/UDS lists, refreshes, and inspects model status without using the web UI. | CLI + HTTP + UDS + SQLite | SI-4, SI-12, SI-13 | + +## Auditor Coverage + +The TC-SCEN cases must satisfy: + +- C4 actor/role coverage: operator + agent both exercise catalog surfaces. +- C5 channels: HTTP, UDS, CLI, web, Host API. +- C6 task tree: TC-SCEN cases reference Tasks 01-11. +- C8 cross-surface truth: TC-INT-003 / TC-SCEN-002 compare CLI/HTTP/UDS/Host API/web payloads. +- C9 live provider: TC-FUNC-008 documents the live discovery boundary; real-provider runs are opt-in. +- C10 artifact reuse: catalog rows produced in TC-SCEN-001 are reused by TC-SCEN-002. +- C11 disruption probes: stale, timeout, redaction, denial, and SQLite contention. +- C14 final verification: TC-SCEN-001 and TC-SCEN-002 require `make verify` evidence in `qa/verification-report.md`. 
diff --git a/.compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-regression.md b/.compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-regression.md new file mode 100644 index 000000000..4b8110844 --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-regression.md @@ -0,0 +1,79 @@ +# Provider Model Catalog - Regression Suite + +This suite drives the existing test cases in `qa/test-cases/` through tiered execution that Task 13 follows. + +## Tiered Execution + +| Suite | Duration | Frequency | Cases | +|-------|----------|-----------|-------| +| Smoke | ≤15 min | Per change | SMOKE-001 (daemon start, focused gates compile, web build, docs vitest, codegen-check) | +| Targeted | 30-60 min | Per task PR | All TC-FUNC-* + relevant TC-INT-* for changed surfaces | +| Full Release | 2-3 h | Release / Task 13 | All TC-FUNC, TC-INT, TC-PERF, TC-SEC, TC-UI, TC-REG, TC-SCEN | +| Sanity | 10-15 min | After hotfix | TC-FUNC-001, TC-INT-002, TC-SCEN-001 happy path only | + +## P0 Cases (must always pass) + +- TC-FUNC-001: Old TOML keys rejected. +- TC-FUNC-004: Catalog merge + tie-break determinism. +- TC-FUNC-007: Partial-source success / all-source failure. +- TC-FUNC-010: ACP `session/set_config_option` precedence over `session/set_model`. +- TC-FUNC-013: Source error redaction at projection boundary. +- TC-INT-001: Global migration v23 fresh DB + reopen-after-restart. +- TC-INT-002: HTTP/UDS native catalog handlers serve daemon-owned projection. +- TC-INT-003: HTTP/UDS canonical JSON byte equality + CLI parity. +- TC-INT-004: `/api/openai/v1/models` HTTP-only registration + auth + provider filter. +- TC-INT-005: Extension `model.source` + Host API `models/list|refresh|status`. +- TC-INT-006: ACP fixtures from upgraded SDK keep create/load/resume covered. +- TC-PERF-001: Per-provider refresh serialization + coalescing under concurrency. 
+- TC-PERF-002: Detached refresh lifetime survives request cancellation. +- TC-SEC-001: No raw secrets across logs / status / API / SSE / web / Host API. +- TC-SEC-002: `/api/openai/v1/models` rejects unauthenticated calls with OpenAI-shaped error. +- TC-SCEN-001: Operator real journey through web → HTTP → SQLite → ACP. +- TC-SCEN-002: Agent real journey through CLI → HTTP/UDS → Host API. + +## P1 Cases (≥90% pass required) + +- TC-FUNC-002: Curated config validation rules. +- TC-FUNC-003: Builtin source converts defaults to priority-10 rows. +- TC-FUNC-005: `models.dev` source TTL, disable, legacy alias parsing. +- TC-FUNC-006: Stale fallback when refresh fails after prior success. +- TC-FUNC-008: Live provider source timeout + per-provider env/home policy. +- TC-FUNC-009: Live discovery never calls ACP `session/*` mutators. +- TC-FUNC-011: Extension manifest validation + invalid row rejection. +- TC-FUNC-012: Extension capability missing/revoked is treated as denial. +- TC-FUNC-014: Refresh deadline detached from request context. +- TC-FUNC-015: Codegen drift gate for OpenAPI / TS contracts / docs. +- TC-REG-001: Hard-cut residue scan. +- TC-REG-002: Generated docs and CLI reference stay in sync. +- TC-UI-001: Settings > Providers source status + refresh state. +- TC-UI-002: Settings > Providers manual entry + curated edit. +- TC-UI-003: New session dialog uses ACP `configOptions` post-creation. + +## P2 / Exploratory + +- Manual exploratory probes documented in `qa/verification-report.md`: + - Toggle `[model_catalog.sources.models_dev].enabled = false` and verify status reflects disabled state without outbound HTTP. + - Disable extension grant mid-run and observe CLI/Host API states. + - Force `models.dev` 5xx and observe stale rows persisted. + +## Pass/Fail Criteria + +- **PASS**: All P0 cases pass; ≥90% P1 pass; remaining P1 failures have BUGs filed with root cause + fix; `make verify` clean. 
+- **FAIL**: Any P0 fails; secret material leaks anywhere; cross-surface parity diverges; SQLite contention causes BUSY errors that escape coalescing; ACP regression fallback path executes when config option exists. +- **CONDITIONAL**: P1 failure only with documented workaround AND scheduled fix in `qa/verification-report.md`. + +## Execution Order + +1. Smoke (SMOKE-001) — block on failure. +2. P0 unit + integration cases (TC-FUNC + TC-INT). +3. P0 perf + security cases (TC-PERF, TC-SEC). +4. P1 cases. +5. UI cases (TC-UI) under Playwright. +6. Real-scenario cases (TC-SCEN). +7. Final `make verify`. + +## Reporting + +- Update `qa/verification-report.md` after each case batch. +- File `qa/issues/BUG-NNN.md` for every reproduced defect with TC-ID linkage. +- Update `qa/test-cases/<TC-ID>.md` execution history table. diff --git a/.compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-test-plan.md b/.compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-test-plan.md new file mode 100644 index 000000000..4aaee01aa --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/test-plans/provider-model-catalog-test-plan.md @@ -0,0 +1,190 @@ +# Provider Model Catalog - Master QA Plan + +## Executive Summary + +The provider model catalog program (Tasks 01-11) replaces the flat provider model fields with a daemon-owned, persisted, refreshable, agent-manageable catalog. It hard-cuts `default_model`, `supported_models`, and `supports_reasoning_effort`; introduces nested `[providers.<provider_id>.models]`, `[model_catalog.sources.models_dev]`, and discovery config; persists rows in three new SQLite tables; exposes HTTP/UDS/CLI/Host API/web/`/api/openai/v1/models` projections; and upgrades ACP to `coder/acp-go-sdk@v0.12.2` with `session/set_config_option` semantics. + +This plan defines the QA contract that Task 13 must execute. 
Every TechSpec safety invariant (SI-1..SI-13), every ADR decision, every public surface, every failure mode, and every cross-surface parity boundary has a concrete test case in `qa/test-cases/`. + +### Objectives + +- Prove the hard cut is complete: no production code reads `default_model`, `supported_models`, or `supports_reasoning_effort`; old TOML keys fail with deterministic errors. +- Prove the catalog merge policy is deterministic: priority ordering, freshness tie-break, source-id tie-break, lower-priority enrichment, merged availability states, partial success. +- Prove HTTP, UDS, CLI, Host API, web, and the OpenAI projection serve the same persisted catalog state. +- Prove refresh stays correct under concurrency, request cancellation, SQLite write contention, and source failure. +- Prove redaction is enforced at persistence, projection, and log boundaries. +- Prove ACP sessions respect `configOptions` and only fall back to `session/set_model` when config options are absent. +- Prove operator and agent can manage the catalog without web UI through CLI/HTTP/UDS/Host API. + +### Out of Scope + +- Droid discovery. +- Fake ACP sessions for discovery. +- `models.dev` as account-level availability proof. +- `models.curated` as an allowlist. +- Real-provider `models.dev`, OpenAI, Anthropic, Gemini, OpenRouter, Vercel, Ollama, OpenCode HTTP calls — opt-in only via env tags, not gated by `make verify`. + +## Scope + +### In-Scope Surfaces + +- Go runtime: `internal/config`, `internal/store/globaldb`, `internal/modelcatalog`, `internal/acp`, `internal/api/core`, `internal/api/httpapi`, `internal/api/udsapi`, `internal/cli`, `internal/extension`, `internal/daemon`. +- Generated contracts: `openapi/agh.json`, `web/src/generated/agh-openapi.d.ts`, extension TS types. +- Web app: `web/src/systems/model-catalog/`, `web/src/systems/session/`, `web/src/systems/settings/`, `web/src/routes/_app/settings/providers.tsx`, web E2E fixtures. 
+- Docs: `packages/site/content/runtime/core/agents/{providers.mdx,model-catalog.mdx,extensions/*.mdx}`, `packages/site/content/runtime/core/configuration/config-toml.mdx`, generated CLI docs under `packages/site/content/runtime/cli/provider/models/*.mdx`. + +### Out-of-Scope + +- Real-provider live discovery validation (covered as opt-in scenario with explicit boundaries). +- Pricing/cost rendering changes outside the catalog payload. +- AGH Network protocol changes. + +## Behavioral Scenario Charter + +- **Startup situation**: Greenfield AGH alpha (no production users). Operator runs daemon locally with isolated `AGH_HOME`, custom ports, and a tmux-bridge socket. Provider env may include real or stubbed credentials per scenario. +- **Operator intent**: Add or refine a provider, see which models AGH knows about, refresh catalog state, and start a session against a chosen model with optional reasoning effort. +- **Expected business outcome**: The operator sees a coherent, deterministic, source-attributed catalog; manual model entry remains valid; sessions start without depending on network discovery; agent and operator perceive the same catalog state across surfaces. +- **AGH surfaces used**: HTTP (`/api/providers/...`, `/api/openai/v1/models`), UDS, CLI (`agh provider models {list|refresh|status}`), web Settings > Providers, web new-session dialog, extension Host API, ACP `session/set_config_option`. +- **Real provider/LLM expectation**: The daemon must function with stubbed live discovery (default in `make verify`); opt-in real-provider runs (`MODELCATALOG_LIVE=1`) document a single end-to-end refresh against `models.dev` and one configured ACP provider. +- **Blocked live-provider boundary**: `make verify` and CI runs use stub HTTP servers and fake subprocesses. Real-provider runs are opt-in; missing credentials are reported as source status, not failures. 
+- **Scenario contract minimums covered**: TC-SCEN-001 + TC-SCEN-002 collectively satisfy operator and agent journeys, cross-surface parity, manual entry, refresh under stress, and stale-state observation. + +## Test Strategy + +1. **Smoke readiness (entry criteria only)**: SMOKE-001 verifies daemon starts, web build succeeds, codegen is clean, focused Go gates compile. Smoke is not release-grade evidence. +2. **Unit tests** cover pure logic per package: config validation, schema migrations, catalog merge, redaction, source parsing, conversion helpers, ACP config option capture/apply. +3. **Integration tests** cover daemon-served HTTP/UDS handlers, CLI client, Host API capability gating, deterministic JSON byte parity, and migration boot reconciliation against a real `globaldb` instance. +4. **E2E (runtime + browser)** cover operator journeys end-to-end through `make test-e2e-runtime` and `make test-e2e-web` with fresh QA labs created via `agh-qa-bootstrap`. +5. **Failure / chaos** cover stale fallback, all-source failure, SQLite contention, request cancellation, concurrent refresh coalescing, and credential redaction. +6. **Codegen and docs** are gated through `make codegen-check`, `make bun-typecheck`, and the `provider-model-catalog-docs` vitest suite. + +Each test case in `qa/test-cases/` declares Audit Coverage IDs that map back to `qa/test-plans/00-coverage-matrix.md`. + +## Environment Requirements + +- Go 1.23.x with `CGO_ENABLED=1` (`-race` parity). +- Bun and Node toolchain compatible with the repo `.tool-versions` / `.nvmrc`. +- macOS 15+ or Linux x86_64; SQLite 3.45+. +- `coder/acp-go-sdk@v0.12.2` available through `go mod`. +- Isolated lab via `agh-qa-bootstrap`: unique `AGH_HOME`, daemon ports, `AGH_WEB_API_PROXY_TARGET`, tmux-bridge socket. +- Browser: Chromium under Playwright; `browser-use:browser` primary, `agent-browser` fallback. +- Provider env: synthetic credentials by default; opt-in real credentials only under `MODELCATALOG_LIVE=1`. 
+ +## Entry Criteria + +- `git status` clean for production code under test (only QA artifacts may be uncommitted). +- `make verify` passed at the previous commit. +- `agh-qa-bootstrap` produced a fresh `bootstrap-manifest.json` for the run. +- Unique `AGH_HOME`, ports, and `tmux-bridge` socket allocated per worktree. +- Bootstrap manifest exports `AGH_WEB_API_PROXY_TARGET` for any web QA. + +## Exit Criteria + +- All P0 cases pass. +- ≥90% of P1 cases pass; remaining failures have `qa/issues/BUG-NNN.md` with root-cause + fix. +- Cross-surface parity test (TC-INT-003) shows byte-equal canonical JSON between native HTTP and UDS, and structurally equivalent CLI / Host API rows. +- Redaction tests (TC-SEC-001, TC-FUNC-013) show no API key, OAuth token, or env-shaped secret in any logged or projected payload. +- `make verify` passes after any QA-driven fixes. +- `qa/verification-report.md` records bootstrap manifest path, lab root, runtime home, base URL, commands, results, bug links, and residual risk. + +## Risk Assessment + +| Risk | Probability | Impact | Mitigation | +|------|-------------|--------|------------| +| Hard-cut residue silently rehydrates old fields. | Medium | High | TC-FUNC-001 + TC-REG-001 + repository scan. | +| Refresh under concurrency corrupts SQLite rows or status. | Medium | Critical | TC-PERF-001 + per-provider serialization assertions. | +| Refresh request cancellation cancels detached work. | Medium | High | TC-FUNC-014 + TC-PERF-002 + `context.WithoutCancel` assertions. | +| Source error leaks credentials into logs/UI/Host API. | Low | Critical | TC-SEC-001 + redaction at persistence and projection. | +| Generated contracts drift from runtime payload. | Medium | High | TC-FUNC-015, `make codegen-check`. | +| ACP `session/set_config_option` regresses to legacy `set_model`. | Low | High | TC-FUNC-010 + TC-INT-006 fixtures from upgraded SDK. | +| `/api/openai/v1/models` accidentally registered on UDS. 
| Low | High | TC-INT-004 explicit registration check. | +| `models.dev` becomes account availability proof under UI label drift. | Low | Medium | TC-FUNC-005 + TC-UI-001 stale label assertions. | +| Browser E2E flake on slow runners. | Medium | Medium | Use Playwright retries, deterministic seed via `web/e2e/fixtures/runtime-seed.ts`. | + +## Timeline and Deliverables + +- Day 1: Bootstrap fresh lab, run focused gates, replay TC-FUNC and TC-INT cases. +- Day 2: TC-PERF, TC-SEC, TC-UI cases; file BUGs as discovered. +- Day 3: TC-SCEN cases, fix loops with regression tests, finalize `verification-report.md`, commit. + +Deliverables are listed in Task 12 / Task 13 specs and in `qa/verification-report.md`. + +## Scenario Contract + +The following minimums must collectively be satisfied by the P0/P1 real-scenario cases (`TC-SCEN-001`, `TC-SCEN-002`): + +- Agents: operator (human) + remote agent (CLI/HTTP/Host API consumer). +- Roles: catalog editor, catalog reader, session creator, extension model source provider. +- Channels: HTTP, UDS, CLI, web, Host API, generated docs, generated TS types. +- Task tree: every public surface that Tasks 07-09 touched. +- Provider-backed sessions: at least one ACP-backed session uses `session/set_config_option` semantics (mock SDK fixture acceptable when real provider is blocked). +- Cross-surface objects: catalog row, source status, refresh request id, model availability state, source error. +- Artifacts used later: catalog row written via Settings > Providers (TC-SCEN-001) is read by CLI in TC-SCEN-002. +- Disruption probes: stale fallback, refresh coalescing, redaction, extension denial, request cancellation. +- Required surfaces: HTTP, UDS, CLI, web, Host API, OpenAI projection. + +## Auditor Mapping + +- C4 actor/role coverage → TC-SCEN-001 (operator) + TC-SCEN-002 (agent). +- C5 channels → TC-INT-002, TC-INT-003, TC-INT-004, TC-INT-005, TC-UI-001..003. +- C6 task tree → TC-FUNC + TC-INT cover Tasks 01-11. 
+- C8 cross-surface truth → TC-INT-003. +- C9 live provider → TC-FUNC-008 (stub) + opt-in `MODELCATALOG_LIVE=1` annex. +- C10 artifact reuse → TC-SCEN-001 → TC-SCEN-002 catalog row hand-off. +- C11 disruption probes → TC-PERF-001, TC-PERF-002, TC-FUNC-013, TC-FUNC-014. +- C14 final verification → `qa/verification-report.md` records `make verify` output. + +## Verification Commands (Required) + +Task 13 must run all of the following from a clean isolated lab. Substitute paths with the bootstrap manifest output where applicable. + +```bash +# 1. Activate isolated lab +.agents/skills/agh-qa-bootstrap/scripts/bootstrap.sh \ + --scenario provider-model-catalog \ + --output .compozy/tasks/provider-model-catalog/qa/lab +export AGH_HOME=$(jq -r '.runtime_home' .compozy/tasks/provider-model-catalog/qa/lab/bootstrap-manifest.json) +export AGH_WEB_API_PROXY_TARGET=$(jq -r '.web_api_proxy_target' .compozy/tasks/provider-model-catalog/qa/lab/bootstrap-manifest.json) + +# 2. Codegen + docs gates +make codegen +make codegen-check +cd packages/site && bun run test -- provider-model-catalog-docs && cd - + +# 3. Focused Go gates +go test -race ./internal/config ./internal/store/globaldb ./internal/modelcatalog/... \ + ./internal/acp ./internal/api/... ./internal/cli ./internal/extension/... + +# 4. Bun gates +make bun-typecheck +make bun-test +make web-build + +# 5. E2E lanes +make test-e2e-runtime +make test-e2e-web + +# 6. Optional live-provider annex (opt-in) +MODELCATALOG_LIVE=1 go test -tags=live ./internal/modelcatalog/... -run TestLive + +# 7. Repo-wide gate +make verify +``` + +`make verify` is the final blocking gate. It must run last and pass with zero warnings. + +## Bug Report Template + +Every reproduced defect must use `assets/issue-template.md` (see `qa/issues/BUG-NNN-template.md`). Each bug records reproduction, root cause, fix, verification, and links the failing TC-ID. 
+ +## Verification Report Template + +Task 13 closes the run by writing `qa/verification-report.md` (template at `qa/verification-report-template.md`) with: + +- Bootstrap manifest path. +- Lab root, runtime home, base URL, ports, tmux socket. +- Commands executed (verbatim) with results and durations. +- Test case index with pass/fail/blocked status. +- Bug links and root-cause summaries. +- Residual risk + recommended follow-up. +- Final `make verify` evidence. diff --git a/.compozy/tasks/provider-model-catalog/qa/verification-report-template.md b/.compozy/tasks/provider-model-catalog/qa/verification-report-template.md new file mode 100644 index 000000000..1e2b5599b --- /dev/null +++ b/.compozy/tasks/provider-model-catalog/qa/verification-report-template.md @@ -0,0 +1,110 @@ +# Provider Model Catalog - Verification Report Template + +> Task 13 must rename this file to `verification-report.md` and fill every section before reporting completion. + +## Run Metadata + +- **Date:** YYYY-MM-DD +- **Operator:** +- **Branch:** +- **Commit:** +- **Bootstrap manifest path:** `.compozy/tasks/provider-model-catalog/qa/lab/bootstrap-manifest.json` +- **Lab root:** +- **Runtime home (`AGH_HOME`):** +- **Daemon ports:** , , +- **`AGH_WEB_API_PROXY_TARGET`:** +- **tmux-bridge socket:** + +## Smoke Readiness + +| Step | Command | Result | Notes | +|------|---------|--------|-------| +| 1 | `make build` | | | +| 2 | `make codegen-check` | | | +| 3 | `make bun-typecheck && make bun-test` | | | +| 4 | Focused Go gates | | | +| 5 | `agh daemon start --foreground` | | | + +## Test Case Results + +| TC | Title | Priority | Result | Notes / BUG-IDs | +|----|-------|----------|--------|-----------------| +| TC-FUNC-001 | Provider config hard cut | P0 | | | +| TC-FUNC-002 | Curated validation rules | P1 | | | +| TC-FUNC-003 | Builtin source priority 10 | P1 | | | +| TC-FUNC-004 | Merge determinism | P0 | | | +| TC-FUNC-005 | `models.dev` TTL/disable/aliases | P1 | | | +| TC-FUNC-006 | 
Stale fallback | P1 | | | +| TC-FUNC-007 | Partial vs all-source failure | P0 | | | +| TC-FUNC-008 | Live provider timeout | P1 | | | +| TC-FUNC-009 | No ACP session calls from discovery | P1 | | | +| TC-FUNC-010 | ACP `set_config_option` precedence | P0 | | | +| TC-FUNC-011 | Extension manifest validation | P1 | | | +| TC-FUNC-012 | Extension capability denial | P1 | | | +| TC-FUNC-013 | Source error redaction | P0 | | | +| TC-FUNC-014 | Detached refresh deadline | P1 | | | +| TC-FUNC-015 | Codegen + docs drift | P1 | | | +| TC-INT-001 | Migration v23 fresh + reopen | P0 | | | +| TC-INT-002 | HTTP/UDS handler payloads | P0 | | | +| TC-INT-003 | Canonical JSON parity | P0 | | | +| TC-INT-004 | OpenAI projection HTTP-only | P0 | | | +| TC-INT-005 | Extension success/denial | P0 | | | +| TC-INT-006 | ACP SDK upgrade flows | P0 | | | +| TC-PERF-001 | Refresh concurrency + coalesce | P0 | | | +| TC-PERF-002 | Detached refresh + shutdown join | P0 | | | +| TC-SEC-001 | Secret redaction across surfaces | P0 | | | +| TC-SEC-002 | OpenAI auth + envelope | P0 | | | +| TC-UI-001 | Settings source status + refresh | P1 | | | +| TC-UI-002 | Manual entry + curated edit | P1 | | | +| TC-UI-003 | New session ACP override | P1 | | | +| TC-REG-001 | Hard-cut residue scan | P1 | | | +| TC-REG-002 | Generated docs + CLI sync | P1 | | | +| TC-SCEN-001 | Operator real journey | P0 | | | +| TC-SCEN-002 | Agent real journey | P0 | | | + +## Verification Commands Executed + +For each command record verbatim invocation, exit code, and duration. Attach full logs under `qa/lab/logs/`. + +```bash +# Example +make codegen-check # exit 0, 12s +make bun-test # exit 0, 1m24s +go test -race ./internal/modelcatalog/... 
# exit 0, 2m11s +make test-e2e-runtime # exit 0, 4m02s +make test-e2e-web # exit 0, 6m18s +make verify # exit 0, 14m37s +``` + +## Filed Bugs + +| BUG | Severity | TC | Status | Fix Commit | +|-----|----------|----|--------|------------| +| | | | | | + +## Live-Provider Annex (Optional) + +If `MODELCATALOG_LIVE=1` was set, document the real-provider boundary here: + +- Provider exercised: +- Credential source: +- Endpoints hit: `models.dev/api.json` (real), `` (real) +- Result: + +If not run, state explicitly: "Live-provider annex not executed; default run uses stub HTTP servers and fake subprocesses." + +## Residual Risk + +- + +## Final Verification + +- `make verify` exit code: <0 / non-zero> +- Duration: +- Log path: `qa/lab/logs/make-verify.log` + +## Sign-Off + +- Reporter: +- Date: +- Decision: PASS | FAIL | CONDITIONAL (with documented workaround) From fbafcb5d13e0bd1a161c0777212fe11b401a1b4d Mon Sep 17 00:00:00 2001 From: Pedro Nauck Date: Thu, 7 May 2026 12:31:35 -0300 Subject: [PATCH 13/13] fix: review rounds --- internal/cli/config.go | 3 +- internal/daemon/daemon_integration_test.go | 28 ++++++++++++ ...aemon_nightly_combined_integration_test.go | 28 ++++++++++++ .../daemon/daemon_sandbox_integration_test.go | 28 ++++++++++++ .../e2e/runtime_harness_integration_test.go | 28 ++++++++++++ packages/ui/src/components/metric.tsx | 10 ++--- .../session-provider-override.spec.ts | 44 +++++++++++++------ .../agent/components/agent-sessions-list.tsx | 32 ++++++++------ .../components/automation-run-history.tsx | 4 +- .../components/knowledge-delete-dialog.tsx | 2 +- .../components/knowledge-edit-dialog.tsx | 2 +- .../shell/inspector-activity-feed.tsx | 2 +- .../shell/inspector-members-list.tsx | 2 +- .../components/work/work-inspector.tsx | 2 +- .../components/settings-page-shell.tsx | 2 +- .../skill/components/skill-detail-panel.tsx | 4 +- 16 files changed, 177 insertions(+), 44 deletions(-) diff --git a/internal/cli/config.go b/internal/cli/config.go index 
baec56168..c987f85c6 100644 --- a/internal/cli/config.go +++ b/internal/cli/config.go @@ -27,6 +27,7 @@ const ( configProvidersKey = "providers" configModelsKey = "models" configDiscoveryKey = "discovery" + configDefaultKey = "default" configSessionMCPKey = "session_mcp" ) @@ -1465,7 +1466,7 @@ func isProviderMutationPath(path []string) bool { } } if len(path) == 4 && path[0] == configProvidersKey && path[2] == configModelsKey { - if path[3] == "default" { + if path[3] == configDefaultKey { return true } } diff --git a/internal/daemon/daemon_integration_test.go b/internal/daemon/daemon_integration_test.go index 873f05c5c..bd4bd3434 100644 --- a/internal/daemon/daemon_integration_test.go +++ b/internal/daemon/daemon_integration_test.go @@ -3975,10 +3975,38 @@ func (daemonSessionStopACPAgent) Cancel(context.Context, acpsdk.CancelNotificati return nil } +func (daemonSessionStopACPAgent) CloseSession( + context.Context, + acpsdk.CloseSessionRequest, +) (acpsdk.CloseSessionResponse, error) { + return acpsdk.CloseSessionResponse{}, nil +} + +func (daemonSessionStopACPAgent) ListSessions( + context.Context, + acpsdk.ListSessionsRequest, +) (acpsdk.ListSessionsResponse, error) { + return acpsdk.ListSessionsResponse{Sessions: []acpsdk.SessionInfo{}}, nil +} + func (daemonSessionStopACPAgent) NewSession(context.Context, acpsdk.NewSessionRequest) (acpsdk.NewSessionResponse, error) { return acpsdk.NewSessionResponse{SessionId: "daemon-stop-helper"}, nil } +func (daemonSessionStopACPAgent) ResumeSession( + context.Context, + acpsdk.ResumeSessionRequest, +) (acpsdk.ResumeSessionResponse, error) { + return acpsdk.ResumeSessionResponse{}, nil +} + +func (daemonSessionStopACPAgent) SetSessionConfigOption( + context.Context, + acpsdk.SetSessionConfigOptionRequest, +) (acpsdk.SetSessionConfigOptionResponse, error) { + return acpsdk.SetSessionConfigOptionResponse{ConfigOptions: []acpsdk.SessionConfigOption{}}, nil +} + func (daemonSessionStopACPAgent) LoadSession(context.Context, 
acpsdk.LoadSessionRequest) (acpsdk.LoadSessionResponse, error) { return acpsdk.LoadSessionResponse{}, nil } diff --git a/internal/daemon/daemon_nightly_combined_integration_test.go b/internal/daemon/daemon_nightly_combined_integration_test.go index 64549c7fc..964ef85bb 100644 --- a/internal/daemon/daemon_nightly_combined_integration_test.go +++ b/internal/daemon/daemon_nightly_combined_integration_test.go @@ -490,6 +490,20 @@ func (a *daemonNightlyCombinedACPAgent) Cancel(context.Context, acpsdk.CancelNot return nil } +func (a *daemonNightlyCombinedACPAgent) CloseSession( + context.Context, + acpsdk.CloseSessionRequest, +) (acpsdk.CloseSessionResponse, error) { + return acpsdk.CloseSessionResponse{}, nil +} + +func (a *daemonNightlyCombinedACPAgent) ListSessions( + context.Context, + acpsdk.ListSessionsRequest, +) (acpsdk.ListSessionsResponse, error) { + return acpsdk.ListSessionsResponse{Sessions: []acpsdk.SessionInfo{}}, nil +} + func (a *daemonNightlyCombinedACPAgent) NewSession( context.Context, acpsdk.NewSessionRequest, @@ -497,6 +511,20 @@ func (a *daemonNightlyCombinedACPAgent) NewSession( return acpsdk.NewSessionResponse{SessionId: "daemon-nightly-combined-helper"}, nil } +func (a *daemonNightlyCombinedACPAgent) ResumeSession( + context.Context, + acpsdk.ResumeSessionRequest, +) (acpsdk.ResumeSessionResponse, error) { + return acpsdk.ResumeSessionResponse{}, nil +} + +func (a *daemonNightlyCombinedACPAgent) SetSessionConfigOption( + context.Context, + acpsdk.SetSessionConfigOptionRequest, +) (acpsdk.SetSessionConfigOptionResponse, error) { + return acpsdk.SetSessionConfigOptionResponse{ConfigOptions: []acpsdk.SessionConfigOption{}}, nil +} + func (a *daemonNightlyCombinedACPAgent) LoadSession( context.Context, acpsdk.LoadSessionRequest, diff --git a/internal/daemon/daemon_sandbox_integration_test.go b/internal/daemon/daemon_sandbox_integration_test.go index 41556225e..5cdb9becd 100644 --- a/internal/daemon/daemon_sandbox_integration_test.go +++ 
b/internal/daemon/daemon_sandbox_integration_test.go @@ -260,6 +260,20 @@ func (a *daemonSandboxACPAgent) Cancel(context.Context, acpsdk.CancelNotificatio return nil } +func (a *daemonSandboxACPAgent) CloseSession( + context.Context, + acpsdk.CloseSessionRequest, +) (acpsdk.CloseSessionResponse, error) { + return acpsdk.CloseSessionResponse{}, nil +} + +func (a *daemonSandboxACPAgent) ListSessions( + context.Context, + acpsdk.ListSessionsRequest, +) (acpsdk.ListSessionsResponse, error) { + return acpsdk.ListSessionsResponse{Sessions: []acpsdk.SessionInfo{}}, nil +} + func (a *daemonSandboxACPAgent) NewSession( context.Context, acpsdk.NewSessionRequest, @@ -267,6 +281,20 @@ func (a *daemonSandboxACPAgent) NewSession( return acpsdk.NewSessionResponse{SessionId: "daemon-sandbox-helper"}, nil } +func (a *daemonSandboxACPAgent) ResumeSession( + context.Context, + acpsdk.ResumeSessionRequest, +) (acpsdk.ResumeSessionResponse, error) { + return acpsdk.ResumeSessionResponse{}, nil +} + +func (a *daemonSandboxACPAgent) SetSessionConfigOption( + context.Context, + acpsdk.SetSessionConfigOptionRequest, +) (acpsdk.SetSessionConfigOptionResponse, error) { + return acpsdk.SetSessionConfigOptionResponse{ConfigOptions: []acpsdk.SessionConfigOption{}}, nil +} + func (a *daemonSandboxACPAgent) LoadSession( context.Context, acpsdk.LoadSessionRequest, diff --git a/internal/testutil/e2e/runtime_harness_integration_test.go b/internal/testutil/e2e/runtime_harness_integration_test.go index da26b1bd9..44629393a 100644 --- a/internal/testutil/e2e/runtime_harness_integration_test.go +++ b/internal/testutil/e2e/runtime_harness_integration_test.go @@ -311,6 +311,20 @@ func (a *e2eACPAgent) Cancel(context.Context, acpsdk.CancelNotification) error { return nil } +func (a *e2eACPAgent) CloseSession( + context.Context, + acpsdk.CloseSessionRequest, +) (acpsdk.CloseSessionResponse, error) { + return acpsdk.CloseSessionResponse{}, nil +} + +func (a *e2eACPAgent) ListSessions( + context.Context, + 
acpsdk.ListSessionsRequest, +) (acpsdk.ListSessionsResponse, error) { + return acpsdk.ListSessionsResponse{Sessions: []acpsdk.SessionInfo{}}, nil +} + func (a *e2eACPAgent) NewSession( context.Context, acpsdk.NewSessionRequest, @@ -318,6 +332,20 @@ func (a *e2eACPAgent) NewSession( return acpsdk.NewSessionResponse{SessionId: "e2e-helper-session"}, nil } +func (a *e2eACPAgent) ResumeSession( + context.Context, + acpsdk.ResumeSessionRequest, +) (acpsdk.ResumeSessionResponse, error) { + return acpsdk.ResumeSessionResponse{}, nil +} + +func (a *e2eACPAgent) SetSessionConfigOption( + context.Context, + acpsdk.SetSessionConfigOptionRequest, +) (acpsdk.SetSessionConfigOptionResponse, error) { + return acpsdk.SetSessionConfigOptionResponse{ConfigOptions: []acpsdk.SessionConfigOption{}}, nil +} + func (a *e2eACPAgent) LoadSession( context.Context, acpsdk.LoadSessionRequest, diff --git a/packages/ui/src/components/metric.tsx b/packages/ui/src/components/metric.tsx index 795a7ed6d..95401a198 100644 --- a/packages/ui/src/components/metric.tsx +++ b/packages/ui/src/components/metric.tsx @@ -55,14 +55,14 @@ function Metric({ > {label} -
+
{value} @@ -70,7 +70,7 @@ function Metric({ {detail !== undefined ? ( {detail} @@ -79,7 +79,7 @@ function Metric({ {subtext !== undefined ? (

{subtext}

diff --git a/web/e2e/__tests__/session-provider-override.spec.ts b/web/e2e/__tests__/session-provider-override.spec.ts index 0bae123a3..8e1bcc63e 100644 --- a/web/e2e/__tests__/session-provider-override.spec.ts +++ b/web/e2e/__tests__/session-provider-override.spec.ts @@ -99,31 +99,47 @@ test("operator can create a provider/model override session and gets an inline r await expect(appPage.getByTestId("session-create-agent-select")).toContainText( browserLifecycleAgent ); - await expect(appPage.getByTestId("session-create-provider-select")).toHaveValue("claude"); + const providerSelect = appPage.getByTestId("session-create-provider-select"); + await expect(providerSelect).toContainText("Claude Code"); + await expect(appPage.getByTestId("session-create-provider-runtime")).toContainText("claude"); + await providerSelect.click(); const dialogOptions = await appPage - .getByTestId("session-create-provider-select") - .locator("option") - .evaluateAll(options => options.map(option => (option as HTMLOptionElement).value)); - expect(dialogOptions).toEqual(workspaceDetail.providers.map(provider => provider.name)); + .locator('[data-testid^="provider-command-item-"]') + .evaluateAll(items => + items + .map(item => item.getAttribute("data-testid")?.replace("provider-command-item-", "")) + .filter((value): value is string => Boolean(value)) + .sort() + ); + expect(dialogOptions).toEqual(workspaceDetail.providers.map(provider => provider.name).sort()); await browserArtifacts.captureScreenshot("session-provider-dialog-desktop", appPage); await appPage.setViewportSize({ width: 375, height: 812 }); - await expect(appPage.getByTestId("session-create-provider-select")).toBeVisible(); + await expect(providerSelect).toBeVisible(); await browserArtifacts.captureScreenshot("session-provider-dialog-mobile", appPage); await appPage.setViewportSize({ width: 1280, height: 800 }); - await appPage.getByTestId("session-create-provider-select").selectOption(overrideProvider); + await 
appPage.getByTestId(`provider-command-item-${overrideProvider}`).click(); + const catalogRefreshResponse = appPage.waitForResponse( + response => + response.request().method() === "POST" && + response.url().endsWith(`/api/providers/${overrideProvider}/models/refresh`) + ); + const refreshCatalog = appPage.getByTestId("session-create-catalog-refresh"); + await expect(refreshCatalog).toBeEnabled(); + await refreshCatalog.click(); + expect((await catalogRefreshResponse).ok()).toBe(true); + await expect(appPage.getByTestId("session-create-catalog-empty")).toBeVisible(); + await appPage.getByTestId("session-create-model-select").click(); - await expect(appPage.getByTestId("model-command-item-qa-browser-model")).toBeVisible(); - await appPage.getByTestId("model-command-item-qa-browser-model").click(); + await appPage.getByTestId("model-command-input").fill("qa-browser-model"); + await expect(appPage.getByTestId("model-command-item-custom")).toBeVisible(); + await appPage.getByTestId("model-command-item-custom").click(); await expect(appPage.getByTestId("session-create-model-select")).toContainText( "qa-browser-model" ); - await expect(appPage.getByTestId("session-create-reasoning-default")).toContainText("medium"); - await appPage.getByTestId("session-create-reasoning-select").click(); - await expect(appPage.getByTestId("reasoning-command-item-high")).toBeVisible(); - await appPage.getByTestId("reasoning-command-item-high").click(); + await expect(appPage.getByTestId("session-create-reasoning-select")).toBeDisabled(); const createRequestPromise = appPage.waitForRequest( request => request.method() === "POST" && request.url().endsWith("/api/sessions") @@ -147,9 +163,9 @@ test("operator can create a provider/model override session and gets an inline r agent_name: browserLifecycleAgent, model: "qa-browser-model", provider: overrideProvider, - reasoning_effort: "high", workspace: workspace.id, }); + expect(createRequestBody).not.toHaveProperty("reasoning_effort"); 
expect(createResponse.ok()).toBeTruthy(); const createdSession = (await createResponse.json()) as SessionEnvelope; diff --git a/web/src/systems/agent/components/agent-sessions-list.tsx b/web/src/systems/agent/components/agent-sessions-list.tsx index 3d45904ec..e92353d53 100644 --- a/web/src/systems/agent/components/agent-sessions-list.tsx +++ b/web/src/systems/agent/components/agent-sessions-list.tsx @@ -36,25 +36,29 @@ export function AgentSessionsList({ if (isError) { return ( - +
+ +
); } if (sessions.length === 0) { return ( - +
+ +
); } diff --git a/web/src/systems/automation/components/automation-run-history.tsx b/web/src/systems/automation/components/automation-run-history.tsx index 2d23d02d2..c3a8e6f62 100644 --- a/web/src/systems/automation/components/automation-run-history.tsx +++ b/web/src/systems/automation/components/automation-run-history.tsx @@ -58,7 +58,7 @@ export function AutomationRunHistory({ />
) : error ? ( -
+
) : runs.length === 0 ? ( -
+
) : ( diff --git a/web/src/systems/knowledge/components/knowledge-delete-dialog.tsx b/web/src/systems/knowledge/components/knowledge-delete-dialog.tsx index f9f6e26de..f8bed3559 100644 --- a/web/src/systems/knowledge/components/knowledge-delete-dialog.tsx +++ b/web/src/systems/knowledge/components/knowledge-delete-dialog.tsx @@ -55,7 +55,7 @@ function KnowledgeDeleteDialog({ {error}
) : null} - +
) : null} - +