From c18ed88aa7e7e5e4902ade3a3717e8571f953821 Mon Sep 17 00:00:00 2001
From: Om Gupta
Date: Mon, 4 May 2026 16:57:46 +0530
Subject: [PATCH 1/2] fix(byok): support user OpenAI/Anthropic API keys +
 working effort pill

Three connected bugs that surfaced when a user supplied their own
OpenAI/Anthropic key (BYOK) instead of using the Anton/OpenRouter proxy.

1. resolveModel only had fallbacks for `anton` and `openrouter`. For
   direct-API providers (openai, anthropic, google, groq, mistral) it
   relied entirely on pi SDK's hardcoded registry, so any model the
   registry didn't know about (e.g. `gpt-5.5`) failed to start a session.
   New `buildDirectApiModel` builder mirrors `buildOpenRouterModel` for
   these providers and routes to the right `api`/`baseUrl`. resolveModel
   now also threads an optional baseUrl override so users with
   self-hosted/proxy endpoints work.

2. Mid-conversation `switchModel` was brittle: it called pi SDK's
   `setModel` but didn't reconcile the rest of the session's
   model-derived state. Hardened it to (a) fail fast with a clear error
   when the new provider has no API key, (b) clamp thinking level to
   'off' on reasoning -> non-reasoning transitions so the upstream API
   doesn't reject the request, (c) refresh compactionConfig.maxContextTokens
   so the gauge and compaction threshold match the new model's window.

3. The composer's Effort pill was hidden for API sessions on `gpt-5.x`
   because `supportsReasoningEffort()`'s regex didn't include the gpt-5
   family. Worked for the `codex` harness because that branch returns
   true unconditionally. Pi SDK and the server-side wiring were already
   correct; only the UI gate needed the fix. Regex now matches the same
   families `buildDirectApiModel` detects, with a comment pointing the
   two heuristics at each other so they don't drift.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 packages/agent-core/src/session.ts            | 132 +++++++++++++++---
 packages/agent-server/src/server.ts           |   4 +-
 .../desktop/src/components/chat/ChatInput.tsx |   7 +-
 3 files changed, 123 insertions(+), 20 deletions(-)

diff --git a/packages/agent-core/src/session.ts b/packages/agent-core/src/session.ts
index 6b5c4f86..db495753 100644
--- a/packages/agent-core/src/session.ts
+++ b/packages/agent-core/src/session.ts
@@ -28,6 +28,7 @@ import {
   loadSession,
   loadUserRules,
   loadWorkspaceRules,
+  providerHasKey,
   saveSession,
   saveSessionTasks,
 } from '@anton/agent-config'
@@ -223,7 +224,7 @@ const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'
  * OpenRouter uses the OpenAI-compatible completions API for all models.
  * Model IDs are in "provider/model" format (e.g. "anthropic/claude-sonnet-4.6").
  */
-function buildOpenRouterModel(modelId: string) {
+function buildOpenRouterModel(modelId: string, baseUrlOverride?: string) {
   // Detect reasoning models by well-known patterns
   const reasoning =
     /\b(o[34]|r1|thinking|reason)\b/i.test(modelId) || /gemini.*pro/i.test(modelId)
@@ -232,7 +233,7 @@ function buildOpenRouterModel(modelId: string) {
     name: modelId,
     api: 'openai-completions' as const,
     provider: 'openrouter',
-    baseUrl: OPENROUTER_BASE_URL,
+    baseUrl: baseUrlOverride || OPENROUTER_BASE_URL,
     reasoning,
     input: ['text', 'image'],
     cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
   }
 }
 
+/**
+ * Direct-API providers we know how to talk to when the user supplies their own
+ * API key (BYOK). The `api` field selects the streaming protocol pi-ai uses.
+ */
+const DIRECT_API_PROVIDER_DEFAULTS: Record<string, { api: string; baseUrl: string }> = {
+  openai: { api: 'openai-responses', baseUrl: 'https://api.openai.com/v1' },
+  anthropic: { api: 'anthropic-messages', baseUrl: 'https://api.anthropic.com' },
+  google: {
+    api: 'google-generative-ai',
+    baseUrl: 'https://generativelanguage.googleapis.com/v1beta',
+  },
+  groq: { api: 'openai-completions', baseUrl: 'https://api.groq.com/openai/v1' },
+  mistral: { api: 'mistral-conversations', baseUrl: 'https://api.mistral.ai' },
+}
+
+/**
+ * Build a Model-compatible object for a direct-API provider when the user
+ * supplies their own API key but the model ID isn't in pi-ai's built-in
+ * registry. The actual API call still has to succeed against the upstream
+ * provider, but at least we no longer reject the request locally.
+ */
+function buildDirectApiModel(
+  provider: string,
+  modelId: string,
+  baseUrlOverride?: string,
+): Model | undefined {
+  const defaults = DIRECT_API_PROVIDER_DEFAULTS[provider]
+  if (!defaults) return undefined
+
+  const reasoning =
+    /\b(o[34]|r1|thinking|reason)\b/i.test(modelId) ||
+    /^gpt-5/i.test(modelId) ||
+    /claude-(opus|sonnet)-4/i.test(modelId) ||
+    /gemini.*pro/i.test(modelId)
+
+  return {
+    id: modelId,
+    name: modelId,
+    api: defaults.api,
+    provider,
+    baseUrl: baseUrlOverride || defaults.baseUrl,
+    reasoning,
+    input: ['text', 'image'],
+    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+    contextWindow: 128_000,
+    maxTokens: 16_384,
+  } as unknown as Model
+}
+
 /**
  * Resolve a model by provider + ID.
- * Tries pi-ai's built-in registry first, then falls back to the anton catalog
- * or OpenRouter's generic model builder.
+ * Tries pi-ai's built-in registry first, then falls back to the anton catalog,
+ * OpenRouter's generic builder, or a BYOK builder for direct-API providers
+ * (openai, anthropic, google, groq, mistral) when a user supplies their own key.
+ *
+ * `options.baseUrl` lets callers override the upstream URL — used by users
+ * pointing a provider at a self-hosted or proxy endpoint via config.
  */
-export function resolveModel(provider: string, modelId: string): Model | undefined {
+export function resolveModel(
+  provider: string,
+  modelId: string,
+  options?: { baseUrl?: string },
+): Model | undefined {
   // pi-ai's registry (hardcoded at build time)
   const piModel = (piGetModel as (p: string, m: string) => Model | undefined)(
     provider,
     modelId,
   )
-  if (piModel) return piModel
+  if (piModel) {
+    return options?.baseUrl ? ({ ...piModel, baseUrl: options.baseUrl } as Model) : piModel
+  }
 
   // Anton (GRU LiteLLM proxy) — custom runtime registry
   if (provider === 'anton') return getAntonModel(modelId)
 
   // OpenRouter — any model ID is valid (it's a proxy for many providers)
-  if (provider === 'openrouter') return buildOpenRouterModel(modelId) as Model
+  if (provider === 'openrouter')
+    return buildOpenRouterModel(modelId, options?.baseUrl) as Model
 
-  return undefined
+  // BYOK: openai/anthropic/google/groq/mistral — accept any model ID when the
+  // user has supplied their own API key. The upstream provider validates the
+  // model name on the actual request.
+  return buildDirectApiModel(provider, modelId, options?.baseUrl)
 }
 
 import {
   type AskUserHandler,
@@ -622,11 +686,12 @@ export class Session {
     }
 
     // Runtime strings from config — cast to the SDK's nominal types
-    const model = resolveModel(opts.provider, opts.model)
+    const providerBaseUrl = opts.config.providers?.[opts.provider]?.baseUrl
+    const model = resolveModel(opts.provider, opts.model, { baseUrl: providerBaseUrl })
 
     if (!model) {
       throw new Error(
-        `Unknown model "${opts.model}" for provider "${opts.provider}". Model IDs must exactly match pi SDK's registry. For openrouter, use format like "anthropic/claude-sonnet-4.6". For anton, use the model name directly like "gpt-4.1" or "claude-sonnet-4.6".`,
+        `Unknown provider "${opts.provider}" for model "${opts.model}". Supported BYOK providers: openai, anthropic, google, groq, mistral, openrouter, anton. For pi SDK's built-in registry, model IDs must match exactly.`,
       )
     }
@@ -1440,24 +1505,57 @@ export class Session {
   }
 
   /**
-   * Switch model mid-session. pi SDK handles this gracefully —
-   * keeps all messages, next LLM call uses the new model.
+   * Switch model mid-session. pi SDK keeps all messages; the new model
+   * applies on the next LLM call.
+   *
+   * `setModel` alone isn't enough — three pieces of session state have to be
+   * reconciled or the next turn misbehaves:
+   * - API key: fail fast here with a clear error instead of deep inside pi-ai
+   * - thinking level: clamp on reasoning→non-reasoning transitions
+   * - compaction.maxContextTokens: refresh when the context window changes
    */
   switchModel(provider: string, model: string): void {
-    const newModel = resolveModel(provider, model)
+    const providerBaseUrl = this.config.providers?.[provider]?.baseUrl
+    const newModel = resolveModel(provider, model, { baseUrl: providerBaseUrl })
     if (!newModel) {
       throw new Error(
-        `Unknown model "${model}" for provider "${provider}". Model IDs must exactly match pi SDK's registry. For openrouter, use format like "anthropic/claude-sonnet-4.6". For anton, use the model name directly like "gpt-4.1" or "claude-sonnet-4.6".`,
+        `Unknown provider "${provider}" for model "${model}". Supported BYOK providers: openai, anthropic, google, groq, mistral, openrouter, anton. For pi SDK's built-in registry, model IDs must match exactly.`,
       )
     }
+
+    if (!providerHasKey(provider, this.config)) {
+      throw new Error(
+        `No API key configured for provider "${provider}". Set one in the desktop app's Provider settings or via the matching environment variable.`,
+      )
+    }
+
+    const previousProvider = this.provider
+    const previousModel = this.model
+    const wasReasoning = this.resolvedModel.reasoning
+
     this.piAgent.setModel(newModel)
     this.resolvedModel = newModel
     this.provider = provider
     this.model = model
+
+    // Reasoning → non-reasoning: drop thinking level so the upstream API
+    // doesn't reject the request. Non-reasoning → reasoning stays opt-in
+    // via setThinkingLevel.
+    if (wasReasoning && !newModel.reasoning) {
+      this.piAgent.setThinkingLevel('off')
+    }
+
+    this.compactionConfig = {
+      ...this.compactionConfig,
+      maxContextTokens: getDefaultCompactionConfig(model).maxContextTokens,
+    }
+
+    this.log.info(
+      { previousProvider, previousModel, provider, model },
+      'switched model mid-session',
+    )
+
     this.persist()
-    // Context window may have changed (e.g. opus 1M → sonnet 200k).
-    // Refresh the gauge immediately when a turn is active; otherwise
-    // the server emits explicitly at the call site.
     this.pushContextUpdate()
   }
 
diff --git a/packages/agent-server/src/server.ts b/packages/agent-server/src/server.ts
index 7c51bd62..96c2e8ca 100644
--- a/packages/agent-server/src/server.ts
+++ b/packages/agent-server/src/server.ts
@@ -2832,7 +2832,7 @@ export class AgentServer {
     const provCfg = this.config.providers[chosenProvider] || DEFAULT_PROVIDERS[chosenProvider]
     const modelId = provCfg?.models?.[0]
     if (!modelId) return
-    const fallbackModel = resolveModel(chosenProvider, modelId)
+    const fallbackModel = resolveModel(chosenProvider, modelId, { baseUrl: provCfg?.baseUrl })
    if (!fallbackModel) return
 
     const providerName = chosenProvider
@@ -4201,7 +4201,7 @@ export class AgentServer {
       return { ok: true }
     }
 
-    if (!resolveModel(provider, model)) {
+    if (!resolveModel(provider, model, { baseUrl: providerCfg.baseUrl })) {
       return {
         ok: false,
         error: `Unknown model "${model}" for provider "${provider}" (not in pi SDK registry).`,
diff --git a/packages/desktop/src/components/chat/ChatInput.tsx b/packages/desktop/src/components/chat/ChatInput.tsx
index ca779ed1..a058cc96 100644
--- a/packages/desktop/src/components/chat/ChatInput.tsx
+++ b/packages/desktop/src/components/chat/ChatInput.tsx
@@ -64,12 +64,17 @@ const MAX_IMAGE_BYTES = 10 * 1024 * 1024
  * - Claude Code harness: CLI has no thinking/budget flag — always false.
  * - Codex harness: always true (o-series under the hood, per-turn effort).
  * - API-key models: regex matches known reasoning-capable families.
+ *
+ * Keep this in sync with `buildDirectApiModel`'s reasoning detection in
+ * packages/agent-core/src/session.ts — both decide the same question.
  */
 function supportsReasoningEffort(provider: string, model: string): boolean {
   if (provider === 'claude') return false
   if (provider === 'codex') return true
   const m = model.toLowerCase()
-  return /opus|sonnet|gemini-2\.5|o1|o3|o4|reason|thinking|deepseek-r/.test(m)
+  return /gpt-5|opus|sonnet|gemini-2\.5|gemini.*pro|o1|o3|o4|r1|reason|thinking|deepseek-r/.test(
+    m,
+  )
 }
 
 /**

From c99f60118a1c81b2afb05823e679256a82d6d6ba Mon Sep 17 00:00:00 2001
From: Om Gupta
Date: Mon, 4 May 2026 17:09:21 +0530
Subject: [PATCH 2/2] fix(routine-chat): mount ConfirmDialog inside the input
 dock

Place pending-confirm prompts immediately above the composer (inside
`conv-dock__inner`) instead of higher in the chat shell, so the dialog
sits where the user is already looking. Adjust `.chat-shell__confirm`
spacing to match the new placement.

Co-Authored-By: Claude Opus 4.7 (1M context)
---
 .../desktop/src/components/RoutineChat.tsx | 21 +++++++++----------
 packages/desktop/src/index.css             |  2 +-
 2 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/packages/desktop/src/components/RoutineChat.tsx b/packages/desktop/src/components/RoutineChat.tsx
index 9282eee3..b55aaef1 100644
--- a/packages/desktop/src/components/RoutineChat.tsx
+++ b/packages/desktop/src/components/RoutineChat.tsx
@@ -255,22 +255,21 @@ export function RoutineChat() {
         )}
 
-        {pendingConfirm && (
-          <div className="chat-shell__confirm">
-            <ConfirmDialog
-              request={pendingConfirm}
-              onConfirm={() => handleConfirm(true)}
-              onDeny={() => handleConfirm(false)}
-            />
-          </div>
-        )}
-
         {(messages.length > 0 || agentSession) && (
           <div className="conv-dock">
             <div className="conv-dock__inner">
+              {pendingConfirm && (
+                <div className="chat-shell__confirm">
+                  <ConfirmDialog
+                    request={pendingConfirm}
+                    onConfirm={() => handleConfirm(true)}
+                    onDeny={() => handleConfirm(false)}
+                  />
+                </div>
+              )}