Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
132 changes: 115 additions & 17 deletions packages/agent-core/src/session.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ import {
loadSession,
loadUserRules,
loadWorkspaceRules,
providerHasKey,
saveSession,
saveSessionTasks,
} from '@anton/agent-config'
Expand Down Expand Up @@ -223,7 +224,7 @@ const OPENROUTER_BASE_URL = 'https://openrouter.ai/api/v1'
* OpenRouter uses the OpenAI-compatible completions API for all models.
* Model IDs are in "provider/model" format (e.g. "anthropic/claude-sonnet-4.6").
*/
function buildOpenRouterModel(modelId: string) {
function buildOpenRouterModel(modelId: string, baseUrlOverride?: string) {
// Detect reasoning models by well-known patterns
const reasoning = /\b(o[34]|r1|thinking|reason)\b/i.test(modelId) || /gemini.*pro/i.test(modelId)

Expand All @@ -232,7 +233,7 @@ function buildOpenRouterModel(modelId: string) {
name: modelId,
api: 'openai-completions' as const,
provider: 'openrouter',
baseUrl: OPENROUTER_BASE_URL,
baseUrl: baseUrlOverride || OPENROUTER_BASE_URL,
reasoning,
input: ['text', 'image'],
cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
Expand All @@ -241,26 +242,89 @@ function buildOpenRouterModel(modelId: string) {
}
}

/**
 * Direct-API providers we know how to talk to when the user supplies their own
 * API key (BYOK). The `api` field picks the streaming protocol pi-ai speaks to
 * the provider; `baseUrl` is the provider's public endpoint (callers may
 * override it, e.g. for self-hosted proxies).
 */
const DIRECT_API_PROVIDER_DEFAULTS: Record<string, { api: string; baseUrl: string }> = {
  openai: { api: 'openai-responses', baseUrl: 'https://api.openai.com/v1' },
  anthropic: { api: 'anthropic-messages', baseUrl: 'https://api.anthropic.com' },
  google: { api: 'google-generative-ai', baseUrl: 'https://generativelanguage.googleapis.com/v1beta' },
  groq: { api: 'openai-completions', baseUrl: 'https://api.groq.com/openai/v1' },
  mistral: { api: 'mistral-conversations', baseUrl: 'https://api.mistral.ai' },
}

/**
 * Build a Model-compatible object for a direct-API provider when the user
 * brings their own API key (BYOK) but the model ID isn't in pi-ai's built-in
 * registry. Local validation is deliberately skipped: the upstream provider is
 * the source of truth for model names, so the real API call can still fail —
 * we just stop rejecting the request on our side.
 */
function buildDirectApiModel(
  provider: string,
  modelId: string,
  baseUrlOverride?: string,
): Model<Api> | undefined {
  const providerDefaults = DIRECT_API_PROVIDER_DEFAULTS[provider]
  if (!providerDefaults) return undefined

  // Heuristic match for known reasoning-capable families (o3/o4, R1, GPT-5,
  // Claude 4 opus/sonnet, Gemini Pro, explicit "thinking"/"reason" names).
  const reasoningPatterns = [
    /\b(o[34]|r1|thinking|reason)\b/i,
    /^gpt-5/i,
    /claude-(opus|sonnet)-4/i,
    /gemini.*pro/i,
  ]
  const isReasoning = reasoningPatterns.some((pattern) => pattern.test(modelId))

  // Costs are unknown for BYOK models (zeroed); 128k context / 16k output are
  // conservative defaults. `||` (not `??`) keeps the original fallback for ''.
  return {
    id: modelId,
    name: modelId,
    api: providerDefaults.api,
    provider,
    baseUrl: baseUrlOverride || providerDefaults.baseUrl,
    reasoning: isReasoning,
    input: ['text', 'image'],
    cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
    contextWindow: 128_000,
    maxTokens: 16_384,
  } as unknown as Model<Api>
}

/**
* Resolve a model by provider + ID.
* Tries pi-ai's built-in registry first, then falls back to the anton catalog
* or OpenRouter's generic model builder.
* Tries pi-ai's built-in registry first, then falls back to the anton catalog,
* OpenRouter's generic builder, or a BYOK builder for direct-API providers
* (openai, anthropic, google, groq, mistral) when a user supplies their own key.
*
* `options.baseUrl` lets callers override the upstream URL — used by users
* pointing a provider at a self-hosted or proxy endpoint via config.
*/
export function resolveModel(provider: string, modelId: string): Model<Api> | undefined {
export function resolveModel(
provider: string,
modelId: string,
options?: { baseUrl?: string },
): Model<Api> | undefined {
// pi-ai's registry (hardcoded at build time)
const piModel = (piGetModel as (p: string, m: string) => Model<Api> | undefined)(
provider,
modelId,
)
if (piModel) return piModel
if (piModel) {
return options?.baseUrl ? ({ ...piModel, baseUrl: options.baseUrl } as Model<Api>) : piModel
}

// Anton (GRU LiteLLM proxy) — custom runtime registry
if (provider === 'anton') return getAntonModel(modelId)

// OpenRouter — any model ID is valid (it's a proxy for many providers)
if (provider === 'openrouter') return buildOpenRouterModel(modelId) as Model<Api>
if (provider === 'openrouter')
return buildOpenRouterModel(modelId, options?.baseUrl) as Model<Api>

return undefined
// BYOK: openai/anthropic/google/groq/mistral — accept any model ID when the
// user has supplied their own API key. The upstream provider validates the
// model name on the actual request.
return buildDirectApiModel(provider, modelId, options?.baseUrl)
}
import {
type AskUserHandler,
Expand Down Expand Up @@ -622,11 +686,12 @@ export class Session {
}

// Runtime strings from config — cast to the SDK's nominal types
const model = resolveModel(opts.provider, opts.model)
const providerBaseUrl = opts.config.providers?.[opts.provider]?.baseUrl
const model = resolveModel(opts.provider, opts.model, { baseUrl: providerBaseUrl })

if (!model) {
throw new Error(
`Unknown model "${opts.model}" for provider "${opts.provider}". Model IDs must exactly match pi SDK's registry. For openrouter, use format like "anthropic/claude-sonnet-4.6". For anton, use the model name directly like "gpt-4.1" or "claude-sonnet-4.6".`,
`Unknown provider "${opts.provider}" for model "${opts.model}". Supported BYOK providers: openai, anthropic, google, groq, mistral, openrouter, anton. For pi SDK's built-in registry, model IDs must match exactly.`,
)
}

Expand Down Expand Up @@ -1440,24 +1505,57 @@ export class Session {
}

/**
 * Switch model mid-session. pi SDK keeps all messages; the new model
 * applies on the next LLM call.
 *
 * `setModel` alone isn't enough — three pieces of session state have to be
 * reconciled or the next turn misbehaves:
 * - API key: fail fast here with a clear error instead of deep inside pi-ai
 * - thinking level: clamp on reasoning→non-reasoning transitions
 * - compaction.maxContextTokens: refresh when the context window changes
 *
 * @throws Error when the provider/model can't be resolved, or when no API key
 *   is configured for the target provider.
 */
switchModel(provider: string, model: string): void {
  const providerBaseUrl = this.config.providers?.[provider]?.baseUrl
  const newModel = resolveModel(provider, model, { baseUrl: providerBaseUrl })
  if (!newModel) {
    throw new Error(
      `Unknown provider "${provider}" for model "${model}". Supported BYOK providers: openai, anthropic, google, groq, mistral, openrouter, anton. For pi SDK's built-in registry, model IDs must match exactly.`,
    )
  }

  if (!providerHasKey(provider, this.config)) {
    throw new Error(
      `No API key configured for provider "${provider}". Set one in the desktop app's Provider settings or via the matching environment variable.`,
    )
  }

  const previousProvider = this.provider
  const previousModel = this.model
  const wasReasoning = this.resolvedModel.reasoning

  this.piAgent.setModel(newModel)
  this.resolvedModel = newModel
  this.provider = provider
  this.model = model

  // Reasoning → non-reasoning: drop thinking level so the upstream API
  // doesn't reject the request. Non-reasoning → reasoning stays opt-in
  // via setThinkingLevel.
  if (wasReasoning && !newModel.reasoning) {
    this.piAgent.setThinkingLevel('off')
  }

  this.compactionConfig = {
    ...this.compactionConfig,
    maxContextTokens: getDefaultCompactionConfig(model).maxContextTokens,
  }

  this.log.info(
    { previousProvider, previousModel, provider, model },
    'switched model mid-session',
  )

  this.persist()
  // Context window may have changed (e.g. opus 1M → sonnet 200k).
  // Refresh the gauge immediately when a turn is active; otherwise
  // the server emits explicitly at the call site.
  this.pushContextUpdate()
}

Expand Down
4 changes: 2 additions & 2 deletions packages/agent-server/src/server.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2832,7 +2832,7 @@ export class AgentServer {
const provCfg = this.config.providers[chosenProvider] || DEFAULT_PROVIDERS[chosenProvider]
const modelId = provCfg?.models?.[0]
if (!modelId) return
const fallbackModel = resolveModel(chosenProvider, modelId)
const fallbackModel = resolveModel(chosenProvider, modelId, { baseUrl: provCfg?.baseUrl })
if (!fallbackModel) return

const providerName = chosenProvider
Expand Down Expand Up @@ -4201,7 +4201,7 @@ export class AgentServer {
return { ok: true }
}

if (!resolveModel(provider, model)) {
if (!resolveModel(provider, model, { baseUrl: providerCfg.baseUrl })) {
return {
ok: false,
error: `Unknown model "${model}" for provider "${provider}" (not in pi SDK registry).`,
Expand Down
21 changes: 10 additions & 11 deletions packages/desktop/src/components/RoutineChat.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -255,22 +255,21 @@ export function RoutineChat() {
</>
)}

{pendingConfirm && (
<div className="chat-shell__confirm">
<ConfirmDialog
command={pendingConfirm.command}
reason={pendingConfirm.reason}
onApprove={() => handleConfirm(true)}
onDeny={() => handleConfirm(false)}
/>
</div>
)}

<PlanReviewOverlay />

{(messages.length > 0 || agentSession) && (
<div className="conv-dock">
<div className="conv-dock__inner">
{pendingConfirm && (
<div className="chat-shell__confirm">
<ConfirmDialog
command={pendingConfirm.command}
reason={pendingConfirm.reason}
onApprove={() => handleConfirm(true)}
onDeny={() => handleConfirm(false)}
/>
</div>
)}
<ChatInput
onSend={handleSend}
onSteer={handleSteer}
Expand Down
7 changes: 6 additions & 1 deletion packages/desktop/src/components/chat/ChatInput.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,17 @@ const MAX_IMAGE_BYTES = 10 * 1024 * 1024
* - Claude Code harness: CLI has no thinking/budget flag — always false.
* - Codex harness: always true (o-series under the hood, per-turn effort).
* - API-key models: regex matches known reasoning-capable families.
*
* Keep this in sync with `buildDirectApiModel`'s reasoning detection in
* packages/agent-core/src/session.ts — both decide the same question.
*/
function supportsReasoningEffort(provider: string, model: string): boolean {
  // Harness providers have fixed behavior regardless of model name.
  if (provider === 'claude') return false
  if (provider === 'codex') return true
  const m = model.toLowerCase()
  // Known reasoning-capable families; mirrors buildDirectApiModel's detection
  // in packages/agent-core/src/session.ts.
  return /gpt-5|opus|sonnet|gemini-2\.5|gemini.*pro|o1|o3|o4|r1|reason|thinking|deepseek-r/.test(
    m,
  )
}

/**
Expand Down
2 changes: 1 addition & 1 deletion packages/desktop/src/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -1267,7 +1267,7 @@ button {
}

/* Inline confirm dialog spacing inside the conversation dock; the dock's own
   horizontal padding replaces the old 0 24px side padding. */
.chat-shell__confirm {
  margin-bottom: 12px;
}

.chat-shell__sync-loader {
Expand Down