diff --git a/pkg/model/provider/anthropic/thinking.go b/pkg/model/provider/anthropic/thinking.go index a4afb8725..f5c2d497a 100644 --- a/pkg/model/provider/anthropic/thinking.go +++ b/pkg/model/provider/anthropic/thinking.go @@ -43,23 +43,22 @@ func (c *Client) floorMaxTokensForNoThinking(maxTokens int64) int64 { // Anthropic's max_tokens represents the combined budget for thinking + output tokens. // Returns the adjusted maxTokens value and an error if user-set max_tokens is too low. // -// Only fixed token budgets need adjustment. Adaptive and effort-based budgets -// don't need it since the model manages its own thinking allocation. +// It operates on the resolved budget (see [Client.resolveThinkingBudget]) so an +// effort level that falls back to a token budget on a non-adaptive model gets +// the same headroom as any other token budget. Only fixed token budgets need +// adjustment; adaptive and effort-based budgets are managed by the model itself. func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) { - if c.ModelConfig.ThinkingBudget == nil { - return maxTokens, nil - } - // Adaptive and effort-based budgets: no token adjustment needed. - if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok { + budget := c.resolveThinkingBudget() + if budget == nil { return maxTokens, nil } - // Models that require adaptive thinking will have their token budget coerced - // to adaptive at request time, so no adjustment is needed here either. - if modelinfo.RejectsTokenThinking(c.ModelConfig.Model) { + // Adaptive and effort-based budgets: no token adjustment needed; the model + // manages its own thinking allocation within max_tokens. + if _, ok := anthropicThinkingEffort(budget); ok { return maxTokens, nil } - thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens) + thinkingTokens := int64(budget.Tokens) if thinkingTokens <= 0 { return maxTokens, nil } @@ -131,25 +130,45 @@ func validThinkingTokens(tokens, maxTokens int64) (int64, bool) { return tokens, true } -// coerceAdaptiveThinking returns an adaptive ThinkingBudget when the configured -// model rejects token-based thinking budgets but the user supplied one. -// Otherwise it returns the configured budget unchanged. It never mutates -// c.ModelConfig.ThinkingBudget. -func (c *Client) coerceAdaptiveThinking() *latest.ThinkingBudget { +// resolveThinkingBudget returns the ThinkingBudget to actually send to the API, +// adapting the configured budget to what the target model accepts. It never +// mutates c.ModelConfig.ThinkingBudget. +// +// Two model-specific rewrites happen here, in opposite directions: +// - A token budget on a model that rejects token-based thinking (Opus 4.6+) +// becomes adaptive thinking. +// - An effort or adaptive budget on a model that does not support adaptive +// thinking (Haiku 4.5, Sonnet 4.5 and earlier, ...) becomes a token budget, +// since `thinking.type=adaptive`/`output_config.effort` are rejected with a +// 400 on those models (issue #3362). This is the "regular thinking options" +// path for effort levels set via the TUI Shift+Tab cycle. +// +// Disabled, zero, and negative budgets are passed through unchanged so +// downstream code keeps treating them as "thinking off". +func (c *Client) resolveThinkingBudget() *latest.ThinkingBudget { budget := c.ModelConfig.ThinkingBudget - if budget == nil { - return nil + if budget == nil || budget.IsDisabled() { + return budget } + if _, ok := anthropicThinkingEffort(budget); ok { - return budget // already adaptive or effort-based. - } - // Only coerce a real, positive token budget. Disabled/zero/negative - // budgets are passed through so downstream code keeps treating them as - // "thinking off" instead of silently enabling adaptive thinking. - if budget.IsDisabled() || budget.Tokens <= 0 { - return budget + // Effort or adaptive budget. + if modelinfo.SupportsAdaptiveThinking(c.ModelConfig.Model) { + return budget + } + tokens, ok := effortBudgetTokens(budget) + if !ok { + return budget + } + slog.Warn("Anthropic: model does not support adaptive thinking; using token-based thinking budget", + "model", c.ModelConfig.Model, + "effort", budget.Effort, + "budget_tokens", tokens) + return &latest.ThinkingBudget{Tokens: tokens} } - if !modelinfo.RejectsTokenThinking(c.ModelConfig.Model) { + + // Token budget. Only coerce a real, positive value. + if budget.Tokens <= 0 || !modelinfo.RejectsTokenThinking(c.ModelConfig.Model) { return budget } slog.Warn("Anthropic: model rejects token-based thinking budgets; switching to adaptive thinking", @@ -158,6 +177,26 @@ func (c *Client) coerceAdaptiveThinking() *latest.ThinkingBudget { return &latest.ThinkingBudget{Effort: "adaptive"} } +// effortBudgetTokens maps an effort-based or adaptive ThinkingBudget onto a +// token budget, for models that only support token-based extended thinking. +// It covers both plain effort levels ("high") and adaptive forms ("adaptive", +// "adaptive/low"). Returns (0, false) for token-count or unrecognised budgets. +func effortBudgetTokens(b *latest.ThinkingBudget) (int, bool) { + level, ok := b.AdaptiveEffort() + if !ok { + l, ok := b.EffortLevel() + if !ok { + return 0, false + } + return effort.BedrockTokens(l) + } + l, ok := effort.Parse(level) + if !ok { + return 0, false + } + return effort.BedrockTokens(l) +} + // anthropicThinkingEffort returns the Anthropic API effort level for the given // ThinkingBudget. It covers both explicit adaptive mode and string effort // levels. Returns ("", false) when the budget uses token counts or is nil. @@ -218,7 +257,7 @@ func anthropicThinkingDisplay(opts map[string]any) (string, bool) { // based on the model's ThinkingBudget and provider_opts.thinking_display. // Returns true when thinking is enabled (i.e., temperature/top_p must not be set). func (c *Client) applyThinkingConfig(params *anthropic.MessageNewParams, maxTokens int64) bool { - budget := c.coerceAdaptiveThinking() + budget := c.resolveThinkingBudget() if budget == nil { return false } @@ -250,7 +289,7 @@ func (c *Client) applyThinkingConfig(params *anthropic.MessageNewParams, maxToke // applyBetaThinkingConfig configures extended thinking on a BetaMessageNewParams // based on the model's ThinkingBudget and provider_opts.thinking_display. func (c *Client) applyBetaThinkingConfig(params *anthropic.BetaMessageNewParams, maxTokens int64) { - budget := c.coerceAdaptiveThinking() + budget := c.resolveThinkingBudget() if budget == nil { return } diff --git a/pkg/model/provider/anthropic/thinking_test.go b/pkg/model/provider/anthropic/thinking_test.go index a87ee3d82..8b0eb14af 100644 --- a/pkg/model/provider/anthropic/thinking_test.go +++ b/pkg/model/provider/anthropic/thinking_test.go @@ -99,10 +99,15 @@ func TestAnthropicThinkingDisplay(t *testing.T) { } } -// defaultTestModel is an Anthropic model that does NOT require the -// adaptive-thinking workaround, so token-based thinking budgets are -// preserved as-is. -const defaultTestModel = "claude-sonnet-4-5" +// defaultTestModel is an Anthropic model that supports adaptive thinking but is +// NOT in the token-rejecting set (Opus 4.6+), so effort/adaptive budgets use the +// adaptive-thinking API while token-based budgets are preserved as-is. +const defaultTestModel = "claude-sonnet-4-6" + +// nonAdaptiveTestModel is an Anthropic model that does NOT support adaptive +// thinking (issue #3362), so effort/adaptive budgets must fall back to +// token-based extended thinking. +const nonAdaptiveTestModel = "claude-haiku-4-5" // clientWith builds a minimal Client with the given ThinkingBudget and // provider_opts on defaultTestModel. @@ -247,6 +252,66 @@ func TestApplyThinkingConfig(t *testing.T) { wantAdaptive: true, wantEffort: "low", }, + // Issue #3362: an effort level (as set via the TUI Shift+Tab cycle) uses + // adaptive thinking only on models that support it, and falls back to a + // token budget everywhere else. + { + name: "plain effort level on adaptive model uses adaptive", + budget: &latest.ThinkingBudget{Effort: "high"}, + maxTokens: 8192, + wantEnabled: true, + wantAdaptive: true, + wantEffort: "high", + }, + { + name: "effort level on non-adaptive model falls back to token budget", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "medium"}, + maxTokens: 16384, + wantEnabled: true, + wantTokens: 8192, + }, + { + name: "effort high on non-adaptive model maps to 16384 tokens", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "high"}, + maxTokens: 32768, + wantEnabled: true, + wantTokens: 16384, + }, + { + name: "adaptive budget on non-adaptive model falls back to token budget", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "adaptive"}, + maxTokens: 32768, + wantEnabled: true, + wantTokens: 16384, + }, + { + name: "non-adaptive token fallback keeps display", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "low"}, + opts: map[string]any{"thinking_display": "omitted"}, + maxTokens: 8192, + wantEnabled: true, + wantTokens: 2048, + wantDisplayJSON: "omitted", + }, + { + name: "non-adaptive token fallback dropped when exceeding max_tokens", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "high"}, // 16384 > maxTokens + maxTokens: 8192, + wantEnabled: false, + }, + { + name: "sonnet-4-5 effort falls back to token budget", + model: "claude-sonnet-4-5", + budget: &latest.ThinkingBudget{Effort: "high"}, + maxTokens: 32768, + wantEnabled: true, + wantTokens: 16384, + }, } for _, tt := range tests { @@ -346,6 +411,33 @@ func TestApplyBetaThinkingConfig(t *testing.T) { wantEffort: "high", wantDisplayJSON: "omitted", }, + // Issue #3362: effort/adaptive budgets fall back to token thinking on + // models without adaptive support. + { + name: "plain effort on adaptive model uses adaptive", + budget: &latest.ThinkingBudget{Effort: "high"}, + maxTokens: 8192, + wantAdaptive: true, + wantEffort: "high", + }, + { + name: "effort on non-adaptive model falls back to token budget", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "medium"}, + maxTokens: 16384, + wantEnabled: true, + wantTokens: 8192, + }, + { + name: "adaptive on non-adaptive model falls back to token budget with display", + model: "claude-haiku-4-5", + budget: &latest.ThinkingBudget{Effort: "adaptive"}, + opts: map[string]any{"thinking_display": "omitted"}, + maxTokens: 32768, + wantEnabled: true, + wantTokens: 16384, + wantDisplayJSON: "omitted", + }, } for _, tt := range tests { @@ -428,23 +520,29 @@ func TestAdjustMaxTokensForThinking(t *testing.T) { }) } -func TestCoerceAdaptiveThinking(t *testing.T) { +func TestResolveThinkingBudget(t *testing.T) { t.Parallel() t.Run("nil budget stays nil", func(t *testing.T) { c := clientWithModel("claude-opus-4-7", nil, nil) - assert.Nil(t, c.coerceAdaptiveThinking()) + assert.Nil(t, c.resolveThinkingBudget()) }) t.Run("non-affected model preserves token budget", func(t *testing.T) { in := &latest.ThinkingBudget{Tokens: 4096} c := clientWithModel(defaultTestModel, in, nil) - assert.Same(t, in, c.coerceAdaptiveThinking(), "budget pointer must not be replaced") + assert.Same(t, in, c.resolveThinkingBudget(), "budget pointer must not be replaced") + }) + + t.Run("adaptive-capable model preserves effort budget", func(t *testing.T) { + in := &latest.ThinkingBudget{Effort: "high"} + c := clientWithModel(defaultTestModel, in, nil) + assert.Same(t, in, c.resolveThinkingBudget()) }) t.Run("opus-4-6 token budget is coerced to adaptive", func(t *testing.T) { in := &latest.ThinkingBudget{Tokens: 4096} c := clientWithModel("claude-opus-4-6", in, nil) - got := c.coerceAdaptiveThinking() + got := c.resolveThinkingBudget() require.NotNil(t, got) assert.Equal(t, "adaptive", got.Effort) assert.Equal(t, 0, got.Tokens) @@ -456,9 +554,33 @@ func TestCoerceAdaptiveThinking(t *testing.T) { t.Run("opus-4-7 adaptive budget is preserved as-is", func(t *testing.T) { in := &latest.ThinkingBudget{Effort: "adaptive/low"} c := clientWithModel("claude-opus-4-7", in, nil) - assert.Same(t, in, c.coerceAdaptiveThinking()) + assert.Same(t, in, c.resolveThinkingBudget()) }) + // Issue #3362: effort/adaptive budgets on models without adaptive-thinking + // support fall back to a token budget instead of a 400. + effortFallbackCases := map[string]struct { + budget *latest.ThinkingBudget + wantTokens int + }{ + "effort high -> 16384": {&latest.ThinkingBudget{Effort: "high"}, 16384}, + "effort medium -> 8192": {&latest.ThinkingBudget{Effort: "medium"}, 8192}, + "effort low -> 2048": {&latest.ThinkingBudget{Effort: "low"}, 2048}, + "adaptive -> 16384 (high)": {&latest.ThinkingBudget{Effort: "adaptive"}, 16384}, + "adaptive/low -> 2048": {&latest.ThinkingBudget{Effort: "adaptive/low"}, 2048}, + } + for name, tc := range effortFallbackCases { + t.Run("haiku-4-5 "+name, func(t *testing.T) { + c := clientWithModel(nonAdaptiveTestModel, tc.budget, nil) + got := c.resolveThinkingBudget() + require.NotNil(t, got) + assert.Equal(t, tc.wantTokens, got.Tokens) + assert.Empty(t, got.Effort) + // Original must not be mutated. + assert.Empty(t, tc.budget.Tokens) + }) + } + // Disabled or non-positive token budgets must NOT be silently coerced to // adaptive thinking on Opus 4.6/4.7 — the user has either explicitly // disabled thinking or supplied an invalid value. @@ -471,7 +593,7 @@ func TestCoerceAdaptiveThinking(t *testing.T) { for name, in := range disabledCases { t.Run("opus-4-7 "+name+" passes through", func(t *testing.T) { c := clientWithModel("claude-opus-4-7", in, nil) - assert.Same(t, in, c.coerceAdaptiveThinking()) + assert.Same(t, in, c.resolveThinkingBudget()) }) } } diff --git a/pkg/modelinfo/modelinfo.go b/pkg/modelinfo/modelinfo.go index b82a19335..fc24e8e74 100644 --- a/pkg/modelinfo/modelinfo.go +++ b/pkg/modelinfo/modelinfo.go @@ -123,6 +123,74 @@ func RejectsTokenThinking(modelID string) bool { return isClaudeOpus46To48(modelID) } +// SupportsAdaptiveThinking reports whether an Anthropic Claude model accepts +// adaptive extended thinking (`thinking.type=adaptive`) together with the +// `output_config.effort` parameter. +// +// Adaptive thinking and effort levels arrived with the Claude 4.6 generation. +// Earlier models (Sonnet 4.5 and older, Haiku 4.5, Opus 4.5 and older, and all +// Claude 3.x) reject `thinking.type=adaptive`/`output_config.effort` with a 400 +// and must use token-based extended thinking (`thinking.type=enabled`) instead. +// +// Supported: Opus 4.6/4.7/4.8 (which additionally reject token budgets, see +// [RejectsTokenThinking]), Sonnet 4.6, the Claude 5 families (e.g. Sonnet 5), +// and the codenamed frontier models (Fable, Mythos). Bedrock-style identifiers +// such as "global.anthropic.claude-sonnet-4-6" are recognised too. +// +// The set is a superset of [RejectsTokenThinking]: a model that rejects token +// budgets must accept adaptive thinking. +// +// See https://platform.claude.com/docs/en/build-with-claude/adaptive-thinking +func SupportsAdaptiveThinking(modelID string) bool { + m := normalize(modelID) + if bare, ok := bedrockClaudeModelName(m); ok { + m = bare + } + // Codenamed frontier models ship with adaptive thinking. + if strings.Contains(m, "fable") || strings.Contains(m, "mythos") { + return true + } + // Only Opus and Sonnet gained adaptive thinking; Haiku, Claude 3.x, and + // non-Claude models do not parse and fall through to false. + major, minor, ok := claudeOpusSonnetVersion(m) + if !ok { + return false + } + // Claude 5+ (Sonnet 5, ...) always support it; within the 4.x line only + // 4.6 and later do. + return major >= 5 || (major == 4 && minor >= 6) +} + +// claudeOpusSonnetVersion extracts the major and minor version of a normalized +// bare Opus/Sonnet id such as "claude-opus-4-6", "claude-sonnet-4.5", or +// "claude-sonnet-5". It reports ok=false for other families (Haiku, Claude 3.x +// like "claude-3-opus-20240229", non-Claude), so only Opus/Sonnet parse. +// +// The major must be one or two digits; a longer run is a date stamp, not a +// version, which excludes Claude 3.x ids where the family precedes the number +// ("claude-3-opus-..."). A minor after '-' or '.' is likewise capped at two +// digits so date-stamped 4.0 ids ("claude-opus-4-20250514") yield minor 0. +func claudeOpusSonnetVersion(m string) (major, minor int, ok bool) { + for _, fam := range []string{"opus", "sonnet"} { + _, rest, found := strings.Cut(m, fam+"-") + if !found { + continue + } + maj, w := leadingInt(rest) + if w == 0 || w > 2 { + return 0, 0, false + } + rest = rest[w:] + if rest != "" && (rest[0] == '-' || rest[0] == '.') { + if n, mw := leadingInt(rest[1:]); mw > 0 && mw <= 2 { + minor = n + } + } + return maj, minor, true + } + return 0, 0, false +} + // UsesThinkingLevel reports whether a Google Gemini model uses level-based // thinking configuration (`thinkingLevel`) rather than token-based budgets. // diff --git a/pkg/modelinfo/modelinfo_test.go b/pkg/modelinfo/modelinfo_test.go index f1932fd7f..f5d6e979b 100644 --- a/pkg/modelinfo/modelinfo_test.go +++ b/pkg/modelinfo/modelinfo_test.go @@ -163,6 +163,89 @@ func TestRejectsTokenThinking(t *testing.T) { } } +func TestSupportsAdaptiveThinking(t *testing.T) { + t.Parallel() + + cases := []struct { + model string + want bool + }{ + // Opus 4.6+ (also reject token thinking). + {"claude-opus-4-6", true}, + {"claude-opus-4-7", true}, + {"claude-opus-4-8", true}, + {"claude-opus-4-8-20260601", true}, + {"claude-opus-4.7", true}, // dotted minor + // Sonnet 4.6+. + {"claude-sonnet-4-6", true}, + {"claude-sonnet-4-6-20251114", true}, + // Claude 5 families. + {"claude-sonnet-5", true}, + {"claude-opus-5", true}, + // Codenamed frontier models. + {"claude-fable-5", true}, + {"claude-mythos-5", true}, + {"claude-mythos-preview", true}, + // Not supported: token-only models. + {"claude-haiku-4-5", false}, + {"claude-sonnet-4-5", false}, + {"claude-sonnet-4-0", false}, + {"claude-opus-4-5", false}, + {"claude-opus-4-1", false}, + {"claude-opus-4-0", false}, + {"claude-opus-4-20250514", false}, // dated 4.0, trailing digits are a date + {"claude-opus-4-1-20250805", false}, + // Claude 3.x (family precedes the version number). + {"claude-3-opus-20240229", false}, + {"claude-3-5-sonnet-20241022", false}, + {"claude-3-7-sonnet-20250219", false}, + {"claude-3-haiku-20240307", false}, + // Bedrock-style identifiers. + {"anthropic.claude-opus-4-8-20260601-v1:0", true}, + {"global.anthropic.claude-sonnet-4-6-20251114-v1:0", true}, + {"us.anthropic.claude-sonnet-4-6-v1:0", true}, + {"global.anthropic.claude-sonnet-4-5-20250929-v1:0", false}, + {"us.anthropic.claude-haiku-4-5-v1:0", false}, + // Case-insensitive and whitespace-tolerant. + {"CLAUDE-SONNET-4-6", true}, + {" claude-opus-4-8 ", true}, + // Non-Claude and empty. + {"gpt-5", false}, + {"gemini-3-pro", false}, + {"", false}, + } + for _, tc := range cases { + t.Run(tc.model, func(t *testing.T) { + t.Parallel() + assert.Equal(t, tc.want, SupportsAdaptiveThinking(tc.model)) + }) + } +} + +// TestSupportsAdaptiveThinkingSupersetOfRejects guards the invariant that every +// model which rejects token-based thinking (and therefore requires adaptive) +// also reports support for adaptive thinking. A violation would send a token +// budget to a model that rejects it, or vice versa. +func TestSupportsAdaptiveThinkingSupersetOfRejects(t *testing.T) { + t.Parallel() + + models := []string{ + "claude-opus-4-5", "claude-opus-4-6", "claude-opus-4-7", "claude-opus-4-8", + "claude-opus-4-8-20260601", "claude-sonnet-4-5", "claude-sonnet-4-6", + "claude-sonnet-5", "claude-haiku-4-5", "claude-fable-5", + "anthropic.claude-opus-4-8-v1:0", "global.anthropic.claude-opus-4-6-v1:0", + } + for _, m := range models { + t.Run(m, func(t *testing.T) { + t.Parallel() + if RejectsTokenThinking(m) { + assert.True(t, SupportsAdaptiveThinking(m), + "%q rejects token thinking but does not support adaptive thinking", m) + } + }) + } +} + func TestUsesThinkingLevel(t *testing.T) { t.Parallel()