Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
95 changes: 67 additions & 28 deletions pkg/model/provider/anthropic/thinking.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,23 +43,22 @@ func (c *Client) floorMaxTokensForNoThinking(maxTokens int64) int64 {
// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
//
// Only fixed token budgets need adjustment. Adaptive and effort-based budgets
// don't need it since the model manages its own thinking allocation.
// It operates on the resolved budget (see [Client.resolveThinkingBudget]) so an
// effort level that falls back to a token budget on a non-adaptive model gets
// the same headroom as any other token budget. Only fixed token budgets need
// adjustment; adaptive and effort-based budgets are managed by the model itself.
func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
if c.ModelConfig.ThinkingBudget == nil {
return maxTokens, nil
}
// Adaptive and effort-based budgets: no token adjustment needed.
if _, ok := anthropicThinkingEffort(c.ModelConfig.ThinkingBudget); ok {
budget := c.resolveThinkingBudget()
if budget == nil {
return maxTokens, nil
}
// Models that require adaptive thinking will have their token budget coerced
// to adaptive at request time, so no adjustment is needed here either.
if modelinfo.RejectsTokenThinking(c.ModelConfig.Model) {
// Adaptive and effort-based budgets: no token adjustment needed; the model
// manages its own thinking allocation within max_tokens.
if _, ok := anthropicThinkingEffort(budget); ok {
return maxTokens, nil
}

thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
thinkingTokens := int64(budget.Tokens)
if thinkingTokens <= 0 {
return maxTokens, nil
}
Expand Down Expand Up @@ -131,25 +130,45 @@ func validThinkingTokens(tokens, maxTokens int64) (int64, bool) {
return tokens, true
}

// coerceAdaptiveThinking returns an adaptive ThinkingBudget when the configured
// model rejects token-based thinking budgets but the user supplied one.
// Otherwise it returns the configured budget unchanged. It never mutates
// c.ModelConfig.ThinkingBudget.
func (c *Client) coerceAdaptiveThinking() *latest.ThinkingBudget {
// resolveThinkingBudget returns the ThinkingBudget to actually send to the API,
// adapting the configured budget to what the target model accepts. It never
// mutates c.ModelConfig.ThinkingBudget.
//
// Two model-specific rewrites happen here, in opposite directions:
// - A token budget on a model that rejects token-based thinking (Opus 4.6+)
// becomes adaptive thinking.
// - An effort or adaptive budget on a model that does not support adaptive
// thinking (Haiku 4.5, Sonnet 4.5 and earlier, ...) becomes a token budget,
// since `thinking.type=adaptive`/`output_config.effort` are rejected with a
// 400 on those models (issue #3362). This is the "regular thinking options"
// path for effort levels set via the TUI Shift+Tab cycle.
//
// Disabled, zero, and negative budgets are passed through unchanged so
// downstream code keeps treating them as "thinking off".
func (c *Client) resolveThinkingBudget() *latest.ThinkingBudget {
budget := c.ModelConfig.ThinkingBudget
if budget == nil {
return nil
if budget == nil || budget.IsDisabled() {
return budget
}

if _, ok := anthropicThinkingEffort(budget); ok {
return budget // already adaptive or effort-based.
}
// Only coerce a real, positive token budget. Disabled/zero/negative
// budgets are passed through so downstream code keeps treating them as
// "thinking off" instead of silently enabling adaptive thinking.
if budget.IsDisabled() || budget.Tokens <= 0 {
return budget
// Effort or adaptive budget.
if modelinfo.SupportsAdaptiveThinking(c.ModelConfig.Model) {
return budget
}
tokens, ok := effortBudgetTokens(budget)
if !ok {
return budget
}
slog.Warn("Anthropic: model does not support adaptive thinking; using token-based thinking budget",
"model", c.ModelConfig.Model,
"effort", budget.Effort,
"budget_tokens", tokens)
return &latest.ThinkingBudget{Tokens: tokens}
}
if !modelinfo.RejectsTokenThinking(c.ModelConfig.Model) {

// Token budget. Only coerce a real, positive value.
if budget.Tokens <= 0 || !modelinfo.RejectsTokenThinking(c.ModelConfig.Model) {
return budget
}
slog.Warn("Anthropic: model rejects token-based thinking budgets; switching to adaptive thinking",
Expand All @@ -158,6 +177,26 @@ func (c *Client) coerceAdaptiveThinking() *latest.ThinkingBudget {
return &latest.ThinkingBudget{Effort: "adaptive"}
}

// effortBudgetTokens converts an effort-based or adaptive ThinkingBudget into
// a fixed token count, for use with models that only accept token-based
// extended thinking.
//
// Adaptive forms ("adaptive", "adaptive/low") are checked first: the level
// string reported by b.AdaptiveEffort is parsed with effort.Parse. Otherwise
// the plain effort level ("high") from b.EffortLevel is used. In both cases
// the level → token mapping is delegated to effort.BedrockTokens.
//
// It returns (0, false) when the budget is neither adaptive nor effort-based
// (e.g. a token-count budget) or when the adaptive level cannot be parsed.
func effortBudgetTokens(b *latest.ThinkingBudget) (int, bool) {
	// Adaptive budget: the level arrives as a raw string and must be parsed.
	if raw, isAdaptive := b.AdaptiveEffort(); isAdaptive {
		parsed, valid := effort.Parse(raw)
		if !valid {
			return 0, false
		}
		return effort.BedrockTokens(parsed)
	}

	// Plain effort level: already typed, so it maps directly.
	lvl, found := b.EffortLevel()
	if !found {
		return 0, false
	}
	return effort.BedrockTokens(lvl)
}

// anthropicThinkingEffort returns the Anthropic API effort level for the given
// ThinkingBudget. It covers both explicit adaptive mode and string effort
// levels. Returns ("", false) when the budget uses token counts or is nil.
Expand Down Expand Up @@ -218,7 +257,7 @@ func anthropicThinkingDisplay(opts map[string]any) (string, bool) {
// based on the model's ThinkingBudget and provider_opts.thinking_display.
// Returns true when thinking is enabled (i.e., temperature/top_p must not be set).
func (c *Client) applyThinkingConfig(params *anthropic.MessageNewParams, maxTokens int64) bool {
budget := c.coerceAdaptiveThinking()
budget := c.resolveThinkingBudget()
if budget == nil {
return false
}
Expand Down Expand Up @@ -250,7 +289,7 @@ func (c *Client) applyThinkingConfig(params *anthropic.MessageNewParams, maxToke
// applyBetaThinkingConfig configures extended thinking on a BetaMessageNewParams
// based on the model's ThinkingBudget and provider_opts.thinking_display.
func (c *Client) applyBetaThinkingConfig(params *anthropic.BetaMessageNewParams, maxTokens int64) {
budget := c.coerceAdaptiveThinking()
budget := c.resolveThinkingBudget()
if budget == nil {
return
}
Expand Down
142 changes: 132 additions & 10 deletions pkg/model/provider/anthropic/thinking_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -99,10 +99,15 @@ func TestAnthropicThinkingDisplay(t *testing.T) {
}
}

// defaultTestModel is an Anthropic model that does NOT require the
// adaptive-thinking workaround, so token-based thinking budgets are
// preserved as-is.
const defaultTestModel = "claude-sonnet-4-5"
// defaultTestModel is an Anthropic model that supports adaptive thinking but is
// NOT in the token-rejecting set (Opus 4.6+), so effort/adaptive budgets use the
// adaptive-thinking API while token-based budgets are preserved as-is.
const defaultTestModel = "claude-sonnet-4-6"

// nonAdaptiveTestModel is an Anthropic model that does NOT support adaptive
// thinking (issue #3362). Tests use it to check that effort/adaptive budgets
// fall back to token-based extended thinking instead of being sent as
// adaptive/effort settings.
const nonAdaptiveTestModel = "claude-haiku-4-5"

// clientWith builds a minimal Client with the given ThinkingBudget and
// provider_opts on defaultTestModel.
Expand Down Expand Up @@ -247,6 +252,66 @@ func TestApplyThinkingConfig(t *testing.T) {
wantAdaptive: true,
wantEffort: "low",
},
// Issue #3362: an effort level (as set via the TUI Shift+Tab cycle) uses
// adaptive thinking only on models that support it, and falls back to a
// token budget everywhere else.
{
name: "plain effort level on adaptive model uses adaptive",
budget: &latest.ThinkingBudget{Effort: "high"},
maxTokens: 8192,
wantEnabled: true,
wantAdaptive: true,
wantEffort: "high",
},
{
name: "effort level on non-adaptive model falls back to token budget",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "medium"},
maxTokens: 16384,
wantEnabled: true,
wantTokens: 8192,
},
{
name: "effort high on non-adaptive model maps to 16384 tokens",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "high"},
maxTokens: 32768,
wantEnabled: true,
wantTokens: 16384,
},
{
name: "adaptive budget on non-adaptive model falls back to token budget",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "adaptive"},
maxTokens: 32768,
wantEnabled: true,
wantTokens: 16384,
},
{
name: "non-adaptive token fallback keeps display",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "low"},
opts: map[string]any{"thinking_display": "omitted"},
maxTokens: 8192,
wantEnabled: true,
wantTokens: 2048,
wantDisplayJSON: "omitted",
},
{
name: "non-adaptive token fallback dropped when exceeding max_tokens",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "high"}, // 16384 > maxTokens
maxTokens: 8192,
wantEnabled: false,
},
{
name: "sonnet-4-5 effort falls back to token budget",
model: "claude-sonnet-4-5",
budget: &latest.ThinkingBudget{Effort: "high"},
maxTokens: 32768,
wantEnabled: true,
wantTokens: 16384,
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -346,6 +411,33 @@ func TestApplyBetaThinkingConfig(t *testing.T) {
wantEffort: "high",
wantDisplayJSON: "omitted",
},
// Issue #3362: effort/adaptive budgets fall back to token thinking on
// models without adaptive support.
{
name: "plain effort on adaptive model uses adaptive",
budget: &latest.ThinkingBudget{Effort: "high"},
maxTokens: 8192,
wantAdaptive: true,
wantEffort: "high",
},
{
name: "effort on non-adaptive model falls back to token budget",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "medium"},
maxTokens: 16384,
wantEnabled: true,
wantTokens: 8192,
},
{
name: "adaptive on non-adaptive model falls back to token budget with display",
model: "claude-haiku-4-5",
budget: &latest.ThinkingBudget{Effort: "adaptive"},
opts: map[string]any{"thinking_display": "omitted"},
maxTokens: 32768,
wantEnabled: true,
wantTokens: 16384,
wantDisplayJSON: "omitted",
},
}

for _, tt := range tests {
Expand Down Expand Up @@ -428,23 +520,29 @@ func TestAdjustMaxTokensForThinking(t *testing.T) {
})
}

func TestCoerceAdaptiveThinking(t *testing.T) {
func TestResolveThinkingBudget(t *testing.T) {
t.Parallel()
t.Run("nil budget stays nil", func(t *testing.T) {
c := clientWithModel("claude-opus-4-7", nil, nil)
assert.Nil(t, c.coerceAdaptiveThinking())
assert.Nil(t, c.resolveThinkingBudget())
})

t.Run("non-affected model preserves token budget", func(t *testing.T) {
in := &latest.ThinkingBudget{Tokens: 4096}
c := clientWithModel(defaultTestModel, in, nil)
assert.Same(t, in, c.coerceAdaptiveThinking(), "budget pointer must not be replaced")
assert.Same(t, in, c.resolveThinkingBudget(), "budget pointer must not be replaced")
})

t.Run("adaptive-capable model preserves effort budget", func(t *testing.T) {
in := &latest.ThinkingBudget{Effort: "high"}
c := clientWithModel(defaultTestModel, in, nil)
assert.Same(t, in, c.resolveThinkingBudget())
})

t.Run("opus-4-6 token budget is coerced to adaptive", func(t *testing.T) {
in := &latest.ThinkingBudget{Tokens: 4096}
c := clientWithModel("claude-opus-4-6", in, nil)
got := c.coerceAdaptiveThinking()
got := c.resolveThinkingBudget()
require.NotNil(t, got)
assert.Equal(t, "adaptive", got.Effort)
assert.Equal(t, 0, got.Tokens)
Expand All @@ -456,9 +554,33 @@ func TestCoerceAdaptiveThinking(t *testing.T) {
t.Run("opus-4-7 adaptive budget is preserved as-is", func(t *testing.T) {
in := &latest.ThinkingBudget{Effort: "adaptive/low"}
c := clientWithModel("claude-opus-4-7", in, nil)
assert.Same(t, in, c.coerceAdaptiveThinking())
assert.Same(t, in, c.resolveThinkingBudget())
})

// Issue #3362: effort/adaptive budgets on models without adaptive-thinking
// support fall back to a token budget instead of a 400.
effortFallbackCases := map[string]struct {
budget *latest.ThinkingBudget
wantTokens int
}{
"effort high -> 16384": {&latest.ThinkingBudget{Effort: "high"}, 16384},
"effort medium -> 8192": {&latest.ThinkingBudget{Effort: "medium"}, 8192},
"effort low -> 2048": {&latest.ThinkingBudget{Effort: "low"}, 2048},
"adaptive -> 16384 (high)": {&latest.ThinkingBudget{Effort: "adaptive"}, 16384},
"adaptive/low -> 2048": {&latest.ThinkingBudget{Effort: "adaptive/low"}, 2048},
}
for name, tc := range effortFallbackCases {
t.Run("haiku-4-5 "+name, func(t *testing.T) {
c := clientWithModel(nonAdaptiveTestModel, tc.budget, nil)
got := c.resolveThinkingBudget()
require.NotNil(t, got)
assert.Equal(t, tc.wantTokens, got.Tokens)
assert.Empty(t, got.Effort)
// Original must not be mutated.
assert.Empty(t, tc.budget.Tokens)
})
}

// Disabled or non-positive token budgets must NOT be silently coerced to
// adaptive thinking on Opus 4.6/4.7 — the user has either explicitly
// disabled thinking or supplied an invalid value.
Expand All @@ -471,7 +593,7 @@ func TestCoerceAdaptiveThinking(t *testing.T) {
for name, in := range disabledCases {
t.Run("opus-4-7 "+name+" passes through", func(t *testing.T) {
c := clientWithModel("claude-opus-4-7", in, nil)
assert.Same(t, in, c.coerceAdaptiveThinking())
assert.Same(t, in, c.resolveThinkingBudget())
})
}
}
Expand Down
Loading
Loading