Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 7 additions & 3 deletions agent-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -535,7 +535,7 @@
"description": "Whether to track usage"
},
"thinking_budget": {
"description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'), default 'medium'. Anthropic: integer token budget (1024-32768), default 8192. Amazon Bedrock (Claude): same as Anthropic. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max), default -1. Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'), default 'high' for Pro, 'medium' for Flash.",
"description": "Controls reasoning effort/budget. Use 'none' or 0 to disable thinking. OpenAI: string levels ('minimal','low','medium','high'). Anthropic: integer token budget (1024-32768), 'adaptive' (lets the model decide), or effort levels ('low','medium','high','max') which use adaptive thinking with the given effort. Amazon Bedrock (Claude): integer token budget or effort levels ('low','medium','high') mapped to token budgets. Google Gemini 2.5: integer token budget (-1 for dynamic, 0 to disable, 24576 max). Google Gemini 3: string levels ('minimal' Flash only,'low','medium','high'). Thinking is only enabled when explicitly configured.",
"oneOf": [
{
"type": "string",
Expand All @@ -544,9 +544,11 @@
"minimal",
"low",
"medium",
"high"
"high",
"max",
"adaptive"
],
"description": "Reasoning effort level (OpenAI, Gemini 3). Use 'none' to disable thinking."
"description": "Reasoning effort level. 'adaptive'/'max' are Anthropic-specific. Use 'none' to disable thinking."
},
{
"type": "integer",
Expand All @@ -562,6 +564,8 @@
"low",
"medium",
"high",
"max",
"adaptive",
-1,
1024,
8192,
Expand Down
16 changes: 13 additions & 3 deletions examples/thinking_budget.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
agents:
root:
model: gpt-5-mini-min # <- try with gpt-5-mini-high
# model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high
# model: claude-4-5-sonnet-min # <- try with claude-4-5-sonnet-high or claude-opus-4-6-adaptive
# model: gemini-2-5-flash-dynamic-thinking # <- try with -no-thinking, -low or -high variants
description: a helpful assistant that thinks
instruction: you are a helpful assistant who can also use tools, but only if you need to
Expand All @@ -29,15 +29,25 @@ models:
claude-4-5-sonnet-min:
provider: anthropic
model: claude-sonnet-4-5-20250929
thinking_budget: 1024 # <- tokens, 1024 is the minimum
thinking_budget: 1024 # <- explicit token budget (1024-32768) for older models

claude-4-5-sonnet-high:
provider: anthropic
model: claude-sonnet-4-5-20250929
thinking_budget: 32768 # <- tokens, 32768 is the Anthropic suggested maximum without batching
thinking_budget: 32768 # <- explicit token budget (32768 is the Anthropic suggested maximum)
provider_opts:
interleaved_thinking: true # <- enables interleaved thinking, aka tool calling during model reasoning

claude-opus-4-6-adaptive:
provider: anthropic
model: claude-opus-4-6
thinking_budget: adaptive # <- lets the model decide when and how much to think (recommended for 4.6)

claude-opus-4-6-low:
provider: anthropic
model: claude-opus-4-6
thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"

gemini-2-5-flash-dynamic-thinking:
provider: google
model: gemini-2.5-flash
Expand Down
44 changes: 42 additions & 2 deletions pkg/config/latest/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -397,7 +397,10 @@ type ModelConfig struct {
TrackUsage *bool `json:"track_usage,omitempty"`
// ThinkingBudget controls reasoning effort/budget:
// - For OpenAI: accepts string levels "minimal", "low", "medium", "high"
// - For Anthropic: accepts integer token budget (1024-32000)
// - For Anthropic: accepts integer token budget (1024-32000), "adaptive",
// or string levels "low", "medium", "high", "max" (uses adaptive thinking with effort)
// - For Bedrock Claude: accepts integer token budget or string levels
// "minimal", "low", "medium", "high" (mapped to token budgets via EffortTokens)
// - For other providers: may be ignored
ThinkingBudget *ThinkingBudget `json:"thinking_budget,omitempty"`
// Routing defines rules for routing requests to different models.
Expand Down Expand Up @@ -670,6 +673,7 @@ func (d DeferConfig) MarshalYAML() (any, error) {
// ThinkingBudget represents reasoning budget configuration.
// It accepts either a string effort level or an integer token budget:
// - String: "minimal", "low", "medium", "high" (for OpenAI)
// - String: "adaptive" (for Anthropic models that support adaptive thinking)
// - Integer: token count (for Anthropic, range 1024-32768)
type ThinkingBudget struct {
// Effort stores string-based reasoning effort levels
Expand Down Expand Up @@ -717,14 +721,50 @@ func (t ThinkingBudget) MarshalYAML() (any, error) {
// NOT disabled when:
// - Tokens > 0 or Tokens == -1 (explicit token budget)
// - Effort is a real level like "medium" or "high"
// - Effort is "adaptive"
func (t *ThinkingBudget) IsDisabled() bool {
	if t == nil {
		// nil means "not configured": provider defaults apply, thinking
		// is not considered explicitly disabled.
		return false
	}
	if t.Tokens == 0 && t.Effort == "" {
		// Zero value: neither a token budget nor an effort level was set.
		return true
	}
	// Case-insensitive so "none", "None", and "NONE" all disable thinking.
	return strings.EqualFold(t.Effort, "none")
}

// IsAdaptive reports whether the thinking budget requests adaptive mode,
// in which the model itself decides when and how much to think.
// A nil receiver is never adaptive. The comparison is case-insensitive.
func (t *ThinkingBudget) IsAdaptive() bool {
	return t != nil && strings.EqualFold(t.Effort, "adaptive")
}

// EffortTokens translates a string effort level into a concrete token
// budget for providers that only support token-based thinking
// (e.g. Bedrock Claude).
//
// The Anthropic direct API instead maps string levels onto adaptive
// thinking + output_config.effort; see anthropicEffort in the anthropic
// package.
//
// It returns (tokens, true) for a recognised level, and (0, false) when
// the budget uses an explicit token count or an unknown effort string.
func (t *ThinkingBudget) EffortTokens() (int, bool) {
	if t == nil || t.Effort == "" {
		return 0, false
	}
	// Known effort levels and their Bedrock token-budget equivalents.
	levels := map[string]int{
		"minimal": 1024,
		"low":     2048,
		"medium":  8192,
		"high":    16384,
	}
	tokens, ok := levels[strings.ToLower(strings.TrimSpace(t.Effort))]
	return tokens, ok
}

// MarshalJSON implements custom marshaling to output simple string or int format
Expand Down
71 changes: 71 additions & 0 deletions pkg/config/latest/types_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,77 @@ func TestThinkingBudget_MarshalUnmarshal_Zero(t *testing.T) {
require.Equal(t, "thinking_budget: 0\n", string(output))
}

// TestThinkingBudget_IsDisabled exercises IsDisabled across nil, zero,
// "none", token-based, effort-based, adaptive, and dynamic (-1) budgets.
func TestThinkingBudget_IsDisabled(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name   string
		budget *ThinkingBudget
		want   bool
	}{
		{name: "nil", budget: nil, want: false},
		{name: "zero tokens", budget: &ThinkingBudget{Tokens: 0}, want: true},
		{name: "none effort", budget: &ThinkingBudget{Effort: "none"}, want: true},
		{name: "positive tokens", budget: &ThinkingBudget{Tokens: 8192}, want: false},
		{name: "medium effort", budget: &ThinkingBudget{Effort: "medium"}, want: false},
		{name: "adaptive effort", budget: &ThinkingBudget{Effort: "adaptive"}, want: false},
		{name: "negative tokens (dynamic)", budget: &ThinkingBudget{Tokens: -1}, want: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			require.Equal(t, tc.want, tc.budget.IsDisabled())
		})
	}
}

// TestThinkingBudget_IsAdaptive checks that only the "adaptive" effort
// string flips IsAdaptive to true; nil, other levels, and plain token
// budgets do not.
func TestThinkingBudget_IsAdaptive(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name   string
		budget *ThinkingBudget
		want   bool
	}{
		{name: "nil", budget: nil, want: false},
		{name: "adaptive", budget: &ThinkingBudget{Effort: "adaptive"}, want: true},
		{name: "medium", budget: &ThinkingBudget{Effort: "medium"}, want: false},
		{name: "tokens", budget: &ThinkingBudget{Tokens: 8192}, want: false},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			require.Equal(t, tc.want, tc.budget.IsAdaptive())
		})
	}
}

// TestThinkingBudget_EffortTokens verifies the effort-level to token
// mappings plus the not-applicable cases (nil, adaptive, none, explicit
// tokens, empty effort), which all yield (0, false).
func TestThinkingBudget_EffortTokens(t *testing.T) {
	t.Parallel()

	cases := []struct {
		name       string
		budget     *ThinkingBudget
		wantTokens int
		wantOK     bool
	}{
		{name: "nil", budget: nil},
		{name: "minimal", budget: &ThinkingBudget{Effort: "minimal"}, wantTokens: 1024, wantOK: true},
		{name: "low", budget: &ThinkingBudget{Effort: "low"}, wantTokens: 2048, wantOK: true},
		{name: "medium", budget: &ThinkingBudget{Effort: "medium"}, wantTokens: 8192, wantOK: true},
		{name: "high", budget: &ThinkingBudget{Effort: "high"}, wantTokens: 16384, wantOK: true},
		{name: "adaptive", budget: &ThinkingBudget{Effort: "adaptive"}},
		{name: "none", budget: &ThinkingBudget{Effort: "none"}},
		{name: "explicit tokens", budget: &ThinkingBudget{Tokens: 4096}},
		{name: "empty effort", budget: &ThinkingBudget{}},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			t.Parallel()
			tokens, ok := tc.budget.EffortTokens()
			require.Equal(t, tc.wantOK, ok)
			require.Equal(t, tc.wantTokens, tokens)
		})
	}
}

func TestAgents_UnmarshalYAML_RejectsUnknownFields(t *testing.T) {
t.Parallel()

Expand Down
44 changes: 31 additions & 13 deletions pkg/model/provider/anthropic/beta_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -95,20 +95,38 @@ func (c *Client) createBetaStream(
// For interleaved thinking to make sense, we use a default of 16384 tokens for the thinking budget
thinkingEnabled := c.ModelOptions.Thinking() == nil || *c.ModelOptions.Thinking()
if thinkingEnabled {
thinkingTokens := int64(16384)
if c.ModelConfig.ThinkingBudget != nil {
thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens)
if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() {
// Adaptive thinking: let the model decide how much thinking to do
adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam()
params.Thinking = anthropic.BetaThinkingConfigParamUnion{
OfAdaptive: &adaptive,
}
slog.Debug("Anthropic Beta API using adaptive thinking")
} else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok {
// Effort level: use adaptive thinking + output_config.effort
adaptive := anthropic.NewBetaThinkingConfigAdaptiveParam()
params.Thinking = anthropic.BetaThinkingConfigParamUnion{
OfAdaptive: &adaptive,
}
params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort)
slog.Debug("Anthropic Beta API using adaptive thinking with effort",
"effort", effort)
} else {
slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
}
switch {
case thinkingTokens >= 1024 && thinkingTokens < maxTokens:
params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens)
slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
case thinkingTokens >= maxTokens:
slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens)
default:
slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens)
thinkingTokens := int64(16384)
if c.ModelConfig.ThinkingBudget != nil {
thinkingTokens = int64(c.ModelConfig.ThinkingBudget.Tokens)
} else {
slog.Info("Anthropic Beta API using default thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
}
switch {
case thinkingTokens >= 1024 && thinkingTokens < maxTokens:
params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(thinkingTokens)
slog.Debug("Anthropic Beta API using thinking_budget with interleaved thinking", "budget_tokens", thinkingTokens)
case thinkingTokens >= maxTokens:
slog.Warn("Anthropic Beta API thinking_budget must be less than max_tokens, ignoring", "tokens", thinkingTokens, "max_tokens", maxTokens)
default:
slog.Warn("Anthropic Beta API thinking_budget below minimum (1024), ignoring", "tokens", thinkingTokens)
}
}
} else {
slog.Debug("Anthropic Beta API: Thinking disabled via /think command")
Expand Down
56 changes: 54 additions & 2 deletions pkg/model/provider/anthropic/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,23 @@ func (c *Client) getResponseTrailer() http.Header {
// adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
//
// This only applies to fixed token budgets. Adaptive thinking and effort-based
// budgets don't need adjustment since the model manages its own thinking allocation.
func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 {
if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.IsAdaptive() {
return maxTokens, nil
}
// Effort-based budgets use adaptive thinking — no token adjustment needed.
if _, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok {
return maxTokens, nil
}

thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
if thinkingTokens <= 0 {
return maxTokens, nil
}

minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer

if maxTokens <= thinkingTokens {
Expand Down Expand Up @@ -297,7 +308,25 @@ func (c *Client) CreateChatCompletionStream(

// Apply thinking budget first, as it affects whether we can set temperature
thinkingEnabled := false
if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 {
if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.IsAdaptive() {
// Adaptive thinking: let the model decide how much thinking to do
adaptive := anthropic.NewThinkingConfigAdaptiveParam()
params.Thinking = anthropic.ThinkingConfigParamUnion{
OfAdaptive: &adaptive,
}
thinkingEnabled = true
slog.Debug("Anthropic API using adaptive thinking (standard messages)")
} else if effort, ok := anthropicEffort(c.ModelConfig.ThinkingBudget); ok {
// Effort level: use adaptive thinking + output_config.effort
adaptive := anthropic.NewThinkingConfigAdaptiveParam()
params.Thinking = anthropic.ThinkingConfigParamUnion{
OfAdaptive: &adaptive,
}
params.OutputConfig.Effort = anthropic.OutputConfigEffort(effort)
thinkingEnabled = true
slog.Debug("Anthropic API using adaptive thinking with effort",
"effort", effort)
} else if c.ModelConfig.ThinkingBudget != nil && c.ModelConfig.ThinkingBudget.Tokens > 0 {
thinkingTokens := int64(c.ModelConfig.ThinkingBudget.Tokens)
switch {
case thinkingTokens >= 1024 && thinkingTokens < maxTokens:
Expand Down Expand Up @@ -895,6 +924,29 @@ func differenceIDs(a, b map[string]struct{}) []string {
return missing
}

// anthropicEffort maps a ThinkingBudget effort string to an Anthropic API
// effort level ("low", "medium", "high", "max"). It returns ("", false)
// when the budget is nil, token-based, adaptive, or an unrecognised string.
func anthropicEffort(b *latest.ThinkingBudget) (string, bool) {
	if b == nil {
		return "", false
	}
	level := strings.ToLower(strings.TrimSpace(b.Effort))
	// "minimal" does not exist in the Anthropic API; use the closest level.
	if level == "minimal" {
		level = "low"
	}
	switch level {
	case "low", "medium", "high", "max":
		return level, true
	}
	return "", false
}

// anthropicContextLimit returns a reasonable default context window for Anthropic models.
// We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time.
func anthropicContextLimit(model string) int64 {
Expand Down
26 changes: 16 additions & 10 deletions pkg/model/provider/bedrock/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -275,16 +275,23 @@ func (c *Client) buildInferenceConfig() *types.InferenceConfiguration {
return cfg
}

// resolveThinkingTokens returns the effective token budget for thinking.
// Effort-level strings are converted to token counts; otherwise the
// explicitly configured token count is used. Returns 0 when no thinking
// budget is configured.
func (c *Client) resolveThinkingTokens() int {
	budget := c.ModelConfig.ThinkingBudget
	if budget == nil {
		return 0
	}
	// Effort strings ("minimal", "low", ...) map to fixed token budgets.
	tokens, ok := budget.EffortTokens()
	if !ok {
		// No effort mapping: fall back to the explicit token count.
		tokens = budget.Tokens
	}
	return tokens
}

// isThinkingEnabled mirrors the validation in buildAdditionalModelRequestFields
// to determine if thinking params will affect inference config (temp/topP constraints).
func (c *Client) isThinkingEnabled() bool {
if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 {
return false
}

tokens := c.ModelConfig.ThinkingBudget.Tokens

// Check minimum (Claude requires at least 1024 tokens for thinking)
tokens := c.resolveThinkingTokens()
if tokens < 1024 {
return false
}
Expand All @@ -310,12 +317,11 @@ func (c *Client) promptCachingEnabled() bool {

// buildAdditionalModelRequestFields configures Claude's extended thinking (reasoning) mode.
func (c *Client) buildAdditionalModelRequestFields() document.Interface {
if c.ModelConfig.ThinkingBudget == nil || c.ModelConfig.ThinkingBudget.Tokens <= 0 {
tokens := c.resolveThinkingTokens()
if tokens <= 0 {
return nil
}

tokens := c.ModelConfig.ThinkingBudget.Tokens

// Validate minimum (Claude requires at least 1024 tokens for thinking)
if tokens < 1024 {
slog.Warn("Bedrock thinking_budget below minimum (1024), ignoring",
Expand Down
Loading
Loading