Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agent-schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -634,7 +634,7 @@
},
"provider_opts": {
"type": "object",
"description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true). openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
"description": "Provider-specific options. Sampling parameters: top_k (integer, supported by anthropic, google, amazon-bedrock, and custom OpenAI-compatible providers like vLLM/Ollama), repetition_penalty (float, forwarded to custom OpenAI-compatible providers), min_p (float, forwarded to custom providers), seed (integer, forwarded to OpenAI). Infrastructure options: dmr: runtime_flags. anthropic/amazon-bedrock (Claude): interleaved_thinking (boolean, default true), thinking_display ('summarized', 'omitted', or 'display') controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking by default ('omitted'); set thinking_display: summarized (or thinking_display: display) to receive thinking blocks. openai: transport ('sse' or 'websocket') to choose between SSE and WebSocket streaming for the Responses API. openai/anthropic/google: rerank_prompt (string) to fully override the system prompt used for RAG reranking (advanced - prefer using results.reranking.criteria for domain-specific guidance). Google: google_search (boolean) enables Google Search grounding, google_maps (boolean) enables Google Maps grounding, code_execution (boolean) enables server-side code execution.",
"additionalProperties": true
},
"track_usage": {
Expand Down
16 changes: 16 additions & 0 deletions docs/configuration/models/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,22 @@ models:
interleaved_thinking: false # disable if needed
```

## Thinking Display (Anthropic)

For Anthropic Claude models, `thinking_display` controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); set this provider option to receive summarized thinking:

```yaml
models:
opus-4-7:
provider: anthropic
model: claude-opus-4-7
thinking_budget: adaptive
provider_opts:
thinking_display: summarized # "summarized", "display", or "omitted"
```

See the [Anthropic provider page]({{ '/providers/anthropic/#thinking-display' | relative_url }}) for details.

## Examples by Provider

```yaml
Expand Down
22 changes: 22 additions & 0 deletions docs/providers/anthropic/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,28 @@ Object form (forward-compatible with future budget types):

See the full schema on the [Model Configuration]({{ '/configuration/models/#task-budget' | relative_url }}) page.

## Thinking Display

Controls whether thinking blocks are returned in responses when thinking is enabled. Claude Opus 4.7 hides thinking content by default (`omitted`); earlier Claude 4 models default to `summarized`. Set `thinking_display` in `provider_opts` to override:

```yaml
models:
claude-opus-4-7:
provider: anthropic
model: claude-opus-4-7
thinking_budget: adaptive
provider_opts:
thinking_display: summarized # "summarized", "display", or "omitted"
```

Valid values:

- `summarized`: thinking blocks are returned with summarized thinking text (default for Claude 4 models prior to Opus 4.7).
- `display`: thinking blocks are returned for display (use this to re-enable thinking output on Opus 4.7).
- `omitted`: thinking blocks are returned with an empty thinking field; the signature is still returned for multi-turn continuity (default for Opus 4.7). Useful to reduce time-to-first-text-token when streaming.

Note: `thinking_display` applies both to token-count `thinking_budget` values and to adaptive/effort-based budgets. Full thinking tokens are billed regardless of the `thinking_display` value.

<div class="callout callout-info" markdown="1">
<div class="callout-title">ℹ️ Note
</div>
Expand Down
7 changes: 7 additions & 0 deletions examples/thinking_budget.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,13 @@ models:
model: claude-opus-4-6
thinking_budget: low # <- adaptive thinking with low effort: "low", "medium", "high", "max"

claude-opus-4-7-summarized:
provider: anthropic
model: claude-opus-4-6 # <- Opus 4.7 hides thinking by default; use the same flag with any recent Claude model
thinking_budget: adaptive
provider_opts:
thinking_display: summarized # <- "summarized", "display", or "omitted" (Opus 4.7 defaults to omitted)

gemini-2-5-flash-dynamic-thinking:
provider: google
model: gemini-2.5-flash
Expand Down
12 changes: 1 addition & 11 deletions pkg/model/provider/anthropic/beta_client.go
Original file line number Diff line number Diff line change
Expand Up @@ -90,17 +90,7 @@ func (c *Client) createBetaStream(
// Configure thinking if a thinking budget is set in the model config.
// The beta client is also used for structured output and file attachments,
// which don't require thinking.
if budget := c.ModelConfig.ThinkingBudget; budget != nil {
if effort, ok := anthropicThinkingEffort(budget); ok {
adaptive := anthropic.BetaThinkingConfigAdaptiveParam{}
params.Thinking = anthropic.BetaThinkingConfigParamUnion{OfAdaptive: &adaptive}
params.OutputConfig.Effort = anthropic.BetaOutputConfigEffort(effort)
slog.Debug("Anthropic Beta API using adaptive thinking", "effort", effort)
} else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
params.Thinking = anthropic.BetaThinkingConfigParamOfEnabled(tokens)
slog.Debug("Anthropic Beta API using thinking_budget", "budget_tokens", tokens)
}
}
c.applyBetaThinkingConfig(&params, maxTokens)

// Forward task_budget via `output_config.task_budget` (Anthropic
// Opus 4.7+) and enable the corresponding beta header. Older Claude
Expand Down
121 changes: 1 addition & 120 deletions pkg/model/provider/anthropic/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@ import (

"github.com/docker/docker-agent/pkg/chat"
"github.com/docker/docker-agent/pkg/config/latest"
"github.com/docker/docker-agent/pkg/effort"
"github.com/docker/docker-agent/pkg/environment"
"github.com/docker/docker-agent/pkg/httpclient"
"github.com/docker/docker-agent/pkg/model/provider/base"
Expand All @@ -35,79 +34,6 @@ type Client struct {
fileManager *FileManager
}

// adjustMaxTokensForThinking checks if max_tokens needs adjustment for thinking_budget.
// Anthropic's max_tokens represents the combined budget for thinking + output tokens.
// Returns the adjusted maxTokens value and an error if user-set max_tokens is too low.
//
// Only fixed token budgets need adjustment. Adaptive and effort-based budgets
// don't need it since the model manages its own thinking allocation.
func (c *Client) adjustMaxTokensForThinking(maxTokens int64) (int64, error) {
	budget := c.ModelConfig.ThinkingBudget
	if budget == nil {
		return maxTokens, nil
	}
	// Adaptive and effort-based budgets manage their own thinking allocation,
	// so no token arithmetic is required.
	if _, isEffort := anthropicThinkingEffort(budget); isEffort {
		return maxTokens, nil
	}

	thinkingTokens := int64(budget.Tokens)
	// Nothing to do when no fixed budget is configured, or when max_tokens
	// already leaves room for output on top of the thinking budget.
	if thinkingTokens <= 0 || maxTokens > thinkingTokens {
		return maxTokens, nil
	}

	minRequired := thinkingTokens + 1024 // configured thinking budget + minimum output buffer

	if c.ModelConfig.MaxTokens != nil {
		// User explicitly set max_tokens too low - return error
		slog.Error("Anthropic: max_tokens must be greater than thinking_budget",
			"max_tokens", maxTokens,
			"thinking_budget", thinkingTokens)
		return 0, fmt.Errorf("anthropic: max_tokens (%d) must be greater than thinking_budget (%d); increase max_tokens to at least %d",
			maxTokens, thinkingTokens, minRequired)
	}

	// Auto-adjust when user didn't set max_tokens
	slog.Info("Anthropic: auto-adjusting max_tokens to accommodate thinking_budget",
		"original_max_tokens", maxTokens,
		"thinking_budget", thinkingTokens,
		"new_max_tokens", minRequired)
	// return the configured thinking budget + 8192 because that's the default
	// max_tokens value for anthropic models when unspecified by the user
	return thinkingTokens + 8192, nil
}

// interleavedThinkingEnabled returns false unless explicitly enabled via
// models:provider_opts:interleaved_thinking: true
func (c *Client) interleavedThinkingEnabled() bool {
	// Default to false when the client or its provider options are absent.
	if c == nil || len(c.ModelConfig.ProviderOpts) == 0 {
		return false
	}
	raw, found := c.ModelConfig.ProviderOpts["interleaved_thinking"]
	if !found {
		return false
	}
	// The option may arrive as several YAML/JSON scalar types; coerce each
	// into a truthy/falsy decision.
	switch v := raw.(type) {
	case bool:
		return v
	case string:
		normalized := strings.TrimSpace(strings.ToLower(v))
		// Any string other than an explicit negative counts as enabled.
		return normalized != "false" && normalized != "0" && normalized != "no"
	case int:
		return v != 0
	case int64:
		return v != 0
	case float64:
		return v != 0
	}
	return false
}

// NewClient creates a new Anthropic client from the provided configuration
func NewClient(ctx context.Context, cfg *latest.ModelConfig, env environment.Provider, opts ...options.Opt) (*Client, error) {
if cfg == nil {
Expand Down Expand Up @@ -288,20 +214,7 @@ func (c *Client) CreateChatCompletionStream(
}

// Apply thinking budget first, as it affects whether we can set temperature
thinkingEnabled := false
if budget := c.ModelConfig.ThinkingBudget; budget != nil {
if effortStr, ok := anthropicThinkingEffort(budget); ok {
adaptive := anthropic.ThinkingConfigAdaptiveParam{}
params.Thinking = anthropic.ThinkingConfigParamUnion{OfAdaptive: &adaptive}
params.OutputConfig.Effort = anthropic.OutputConfigEffort(effortStr)
thinkingEnabled = true
slog.Debug("Anthropic API using adaptive thinking", "effort", effortStr)
} else if tokens, ok := validThinkingTokens(int64(budget.Tokens), maxTokens); ok {
params.Thinking = anthropic.ThinkingConfigParamOfEnabled(tokens)
thinkingEnabled = true
slog.Debug("Anthropic API using thinking_budget", "budget_tokens", tokens)
}
}
thinkingEnabled := c.applyThinkingConfig(&params, maxTokens)

// Temperature and TopP cannot be set when extended thinking is enabled
// (Anthropic requires temperature=1.0 which is the default when thinking is on)
Expand Down Expand Up @@ -753,38 +666,6 @@ func contentArray(m map[string]any) []any {
return nil
}

// validThinkingTokens validates that the token budget is within the
// acceptable range for Anthropic (>= 1024 and < maxTokens).
// Returns (tokens, true) if valid, or (0, false) with a warning log if not.
func validThinkingTokens(tokens, maxTokens int64) (int64, bool) {
	switch {
	case tokens < 1024:
		// Anthropic rejects thinking budgets below 1024 tokens.
		slog.Warn("Anthropic thinking_budget below minimum (1024), ignoring", "tokens", tokens)
	case tokens >= maxTokens:
		// Thinking must leave room for output within max_tokens.
		slog.Warn("Anthropic thinking_budget must be less than max_tokens, ignoring", "tokens", tokens, "max_tokens", maxTokens)
	default:
		return tokens, true
	}
	return 0, false
}

// anthropicThinkingEffort returns the Anthropic API effort level for the given
// ThinkingBudget. It covers both explicit adaptive mode and string effort
// levels. Returns ("", false) when the budget uses token counts or is nil.
func anthropicThinkingEffort(b *latest.ThinkingBudget) (string, bool) {
	if b == nil {
		return "", false
	}
	// Explicit adaptive mode carries its own effort string.
	if adaptive, ok := b.AdaptiveEffort(); ok {
		return adaptive, true
	}
	// Otherwise translate a configured effort level, if any, into the
	// Anthropic-specific value.
	if level, ok := b.EffortLevel(); ok {
		return effort.ForAnthropic(level)
	}
	return "", false
}

// anthropicContextLimit returns a reasonable default context window for Anthropic models.
// We default to 200k tokens, which is what 3.5-4.5 models support; adjust as needed over time.
func anthropicContextLimit(model string) int64 {
Expand Down
Loading
Loading