From f5ec1b5e7be36adf9a1ce0d40b3f53cb711add88 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Wed, 22 Oct 2025 14:32:31 +0200 Subject: [PATCH 1/3] chore: set cache control markers on tools Signed-off-by: Danny Kopping --- intercept_anthropic_messages_base.go | 22 ++++++++++++++++++++++ intercept_anthropic_messages_streaming.go | 14 ++++++++++++++ 2 files changed, 36 insertions(+) diff --git a/intercept_anthropic_messages_base.go b/intercept_anthropic_messages_base.go index 35e8642..2a9a8f8 100644 --- a/intercept_anthropic_messages_base.go +++ b/intercept_anthropic_messages_base.go @@ -1,9 +1,13 @@ package aibridge import ( + "fmt" + "net/http" + "net/http/httputil" "strings" "github.com/anthropics/anthropic-sdk-go" + "github.com/anthropics/anthropic-sdk-go/option" "github.com/coder/aibridge/mcp" "github.com/google/uuid" @@ -44,6 +48,16 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() { return } + // Any existing tool definitions. + for _, tool := range i.req.Tools { + if tool.OfTool == nil { + continue + } + + // Explicitly unset all cache control settings, we'll set one at the end. + tool.OfTool.CacheControl = anthropic.CacheControlEphemeralParam{} + } + // Inject tools. for _, tool := range i.mcpProxy.ListTools() { i.req.Tools = append(i.req.Tools, anthropic.ToolUnionParam{ @@ -55,10 +69,18 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() { Name: tool.ID, Description: anthropic.String(tool.Description), Type: anthropic.ToolTypeCustom, + // Explicitly unset all cache control settings, we'll set one at the end. + CacheControl: anthropic.CacheControlEphemeralParam{}, }, }) } + // See https://docs.claude.com/en/docs/build-with-claude/prompt-caching. + // "The cache_control parameter on the last tool definition caches all tool definitions." + if count := len(i.req.Tools); count > 0 { + i.req.Tools[count-1].OfTool.CacheControl = anthropic.NewCacheControlEphemeralParam() + } + // Note: Parallel tool calls are disabled to avoid tool_use/tool_result block mismatches. i.req.ToolChoice = anthropic.ToolChoiceUnionParam{ OfAny: &anthropic.ToolChoiceAnyParam{ diff --git a/intercept_anthropic_messages_streaming.go b/intercept_anthropic_messages_streaming.go index 4437a44..bd15ae1 100644 --- a/intercept_anthropic_messages_streaming.go +++ b/intercept_anthropic_messages_streaming.go @@ -61,6 +61,20 @@ func (i *AnthropicMessagesStreamingInterception) ProcessRequest(w http.ResponseW return fmt.Errorf("developer error: req is nil") } + // Explicitly unset any cache control markers on "assistant" messages; these should never be set + // since it's more beneficial for us to cache tool definitions, and Anthropic only allows for 4 + // cache markers... + // https://docs.claude.com/en/docs/build-with-claude/prompt-caching#when-to-use-multiple-breakpoints + for _, msg := range i.req.Messages { + if msg.Role == anthropic.MessageParamRoleAssistant { + for _, c := range msg.Content { + if c.OfText != nil { + c.OfText.CacheControl = anthropic.CacheControlEphemeralParam{} + } + } + } + } + // Allow us to interrupt watch via cancel. ctx, cancel := context.WithCancel(r.Context()) defer cancel() From 263508afe78d8b462f8f9a32d9f4ca95d14eeaf3 Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 23 Oct 2025 18:03:23 +0200 Subject: [PATCH 2/3] chore: fix imports Signed-off-by: Danny Kopping --- intercept_anthropic_messages_base.go | 4 ---- 1 file changed, 4 deletions(-) diff --git a/intercept_anthropic_messages_base.go b/intercept_anthropic_messages_base.go index 2a9a8f8..58317d9 100644 --- a/intercept_anthropic_messages_base.go +++ b/intercept_anthropic_messages_base.go @@ -1,13 +1,9 @@ package aibridge import ( - "fmt" - "net/http" - "net/http/httputil" "strings" "github.com/anthropics/anthropic-sdk-go" - "github.com/anthropics/anthropic-sdk-go/option" "github.com/coder/aibridge/mcp" "github.com/google/uuid" From 33200b6e986af8776c4b22fde10f17d58d09fbbd Mon Sep 17 00:00:00 2001 From: Danny Kopping Date: Thu, 23 Oct 2025 18:14:33 +0200 Subject: [PATCH 3/3] chore: implement in both streaming/blocking cases Signed-off-by: Danny Kopping --- intercept_anthropic_messages_base.go | 22 ++++++++++++++++++++++ intercept_anthropic_messages_blocking.go | 1 + intercept_anthropic_messages_streaming.go | 15 +-------------- 3 files changed, 24 insertions(+), 14 deletions(-) diff --git a/intercept_anthropic_messages_base.go b/intercept_anthropic_messages_base.go index 58317d9..9c4550f 100644 --- a/intercept_anthropic_messages_base.go +++ b/intercept_anthropic_messages_base.go @@ -86,6 +86,28 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() { } } +// removeUnnecessaryCacheMarkers removes any cache control settings which are unnecessarily set by the client +// and/or may interfere with the cache control we need to implement ourselves. +func (i *AnthropicMessagesInterceptionBase) removeUnnecessaryCacheMarkers() { + if i.req == nil { + return + } + + // Explicitly unset any cache control markers on "assistant" messages; these should never be set + // since it's more beneficial for us to cache tool definitions, and Anthropic only allows for 4 + // cache markers... + // https://docs.claude.com/en/docs/build-with-claude/prompt-caching#when-to-use-multiple-breakpoints + for _, msg := range i.req.Messages { + if msg.Role == anthropic.MessageParamRoleAssistant { + for _, c := range msg.Content { + if c.OfText != nil { + c.OfText.CacheControl = anthropic.CacheControlEphemeralParam{} + } + } + } + } +} + // isSmallFastModel checks if the model is a small/fast model (Haiku 3.5). // These models are optimized for tasks like code autocomplete and other small, quick operations. // See `ANTHROPIC_SMALL_FAST_MODEL`: https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables diff --git a/intercept_anthropic_messages_blocking.go b/intercept_anthropic_messages_blocking.go index ccfff75..77348e0 100644 --- a/intercept_anthropic_messages_blocking.go +++ b/intercept_anthropic_messages_blocking.go @@ -41,6 +41,7 @@ func (i *AnthropicMessagesBlockingInterception) ProcessRequest(w http.ResponseWr ctx := r.Context() + i.removeUnnecessaryCacheMarkers() i.injectTools() var ( diff --git a/intercept_anthropic_messages_streaming.go b/intercept_anthropic_messages_streaming.go index bd15ae1..3f8a0cc 100644 --- a/intercept_anthropic_messages_streaming.go +++ b/intercept_anthropic_messages_streaming.go @@ -61,20 +61,6 @@ func (i *AnthropicMessagesStreamingInterception) ProcessRequest(w http.ResponseW return fmt.Errorf("developer error: req is nil") } - // Explicitly unset any cache control markers on "assistant" messages; these should never be set - // since it's more beneficial for us to cache tool definitions, and Anthropic only allows for 4 - // cache markers... - // https://docs.claude.com/en/docs/build-with-claude/prompt-caching#when-to-use-multiple-breakpoints - for _, msg := range i.req.Messages { - if msg.Role == anthropic.MessageParamRoleAssistant { - for _, c := range msg.Content { - if c.OfText != nil { - c.OfText.CacheControl = anthropic.CacheControlEphemeralParam{} - } - } - } - } - // Allow us to interrupt watch via cancel. ctx, cancel := context.WithCancel(r.Context()) defer cancel() @@ -94,6 +80,7 @@ func (i *AnthropicMessagesStreamingInterception) ProcessRequest(w http.ResponseW logger.Warn(ctx, "failed to determine last user prompt", slog.Error(err)) } + i.removeUnnecessaryCacheMarkers() // Only inject tools into "actual" request. i.injectTools() }