diff --git a/intercept_anthropic_messages_base.go b/intercept_anthropic_messages_base.go
index 35e8642..9c4550f 100644
--- a/intercept_anthropic_messages_base.go
+++ b/intercept_anthropic_messages_base.go
@@ -44,6 +44,16 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() {
 		return
 	}
 
+	// Any existing tool definitions.
+	for _, tool := range i.req.Tools {
+		if tool.OfTool == nil {
+			continue
+		}
+
+		// Explicitly unset all cache control settings, we'll set one at the end.
+		tool.OfTool.CacheControl = anthropic.CacheControlEphemeralParam{}
+	}
+
 	// Inject tools.
 	for _, tool := range i.mcpProxy.ListTools() {
 		i.req.Tools = append(i.req.Tools, anthropic.ToolUnionParam{
@@ -55,10 +65,19 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() {
 				Name:        tool.ID,
 				Description: anthropic.String(tool.Description),
 				Type:        anthropic.ToolTypeCustom,
+				// Explicitly unset all cache control settings, we'll set one at the end.
+				CacheControl: anthropic.CacheControlEphemeralParam{},
 			},
 		})
 	}
 
+	// See https://docs.claude.com/en/docs/build-with-claude/prompt-caching.
+	// "The cache_control parameter on the last tool definition caches all tool definitions."
+	// The last entry may have a nil OfTool (the loop above skips such entries); guard to avoid a nil dereference.
+	if count := len(i.req.Tools); count > 0 && i.req.Tools[count-1].OfTool != nil {
+		i.req.Tools[count-1].OfTool.CacheControl = anthropic.NewCacheControlEphemeralParam()
+	}
+
 	// Note: Parallel tool calls are disabled to avoid tool_use/tool_result block mismatches.
 	i.req.ToolChoice = anthropic.ToolChoiceUnionParam{
 		OfAny: &anthropic.ToolChoiceAnyParam{
@@ -68,6 +87,28 @@ func (i *AnthropicMessagesInterceptionBase) injectTools() {
 	}
 }
 
+// removeUnnecessaryCacheMarkers removes any cache control settings which are unnecessarily set by the client
+// and/or may interfere with the cache control we need to implement ourselves.
+func (i *AnthropicMessagesInterceptionBase) removeUnnecessaryCacheMarkers() {
+	if i.req == nil {
+		return
+	}
+
+	// Explicitly unset any cache control markers on "assistant" messages; these should never be set
+	// since it's more beneficial for us to cache tool definitions, and Anthropic only allows for 4
+	// cache markers...
+	// https://docs.claude.com/en/docs/build-with-claude/prompt-caching#when-to-use-multiple-breakpoints
+	for _, msg := range i.req.Messages {
+		if msg.Role == anthropic.MessageParamRoleAssistant {
+			for _, c := range msg.Content {
+				if c.OfText != nil {
+					c.OfText.CacheControl = anthropic.CacheControlEphemeralParam{}
+				}
+			}
+		}
+	}
+}
+
 // isSmallFastModel checks if the model is a small/fast model (Haiku 3.5).
 // These models are optimized for tasks like code autocomplete and other small, quick operations.
 // See `ANTHROPIC_SMALL_FAST_MODEL`: https://docs.anthropic.com/en/docs/claude-code/settings#environment-variables
diff --git a/intercept_anthropic_messages_blocking.go b/intercept_anthropic_messages_blocking.go
index ccfff75..77348e0 100644
--- a/intercept_anthropic_messages_blocking.go
+++ b/intercept_anthropic_messages_blocking.go
@@ -41,6 +41,7 @@ func (i *AnthropicMessagesBlockingInterception) ProcessRequest(w http.ResponseWr
 
 	ctx := r.Context()
 
+	i.removeUnnecessaryCacheMarkers()
 	i.injectTools()
 
 	var (
diff --git a/intercept_anthropic_messages_streaming.go b/intercept_anthropic_messages_streaming.go
index 4437a44..3f8a0cc 100644
--- a/intercept_anthropic_messages_streaming.go
+++ b/intercept_anthropic_messages_streaming.go
@@ -80,6 +80,7 @@ func (i *AnthropicMessagesStreamingInterception) ProcessRequest(w http.ResponseW
 			logger.Warn(ctx, "failed to determine last user prompt", slog.Error(err))
 		}
 
+		i.removeUnnecessaryCacheMarkers()
 		// Only inject tools into "actual" request.
 		i.injectTools()
 	}