diff --git a/src/common/constants/providers.test.ts b/src/common/constants/providers.test.ts
index 16b417307..f6732abaa 100644
--- a/src/common/constants/providers.test.ts
+++ b/src/common/constants/providers.test.ts
@@ -10,11 +10,13 @@ describe("Provider Registry", () => {
     expect(Object.keys(PROVIDER_REGISTRY).length).toBeGreaterThan(0);
   });
 
-  test("all registry values are import functions", () => {
-    // Registry should map provider names to async import functions
+  test("all registry values are import functions that return promises", () => {
+    // Registry should map provider names to functions returning promises
     for (const importFn of Object.values(PROVIDER_REGISTRY)) {
       expect(typeof importFn).toBe("function");
-      expect(importFn.constructor.name).toBe("AsyncFunction");
+      // Verify calling the function returns a Promise (don't await - just type check)
+      const result = importFn();
+      expect(result).toBeInstanceOf(Promise);
     }
   });
 
diff --git a/src/common/constants/providers.ts b/src/common/constants/providers.ts
index 2650cf274..b3bd7ec55 100644
--- a/src/common/constants/providers.ts
+++ b/src/common/constants/providers.ts
@@ -9,56 +9,56 @@
 /**
  * Dynamically import the Anthropic provider package
  */
-export async function importAnthropic() {
-  return await import("@ai-sdk/anthropic");
+export function importAnthropic() {
+  return import("@ai-sdk/anthropic");
 }
 
 /**
  * Dynamically import the OpenAI provider package
  */
-export async function importOpenAI() {
-  return await import("@ai-sdk/openai");
+export function importOpenAI() {
+  return import("@ai-sdk/openai");
 }
 
 /**
  * Dynamically import the Ollama provider package
 */
-export async function importOllama() {
-  return await import("ollama-ai-provider-v2");
+export function importOllama() {
+  return import("ollama-ai-provider-v2");
 }
 
 /**
  * Dynamically import the Google provider package
 */
-export async function importGoogle() {
-  return await import("@ai-sdk/google");
+export function importGoogle() {
+  return import("@ai-sdk/google");
 }
 
 /**
  * Dynamically import the OpenRouter provider package
 */
-export async function importOpenRouter() {
-  return await import("@openrouter/ai-sdk-provider");
+export function importOpenRouter() {
+  return import("@openrouter/ai-sdk-provider");
 }
 
 /**
  * Dynamically import the xAI provider package
 */
-export async function importXAI() {
-  return await import("@ai-sdk/xai");
+export function importXAI() {
+  return import("@ai-sdk/xai");
 }
 
 /**
  * Dynamically import the Amazon Bedrock provider package
 */
-export async function importBedrock() {
+export function importBedrock() {
   return import("@ai-sdk/amazon-bedrock");
 }
 
 /**
  * Dynamically import the Gateway provider from the AI SDK
 */
-export async function importMuxGateway() {
+export function importMuxGateway() {
   return import("ai");
 }
 
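
The providers.ts change above only removes a redundant async/await wrapper: call sites that await a registry function behave identically either way, which is what the updated test asserts by checking for a returned Promise instead of the "AsyncFunction" constructor name. A minimal standalone sketch of the equivalence (using Promise.resolve as a stand-in for the real dynamic imports):

    // Both shapes resolve to the same value for an awaiting caller.
    async function importViaAsync(): Promise<{ name: string }> {
      return await Promise.resolve({ name: "provider" });
    }

    function importViaPromise(): Promise<{ name: string }> {
      return Promise.resolve({ name: "provider" });
    }

    async function demo(): Promise<void> {
      const a = await importViaAsync();
      const b = await importViaPromise();
      console.log(a.name === b.name); // true - callers are unaffected
    }

    void demo();

The one observable difference is that a plain function no longer converts a synchronous throw into a rejected promise, which should not matter for these bodies since import() itself reports failures as rejections.
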
diff --git a/src/common/utils/ai/cacheStrategy.test.ts b/src/common/utils/ai/cacheStrategy.test.ts
index 5c837700c..24a25e64a 100644
--- a/src/common/utils/ai/cacheStrategy.test.ts
+++ b/src/common/utils/ai/cacheStrategy.test.ts
@@ -11,15 +11,22 @@ import {
 
 describe("cacheStrategy", () => {
   describe("supportsAnthropicCache", () => {
-    it("should return true for Anthropic models", () => {
+    it("should return true for direct Anthropic models", () => {
       expect(supportsAnthropicCache("anthropic:claude-3-5-sonnet-20241022")).toBe(true);
       expect(supportsAnthropicCache("anthropic:claude-3-5-haiku-20241022")).toBe(true);
     });
 
+    it("should return true for gateway providers routing to Anthropic", () => {
+      expect(supportsAnthropicCache("mux-gateway:anthropic/claude-opus-4-5")).toBe(true);
+      expect(supportsAnthropicCache("mux-gateway:anthropic/claude-sonnet-4-5-20250514")).toBe(true);
+      expect(supportsAnthropicCache("openrouter:anthropic/claude-3.5-sonnet")).toBe(true);
+    });
+
     it("should return false for non-Anthropic models", () => {
       expect(supportsAnthropicCache("openai:gpt-4")).toBe(false);
       expect(supportsAnthropicCache("google:gemini-2.0")).toBe(false);
       expect(supportsAnthropicCache("openrouter:meta-llama/llama-3.1")).toBe(false);
+      expect(supportsAnthropicCache("mux-gateway:openai/gpt-5.1")).toBe(false);
     });
   });
 
@@ -83,6 +90,46 @@ describe("cacheStrategy", () => {
       });
       expect(result[1]).toEqual(messages[1]); // Last message unchanged
     });
+
+    it("should add cache control to last content part for array content", () => {
+      // Messages with array content (typical for user/assistant with multiple parts)
+      const messages: ModelMessage[] = [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "Hello" },
+            { type: "text", text: "World" },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            { type: "text", text: "Hi there!" },
+            { type: "text", text: "How can I help?" },
+          ],
+        },
+        { role: "user", content: "Final question" },
+      ];
+      const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
+
+      expect(result[0]).toEqual(messages[0]); // First message unchanged
+
+      // Second message (array content): cache control on LAST content part only
+      const secondMsg = result[1];
+      expect(secondMsg.role).toBe("assistant");
+      expect(Array.isArray(secondMsg.content)).toBe(true);
+      const content = secondMsg.content as Array<{
+        type: string;
+        text: string;
+        providerOptions?: unknown;
+      }>;
+      expect(content[0].providerOptions).toBeUndefined(); // First part unchanged
+      expect(content[1].providerOptions).toEqual({
+        anthropic: { cacheControl: { type: "ephemeral" } },
+      }); // Last part has cache control
+
+      expect(result[2]).toEqual(messages[2]); // Last message unchanged
+    });
   });
 
   describe("createCachedSystemMessage", () => {
@@ -198,5 +245,48 @@ describe("cacheStrategy", () => {
       applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
       expect(mockTools).toEqual(originalTools);
     });
+
+    it("should handle provider-defined tools without recreating them", () => {
+      // Provider-defined tools (like Anthropic's webSearch) have type: "provider-defined"
+      // and cannot be recreated with createTool() - they have special internal properties
+      const providerDefinedTool = {
+        type: "provider-defined" as const,
+        id: "web_search",
+        name: "web_search_20250305",
+        args: { maxUses: 1000 },
+        // Note: no description or execute - these are handled internally by the SDK
+      };
+
+      const toolsWithProviderDefined: Record<string, Tool> = {
+        readFile: tool({
+          description: "Read a file",
+          inputSchema: z.object({ path: z.string() }),
+          execute: () => Promise.resolve({ success: true }),
+        }),
+        // Provider-defined tool as last tool (typical for Anthropic web search)
+        web_search: providerDefinedTool as unknown as Tool,
+      };
+
+      const result = applyCacheControlToTools(
+        toolsWithProviderDefined,
+        "anthropic:claude-3-5-sonnet"
+      );
+
+      // Verify all tools are present
+      expect(Object.keys(result)).toEqual(Object.keys(toolsWithProviderDefined));
+
+      // First tool should be unchanged
+      expect(result.readFile).toEqual(toolsWithProviderDefined.readFile);
+
+      // Provider-defined tool should have cache control added but retain its type
+      const cachedWebSearch = result.web_search as unknown as {
+        type: string;
+        providerOptions: unknown;
+      };
+      expect(cachedWebSearch.type).toBe("provider-defined");
+      expect(cachedWebSearch.providerOptions).toEqual({
+        anthropic: { cacheControl: { type: "ephemeral" } },
+      });
+    });
  });
 });
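
The new test cases above assume model strings of the form "<provider>:<modelId>", where gateway and router providers embed the upstream vendor as an "anthropic/..." prefix inside the model id. A hypothetical parsing helper (illustrative only, not part of the codebase) that makes that shape explicit:

    interface ParsedModelString {
      provider: string;
      vendor?: string;
      model: string;
    }

    function parseModelString(modelString: string): ParsedModelString {
      const [provider, rest = ""] = modelString.split(":");
      const slash = rest.indexOf("/");
      if (slash === -1) {
        return { provider, model: rest };
      }
      return { provider, vendor: rest.slice(0, slash), model: rest.slice(slash + 1) };
    }

    // parseModelString("mux-gateway:anthropic/claude-opus-4-5")
    //   -> { provider: "mux-gateway", vendor: "anthropic", model: "claude-opus-4-5" }
    // parseModelString("anthropic:claude-3-5-sonnet-20241022")
    //   -> { provider: "anthropic", model: "claude-3-5-sonnet-20241022" }
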
diff --git a/src/common/utils/ai/cacheStrategy.ts b/src/common/utils/ai/cacheStrategy.ts
index a2c1ccffb..70d82f5f3 100644
--- a/src/common/utils/ai/cacheStrategy.ts
+++ b/src/common/utils/ai/cacheStrategy.ts
@@ -1,15 +1,75 @@
-import type { ModelMessage, Tool } from "ai";
+import { tool as createTool, type ModelMessage, type Tool } from "ai";
 
 /**
- * Check if a model supports Anthropic cache control
+ * Check if a model supports Anthropic cache control.
+ * Matches:
+ * - Direct Anthropic provider: "anthropic:claude-opus-4-5"
+ * - Gateway providers routing to Anthropic: "mux-gateway:anthropic/claude-opus-4-5"
+ * - OpenRouter Anthropic models: "openrouter:anthropic/claude-3.5-sonnet"
  */
 export function supportsAnthropicCache(modelString: string): boolean {
-  return modelString.startsWith("anthropic:");
+  // Direct Anthropic provider
+  if (modelString.startsWith("anthropic:")) {
+    return true;
+  }
+  // Gateway/router providers routing to Anthropic (format: "provider:anthropic/model")
+  const [, modelId] = modelString.split(":");
+  if (modelId?.startsWith("anthropic/")) {
+    return true;
+  }
+  return false;
+}
+
+/** Cache control providerOptions for Anthropic */
+const ANTHROPIC_CACHE_CONTROL = {
+  anthropic: {
+    cacheControl: { type: "ephemeral" as const },
+  },
+};
+
+/**
+ * Add providerOptions to the last content part of a message.
+ * The SDK requires providerOptions on content parts, not on the message itself.
+ *
+ * For system messages with string content, we use message-level providerOptions
+ * (which the SDK handles correctly). For user/assistant messages with array
+ * content, we add providerOptions to the last content part.
+ */
+function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
+  const content = msg.content;
+
+  // String content (typically system messages): use message-level providerOptions
+  // The SDK correctly translates this for system messages
+  if (typeof content === "string") {
+    return {
+      ...msg,
+      providerOptions: ANTHROPIC_CACHE_CONTROL,
+    };
+  }
+
+  // Array content: add providerOptions to the last part
+  // Use type assertion since we're adding providerOptions which is valid but not in base types
+  if (Array.isArray(content) && content.length > 0) {
+    const lastIndex = content.length - 1;
+    const newContent = content.map((part, i) =>
+      i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
+    );
+    // Type assertion needed: ModelMessage types are strict unions but providerOptions
+    // on content parts is valid per SDK docs
+    const result = { ...msg, content: newContent };
+    return result as ModelMessage;
+  }
+
+  // Empty or unexpected content: return as-is
+  return msg;
 }
 
 /**
  * Apply cache control to messages for Anthropic models.
  * Caches all messages except the last user message for optimal cache hits.
+ *
+ * NOTE: The SDK requires providerOptions on content parts, not on the message.
+ * We add cache_control to the last content part of the second-to-last message.
  */
 export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
   // Only apply cache control for Anthropic models
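
To make the transform above concrete, this is the message shape the tests expect after addCacheControlToLastContentPart runs on an array-content message (a sketch mirroring the assertions in cacheStrategy.test.ts, not additional production code):

    import type { ModelMessage } from "ai";

    const before: ModelMessage = {
      role: "assistant",
      content: [
        { type: "text", text: "Hi there!" },
        { type: "text", text: "How can I help?" },
      ],
    };

    // Only the LAST content part gains providerOptions; earlier parts stay untouched.
    const after = {
      role: "assistant",
      content: [
        { type: "text", text: "Hi there!" },
        {
          type: "text",
          text: "How can I help?",
          providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
        },
      ],
    };

    void before;
    void after;
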
@@ -28,16 +88,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
 
   return messages.map((msg, index) => {
     if (index === cacheIndex) {
-      return {
-        ...msg,
-        providerOptions: {
-          anthropic: {
-            cacheControl: {
-              type: "ephemeral" as const,
-            },
-          },
-        },
-      };
+      return addCacheControlToLastContentPart(msg);
     }
     return msg;
   });
@@ -77,6 +128,9 @@ export function createCachedSystemMessage(
  * 2. Conversation history (1 breakpoint)
  * 3. Last tool only (1 breakpoint) - caches all tools up to and including this one
  * = 3 total, leaving 1 for future use
+ *
+ * NOTE: The SDK requires providerOptions to be passed during tool() creation,
+ * not added afterwards. We re-create the last tool with providerOptions included.
  */
 export function applyCacheControlToTools<T extends Record<string, Tool>>(
   tools: T,
@@ -95,23 +149,41 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
   // Anthropic caches everything up to the cache breakpoint, so marking
   // only the last tool will cache all tools
   const cachedTools = {} as unknown as T;
-  for (const [key, tool] of Object.entries(tools)) {
+  for (const [key, existingTool] of Object.entries(tools)) {
     if (key === lastToolKey) {
-      // Last tool gets cache control
-      const cachedTool = {
-        ...tool,
-        providerOptions: {
-          anthropic: {
-            cacheControl: {
-              type: "ephemeral" as const,
-            },
-          },
-        },
-      };
-      cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
+      // For provider-defined tools (like Anthropic's webSearch), we cannot recreate them
+      // with createTool() - they have special properties. Instead, spread providerOptions
+      // directly onto the tool object. While this doesn't work for regular tools (SDK
+      // requires providerOptions at creation time), provider-defined tools handle it.
+      const isProviderDefinedTool = (existingTool as { type?: string }).type === "provider-defined";
+
+      if (isProviderDefinedTool) {
+        // Provider-defined tools: add providerOptions directly (SDK handles it differently)
+        cachedTools[key as keyof T] = {
+          ...existingTool,
+          providerOptions: {
+            anthropic: {
+              cacheControl: { type: "ephemeral" },
+            },
+          },
+        } as unknown as T[keyof T];
+      } else {
+        // Regular tools: re-create with providerOptions (SDK requires this at creation time)
+        const cachedTool = createTool({
+          description: existingTool.description,
+          inputSchema: existingTool.inputSchema,
+          execute: existingTool.execute,
+          providerOptions: {
+            anthropic: {
+              cacheControl: { type: "ephemeral" },
+            },
+          },
+        });
+        cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
+      }
     } else {
-      // Other tools are copied as-is (use unknown for type safety)
-      cachedTools[key as keyof T] = tool as unknown as T[keyof T];
+      // Other tools are copied as-is
+      cachedTools[key as keyof T] = existingTool as unknown as T[keyof T];
     }
   }
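
The breakpoint accounting in the JSDoc above relies on Anthropic's prompt caching semantics: a cache_control marker caches the prefix up to and including the block that carries it, so one marker on the last tool definition covers every tool before it. A simplified sketch of the raw tools array this aims to produce (hypothetical tool names, schemas trimmed):

    const anthropicTools = [
      { name: "read_file", description: "Read a file", input_schema: { type: "object" } },
      { name: "edit_file", description: "Edit a file", input_schema: { type: "object" } },
      {
        name: "run_tests",
        description: "Run the test suite",
        input_schema: { type: "object" },
        // Single breakpoint: caches all three tool definitions as one prefix.
        cache_control: { type: "ephemeral" },
      },
    ];

    void anthropicTools;
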
 
diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts
index 873e6a8c3..04527bcfc 100644
--- a/src/common/utils/tools/tools.ts
+++ b/src/common/utils/tools/tools.ts
@@ -125,7 +125,8 @@ export async function getToolsForModel(
       const { anthropic } = await import("@ai-sdk/anthropic");
       allTools = {
         ...baseTools,
-        web_search: anthropic.tools.webSearch_20250305({ maxUses: 1000 }),
+        // Type assertion needed due to SDK version mismatch between ai and @ai-sdk/anthropic
+        web_search: anthropic.tools.webSearch_20250305({ maxUses: 1000 }) as Tool,
       };
       break;
     }
@@ -136,9 +137,10 @@ export async function getToolsForModel(
         const { openai } = await import("@ai-sdk/openai");
         allTools = {
           ...baseTools,
+          // Type assertion needed due to SDK version mismatch between ai and @ai-sdk/openai
           web_search: openai.tools.webSearch({
             searchContextSize: "high",
-          }),
+          }) as Tool,
         };
       }
       break;
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index b9010fd36..2995d46fb 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -94,6 +94,63 @@ if (typeof globalFetchWithExtras.certificate === "function") {
     globalFetchWithExtras.certificate.bind(globalFetchWithExtras);
 }
 
+/**
+ * Wrap fetch to inject Anthropic cache_control directly into the request body.
+ * The AI SDK's providerOptions.anthropic.cacheControl doesn't get translated
+ * to raw cache_control for tools or message content parts, so we inject it
+ * at the HTTP level.
+ *
+ * Injects cache_control on:
+ * 1. Last tool (caches all tool definitions)
+ * 2. Second-to-last message's last content part (caches conversation history)
+ */
+function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
+  const cachingFetch = async (
+    input: Parameters<typeof fetch>[0],
+    init?: Parameters<typeof fetch>[1]
+  ): Promise<Response> => {
+    // Only modify POST requests with JSON body
+    if (init?.method?.toUpperCase() !== "POST" || typeof init?.body !== "string") {
+      return baseFetch(input, init);
+    }
+
+    try {
+      const json = JSON.parse(init.body) as Record<string, unknown>;
+
+      // Inject cache_control on the last tool if tools array exists
+      if (Array.isArray(json.tools) && json.tools.length > 0) {
+        const lastTool = json.tools[json.tools.length - 1] as Record<string, unknown>;
+        lastTool.cache_control ??= { type: "ephemeral" };
+      }
+
+      // Inject cache_control on second-to-last message's last content part
+      // This caches conversation history up to (but not including) the current user message
+      if (Array.isArray(json.messages) && json.messages.length >= 2) {
+        const secondToLastMsg = json.messages[json.messages.length - 2] as Record<string, unknown>;
+        const content = secondToLastMsg.content;
+
+        if (Array.isArray(content) && content.length > 0) {
+          // Array content: add cache_control to last part
+          const lastPart = content[content.length - 1] as Record<string, unknown>;
+          lastPart.cache_control ??= { type: "ephemeral" };
+        }
+        // Note: String content messages are rare after SDK conversion; skip for now
+      }
+
+      // Update body with modified JSON
+      const newBody = JSON.stringify(json);
+      const headers = new Headers(init?.headers);
+      headers.delete("content-length"); // Body size changed
+      return baseFetch(input, { ...init, headers, body: newBody });
+    } catch {
+      // If parsing fails, pass through unchanged
+      return baseFetch(input, init);
+    }
+  };
+
+  return Object.assign(cachingFetch, baseFetch) as typeof fetch;
+}
+
 /**
  * Get fetch function for provider - use custom if provided, otherwise unlimited timeout default
 */
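
The wrapper above edits the serialized request rather than the SDK-level options, so its effect is easiest to see on a plain request object. A standalone sketch of the same rewrite (request shape assumed; field names follow Anthropic's Messages API):

    const body = {
      tools: [{ name: "read_file" }, { name: "web_search" }],
      messages: [
        { role: "user", content: [{ type: "text", text: "earlier turn" }] },
        { role: "assistant", content: [{ type: "text", text: "earlier answer" }] },
        { role: "user", content: [{ type: "text", text: "current question" }] },
      ],
    };

    // Last tool gets the breakpoint that caches all tool definitions.
    const lastTool = body.tools[body.tools.length - 1] as Record<string, unknown>;
    lastTool.cache_control ??= { type: "ephemeral" };

    // Second-to-last message's last content part caches the conversation history
    // up to, but not including, the current user message.
    const secondToLast = body.messages[body.messages.length - 2];
    const parts = secondToLast.content;
    const lastPart = parts[parts.length - 1] as Record<string, unknown>;
    lastPart.cache_control ??= { type: "ephemeral" };

    // Result: body.tools[1] and body.messages[1].content[0] now carry cache_control.
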
"https://gateway.mux.coder.com/api/v1/ai-gateway/v1/ai"; const gateway = createGateway({ apiKey: couponCode, - baseURL: "https://gateway.mux.coder.com/api/v1/ai-gateway/v1/ai", + baseURL: gatewayBaseURL, + fetch: fetchWithCacheControl, }); return Ok(gateway(modelId)); }