diff --git a/src/common/constants/providers.test.ts b/src/common/constants/providers.test.ts
index 16b417307..f6732abaa 100644
--- a/src/common/constants/providers.test.ts
+++ b/src/common/constants/providers.test.ts
@@ -10,11 +10,13 @@ describe("Provider Registry", () => {
     expect(Object.keys(PROVIDER_REGISTRY).length).toBeGreaterThan(0);
   });
 
-  test("all registry values are import functions", () => {
-    // Registry should map provider names to async import functions
+  test("all registry values are import functions that return promises", () => {
+    // Registry should map provider names to functions returning promises
     for (const importFn of Object.values(PROVIDER_REGISTRY)) {
       expect(typeof importFn).toBe("function");
-      expect(importFn.constructor.name).toBe("AsyncFunction");
+      // Verify calling the function returns a Promise (don't await - just type check)
+      const result = importFn();
+      expect(result).toBeInstanceOf(Promise);
     }
   });
 
diff --git a/src/common/constants/providers.ts b/src/common/constants/providers.ts
index 2650cf274..b3bd7ec55 100644
--- a/src/common/constants/providers.ts
+++ b/src/common/constants/providers.ts
@@ -9,56 +9,56 @@
 /**
  * Dynamically import the Anthropic provider package
  */
-export async function importAnthropic() {
-  return await import("@ai-sdk/anthropic");
+export function importAnthropic() {
+  return import("@ai-sdk/anthropic");
 }
 
 /**
  * Dynamically import the OpenAI provider package
  */
-export async function importOpenAI() {
-  return await import("@ai-sdk/openai");
+export function importOpenAI() {
+  return import("@ai-sdk/openai");
 }
 
 /**
  * Dynamically import the Ollama provider package
 */
-export async function importOllama() {
-  return await import("ollama-ai-provider-v2");
+export function importOllama() {
+  return import("ollama-ai-provider-v2");
 }
 
 /**
  * Dynamically import the Google provider package
 */
-export async function importGoogle() {
-  return await import("@ai-sdk/google");
+export function importGoogle() {
+  return import("@ai-sdk/google");
 }
 
 /**
  * Dynamically import the OpenRouter provider package
 */
-export async function importOpenRouter() {
-  return await import("@openrouter/ai-sdk-provider");
+export function importOpenRouter() {
+  return import("@openrouter/ai-sdk-provider");
 }
 
 /**
  * Dynamically import the xAI provider package
 */
-export async function importXAI() {
-  return await import("@ai-sdk/xai");
+export function importXAI() {
+  return import("@ai-sdk/xai");
 }
 
 /**
  * Dynamically import the Amazon Bedrock provider package
 */
-export async function importBedrock() {
+export function importBedrock() {
   return import("@ai-sdk/amazon-bedrock");
 }
 
 /**
  * Dynamically import the Gateway provider from the AI SDK
 */
-export async function importMuxGateway() {
+export function importMuxGateway() {
   return import("ai");
 }
 
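
The providers.ts change above only removes a redundant async/await wrapper: call sites that await a registry function behave identically either way, which is what the updated test asserts by checking for a returned Promise instead of the "AsyncFunction" constructor name. A minimal standalone sketch of the equivalence (using Promise.resolve as a stand-in for the real dynamic imports):

    // Both shapes resolve to the same value for an awaiting caller.
    async function importViaAsync(): Promise<{ name: string }> {
      return await Promise.resolve({ name: "provider" });
    }

    function importViaPromise(): Promise<{ name: string }> {
      return Promise.resolve({ name: "provider" });
    }

    async function demo(): Promise<void> {
      const a = await importViaAsync();
      const b = await importViaPromise();
      console.log(a.name === b.name); // true - callers are unaffected
    }

    void demo();

The one observable difference is that a plain function no longer converts a synchronous throw into a rejected promise, which should not matter for these bodies since import() itself reports failures as rejections.
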
diff --git a/src/common/utils/ai/cacheStrategy.test.ts b/src/common/utils/ai/cacheStrategy.test.ts
index 5c837700c..24a25e64a 100644
--- a/src/common/utils/ai/cacheStrategy.test.ts
+++ b/src/common/utils/ai/cacheStrategy.test.ts
@@ -11,15 +11,22 @@ import {
 
 describe("cacheStrategy", () => {
   describe("supportsAnthropicCache", () => {
-    it("should return true for Anthropic models", () => {
+    it("should return true for direct Anthropic models", () => {
       expect(supportsAnthropicCache("anthropic:claude-3-5-sonnet-20241022")).toBe(true);
       expect(supportsAnthropicCache("anthropic:claude-3-5-haiku-20241022")).toBe(true);
     });
 
+    it("should return true for gateway providers routing to Anthropic", () => {
+      expect(supportsAnthropicCache("mux-gateway:anthropic/claude-opus-4-5")).toBe(true);
+      expect(supportsAnthropicCache("mux-gateway:anthropic/claude-sonnet-4-5-20250514")).toBe(true);
+      expect(supportsAnthropicCache("openrouter:anthropic/claude-3.5-sonnet")).toBe(true);
+    });
+
     it("should return false for non-Anthropic models", () => {
       expect(supportsAnthropicCache("openai:gpt-4")).toBe(false);
       expect(supportsAnthropicCache("google:gemini-2.0")).toBe(false);
       expect(supportsAnthropicCache("openrouter:meta-llama/llama-3.1")).toBe(false);
+      expect(supportsAnthropicCache("mux-gateway:openai/gpt-5.1")).toBe(false);
     });
   });
 
@@ -83,6 +90,46 @@ describe("cacheStrategy", () => {
       });
       expect(result[1]).toEqual(messages[1]); // Last message unchanged
     });
+
+    it("should add cache control to last content part for array content", () => {
+      // Messages with array content (typical for user/assistant with multiple parts)
+      const messages: ModelMessage[] = [
+        {
+          role: "user",
+          content: [
+            { type: "text", text: "Hello" },
+            { type: "text", text: "World" },
+          ],
+        },
+        {
+          role: "assistant",
+          content: [
+            { type: "text", text: "Hi there!" },
+            { type: "text", text: "How can I help?" },
+          ],
+        },
+        { role: "user", content: "Final question" },
+      ];
+      const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
+
+      expect(result[0]).toEqual(messages[0]); // First message unchanged
+
+      // Second message (array content): cache control on LAST content part only
+      const secondMsg = result[1];
+      expect(secondMsg.role).toBe("assistant");
+      expect(Array.isArray(secondMsg.content)).toBe(true);
+      const content = secondMsg.content as Array<{
+        type: string;
+        text: string;
+        providerOptions?: unknown;
+      }>;
+      expect(content[0].providerOptions).toBeUndefined(); // First part unchanged
+      expect(content[1].providerOptions).toEqual({
+        anthropic: { cacheControl: { type: "ephemeral" } },
+      }); // Last part has cache control
+
+      expect(result[2]).toEqual(messages[2]); // Last message unchanged
+    });
   });
 
   describe("createCachedSystemMessage", () => {
@@ -198,5 +245,48 @@ describe("cacheStrategy", () => {
       applyCacheControlToTools(mockTools, "anthropic:claude-3-5-sonnet");
       expect(mockTools).toEqual(originalTools);
     });
+
+    it("should handle provider-defined tools without recreating them", () => {
+      // Provider-defined tools (like Anthropic's webSearch) have type: "provider-defined"
+      // and cannot be recreated with createTool() - they have special internal properties
+      const providerDefinedTool = {
+        type: "provider-defined" as const,
+        id: "web_search",
+        name: "web_search_20250305",
+        args: { maxUses: 1000 },
+        // Note: no description or execute - these are handled internally by the SDK
+      };
+
+      const toolsWithProviderDefined: Record<string, Tool> = {
+        readFile: tool({
+          description: "Read a file",
+          inputSchema: z.object({ path: z.string() }),
+          execute: () => Promise.resolve({ success: true }),
+        }),
+        // Provider-defined tool as last tool (typical for Anthropic web search)
+        web_search: providerDefinedTool as unknown as Tool,
+      };
+
+      const result = applyCacheControlToTools(
+        toolsWithProviderDefined,
+        "anthropic:claude-3-5-sonnet"
+      );
+
+      // Verify all tools are present
+      expect(Object.keys(result)).toEqual(Object.keys(toolsWithProviderDefined));
+
+      // First tool should be unchanged
+      expect(result.readFile).toEqual(toolsWithProviderDefined.readFile);
+
+      // Provider-defined tool should have cache control added but retain its type
+      const cachedWebSearch = result.web_search as unknown as {
+        type: string;
+        providerOptions: unknown;
+      };
+      expect(cachedWebSearch.type).toBe("provider-defined");
+      expect(cachedWebSearch.providerOptions).toEqual({
+        anthropic: { cacheControl: { type: "ephemeral" } },
+      });
+    });
  });
 });
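
The new test cases above assume model strings of the form "<provider>:<modelId>", where gateway and router providers embed the upstream vendor as an "anthropic/..." prefix inside the model id. A hypothetical parsing helper (illustrative only, not part of the codebase) that makes that shape explicit:

    interface ParsedModelString {
      provider: string;
      vendor?: string;
      model: string;
    }

    function parseModelString(modelString: string): ParsedModelString {
      const [provider, rest = ""] = modelString.split(":");
      const slash = rest.indexOf("/");
      if (slash === -1) {
        return { provider, model: rest };
      }
      return { provider, vendor: rest.slice(0, slash), model: rest.slice(slash + 1) };
    }

    // parseModelString("mux-gateway:anthropic/claude-opus-4-5")
    //   -> { provider: "mux-gateway", vendor: "anthropic", model: "claude-opus-4-5" }
    // parseModelString("anthropic:claude-3-5-sonnet-20241022")
    //   -> { provider: "anthropic", model: "claude-3-5-sonnet-20241022" }
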
diff --git a/src/common/utils/ai/cacheStrategy.ts b/src/common/utils/ai/cacheStrategy.ts
index a2c1ccffb..70d82f5f3 100644
--- a/src/common/utils/ai/cacheStrategy.ts
+++ b/src/common/utils/ai/cacheStrategy.ts
@@ -1,15 +1,75 @@
-import type { ModelMessage, Tool } from "ai";
+import { tool as createTool, type ModelMessage, type Tool } from "ai";
 
 /**
- * Check if a model supports Anthropic cache control
+ * Check if a model supports Anthropic cache control.
+ * Matches:
+ * - Direct Anthropic provider: "anthropic:claude-opus-4-5"
+ * - Gateway providers routing to Anthropic: "mux-gateway:anthropic/claude-opus-4-5"
+ * - OpenRouter Anthropic models: "openrouter:anthropic/claude-3.5-sonnet"
  */
 export function supportsAnthropicCache(modelString: string): boolean {
-  return modelString.startsWith("anthropic:");
+  // Direct Anthropic provider
+  if (modelString.startsWith("anthropic:")) {
+    return true;
+  }
+  // Gateway/router providers routing to Anthropic (format: "provider:anthropic/model")
+  const [, modelId] = modelString.split(":");
+  if (modelId?.startsWith("anthropic/")) {
+    return true;
+  }
+  return false;
+}
+
+/** Cache control providerOptions for Anthropic */
+const ANTHROPIC_CACHE_CONTROL = {
+  anthropic: {
+    cacheControl: { type: "ephemeral" as const },
+  },
+};
+
+/**
+ * Add providerOptions to the last content part of a message.
+ * The SDK requires providerOptions on content parts, not on the message itself.
+ *
+ * For system messages with string content, we use message-level providerOptions
+ * (which the SDK handles correctly). For user/assistant messages with array
+ * content, we add providerOptions to the last content part.
+ */
+function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
+  const content = msg.content;
+
+  // String content (typically system messages): use message-level providerOptions
+  // The SDK correctly translates this for system messages
+  if (typeof content === "string") {
+    return {
+      ...msg,
+      providerOptions: ANTHROPIC_CACHE_CONTROL,
+    };
+  }
+
+  // Array content: add providerOptions to the last part
+  // Use type assertion since we're adding providerOptions which is valid but not in base types
+  if (Array.isArray(content) && content.length > 0) {
+    const lastIndex = content.length - 1;
+    const newContent = content.map((part, i) =>
+      i === lastIndex ? { ...part, providerOptions: ANTHROPIC_CACHE_CONTROL } : part
+    );
+    // Type assertion needed: ModelMessage types are strict unions but providerOptions
+    // on content parts is valid per SDK docs
+    const result = { ...msg, content: newContent };
+    return result as ModelMessage;
+  }
+
+  // Empty or unexpected content: return as-is
+  return msg;
 }
 
 /**
  * Apply cache control to messages for Anthropic models.
  * Caches all messages except the last user message for optimal cache hits.
+ *
+ * NOTE: The SDK requires providerOptions on content parts, not on the message.
+ * We add cache_control to the last content part of the second-to-last message.
  */
 export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
   // Only apply cache control for Anthropic models
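
To make the transform above concrete, this is the message shape the tests expect after addCacheControlToLastContentPart runs on an array-content message (a sketch mirroring the assertions in cacheStrategy.test.ts, not additional production code):

    import type { ModelMessage } from "ai";

    const before: ModelMessage = {
      role: "assistant",
      content: [
        { type: "text", text: "Hi there!" },
        { type: "text", text: "How can I help?" },
      ],
    };

    // Only the LAST content part gains providerOptions; earlier parts stay untouched.
    const after = {
      role: "assistant",
      content: [
        { type: "text", text: "Hi there!" },
        {
          type: "text",
          text: "How can I help?",
          providerOptions: { anthropic: { cacheControl: { type: "ephemeral" } } },
        },
      ],
    };

    void before;
    void after;
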
@@ -28,16 +88,7 @@ export function applyCacheControl(messages: ModelMessage[], modelString: string)
 
   return messages.map((msg, index) => {
     if (index === cacheIndex) {
-      return {
-        ...msg,
-        providerOptions: {
-          anthropic: {
-            cacheControl: {
-              type: "ephemeral" as const,
-            },
-          },
-        },
-      };
+      return addCacheControlToLastContentPart(msg);
     }
     return msg;
   });
@@ -77,6 +128,9 @@ export function createCachedSystemMessage(
  * 2. Conversation history (1 breakpoint)
  * 3. Last tool only (1 breakpoint) - caches all tools up to and including this one
  * = 3 total, leaving 1 for future use
+ *
+ * NOTE: The SDK requires providerOptions to be passed during tool() creation,
+ * not added afterwards. We re-create the last tool with providerOptions included.
  */
 export function applyCacheControlToTools<T extends Record<string, Tool>>(
   tools: T,
@@ -95,23 +149,41 @@ export function applyCacheControlToTools<T extends Record<string, Tool>>(
   // Anthropic caches everything up to the cache breakpoint, so marking
   // only the last tool will cache all tools
   const cachedTools = {} as unknown as T;
-  for (const [key, tool] of Object.entries(tools)) {
+  for (const [key, existingTool] of Object.entries(tools)) {
     if (key === lastToolKey) {
-      // Last tool gets cache control
-      const cachedTool = {
-        ...tool,
-        providerOptions: {
-          anthropic: {
-            cacheControl: {
-              type: "ephemeral" as const,
-            },
-          },
-        },
-      };
-      cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
+      // For provider-defined tools (like Anthropic's webSearch), we cannot recreate them
+      // with createTool() - they have special properties. Instead, spread providerOptions
+      // directly onto the tool object. While this doesn't work for regular tools (SDK
+      // requires providerOptions at creation time), provider-defined tools handle it.
+      const isProviderDefinedTool = (existingTool as { type?: string }).type === "provider-defined";
+
+      if (isProviderDefinedTool) {
+        // Provider-defined tools: add providerOptions directly (SDK handles it differently)
+        cachedTools[key as keyof T] = {
+          ...existingTool,
+          providerOptions: {
+            anthropic: {
+              cacheControl: { type: "ephemeral" },
+            },
+          },
+        } as unknown as T[keyof T];
+      } else {
+        // Regular tools: re-create with providerOptions (SDK requires this at creation time)
+        const cachedTool = createTool({
+          description: existingTool.description,
+          inputSchema: existingTool.inputSchema,
+          execute: existingTool.execute,
+          providerOptions: {
+            anthropic: {
+              cacheControl: { type: "ephemeral" },
+            },
+          },
+        });
+        cachedTools[key as keyof T] = cachedTool as unknown as T[keyof T];
+      }
     } else {
-      // Other tools are copied as-is (use unknown for type safety)
-      cachedTools[key as keyof T] = tool as unknown as T[keyof T];
+      // Other tools are copied as-is
+      cachedTools[key as keyof T] = existingTool as unknown as T[keyof T];
     }
   }
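
The breakpoint accounting in the JSDoc above relies on Anthropic's prompt caching semantics: a cache_control marker caches the prefix up to and including the block that carries it, so one marker on the last tool definition covers every tool before it. A simplified sketch of the raw tools array this aims to produce (hypothetical tool names, schemas trimmed):

    const anthropicTools = [
      { name: "read_file", description: "Read a file", input_schema: { type: "object" } },
      { name: "edit_file", description: "Edit a file", input_schema: { type: "object" } },
      {
        name: "run_tests",
        description: "Run the test suite",
        input_schema: { type: "object" },
        // Single breakpoint: caches all three tool definitions as one prefix.
        cache_control: { type: "ephemeral" },
      },
    ];

    void anthropicTools;
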
 
diff --git a/src/common/utils/tools/tools.ts b/src/common/utils/tools/tools.ts
index 873e6a8c3..04527bcfc 100644
--- a/src/common/utils/tools/tools.ts
+++ b/src/common/utils/tools/tools.ts
@@ -125,7 +125,8 @@ export async function getToolsForModel(
       const { anthropic } = await import("@ai-sdk/anthropic");
       allTools = {
         ...baseTools,
-        web_search: anthropic.tools.webSearch_20250305({ maxUses: 1000 }),
+        // Type assertion needed due to SDK version mismatch between ai and @ai-sdk/anthropic
+        web_search: anthropic.tools.webSearch_20250305({ maxUses: 1000 }) as Tool,
       };
       break;
     }
@@ -136,9 +137,10 @@ export async function getToolsForModel(
         const { openai } = await import("@ai-sdk/openai");
         allTools = {
           ...baseTools,
+          // Type assertion needed due to SDK version mismatch between ai and @ai-sdk/openai
           web_search: openai.tools.webSearch({
             searchContextSize: "high",
-          }),
+          }) as Tool,
         };
       }
       break;
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index b9010fd36..2995d46fb 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -94,6 +94,63 @@ if (typeof globalFetchWithExtras.certificate === "function") {
     globalFetchWithExtras.certificate.bind(globalFetchWithExtras);
 }
 
+/**
+ * Wrap fetch to inject Anthropic cache_control directly into the request body.
+ * The AI SDK's providerOptions.anthropic.cacheControl doesn't get translated
+ * to raw cache_control for tools or message content parts, so we inject it
+ * at the HTTP level.
+ *
+ * Injects cache_control on:
+ * 1. Last tool (caches all tool definitions)
+ * 2. Second-to-last message's last content part (caches conversation history)
+ */
+function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
+  const cachingFetch = async (
+    input: Parameters<typeof fetch>[0],
+    init?: Parameters<typeof fetch>[1]
+  ): Promise<Response> => {
+    // Only modify POST requests with JSON body
+    if (init?.method?.toUpperCase() !== "POST" || typeof init?.body !== "string") {
+      return baseFetch(input, init);
+    }
+
+    try {
+      const json = JSON.parse(init.body) as Record<string, unknown>;
+
+      // Inject cache_control on the last tool if tools array exists
+      if (Array.isArray(json.tools) && json.tools.length > 0) {
+        const lastTool = json.tools[json.tools.length - 1] as Record<string, unknown>;
+        lastTool.cache_control ??= { type: "ephemeral" };
+      }
+
+      // Inject cache_control on second-to-last message's last content part
+      // This caches conversation history up to (but not including) the current user message
+      if (Array.isArray(json.messages) && json.messages.length >= 2) {
+        const secondToLastMsg = json.messages[json.messages.length - 2] as Record<string, unknown>;
+        const content = secondToLastMsg.content;
+
+        if (Array.isArray(content) && content.length > 0) {
+          // Array content: add cache_control to last part
+          const lastPart = content[content.length - 1] as Record<string, unknown>;
+          lastPart.cache_control ??= { type: "ephemeral" };
+        }
+        // Note: String content messages are rare after SDK conversion; skip for now
+      }
+
+      // Update body with modified JSON
+      const newBody = JSON.stringify(json);
+      const headers = new Headers(init?.headers);
+      headers.delete("content-length"); // Body size changed
+      return baseFetch(input, { ...init, headers, body: newBody });
+    } catch {
+      // If parsing fails, pass through unchanged
+      return baseFetch(input, init);
+    }
+  };
+
+  return Object.assign(cachingFetch, baseFetch) as typeof fetch;
+}
+
 /**
  * Get fetch function for provider - use custom if provided, otherwise unlimited timeout default
 */
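
The wrapper above edits the serialized request rather than the SDK-level options, so its effect is easiest to see on a plain request object. A standalone sketch of the same rewrite (request shape assumed; field names follow Anthropic's Messages API):

    const body = {
      tools: [{ name: "read_file" }, { name: "web_search" }],
      messages: [
        { role: "user", content: [{ type: "text", text: "earlier turn" }] },
        { role: "assistant", content: [{ type: "text", text: "earlier answer" }] },
        { role: "user", content: [{ type: "text", text: "current question" }] },
      ],
    };

    // Last tool gets the breakpoint that caches all tool definitions.
    const lastTool = body.tools[body.tools.length - 1] as Record<string, unknown>;
    lastTool.cache_control ??= { type: "ephemeral" };

    // Second-to-last message's last content part caches the conversation history
    // up to, but not including, the current user message.
    const secondToLast = body.messages[body.messages.length - 2];
    const parts = secondToLast.content;
    const lastPart = parts[parts.length - 1] as Record<string, unknown>;
    lastPart.cache_control ??= { type: "ephemeral" };

    // Result: body.tools[1] and body.messages[1].content[0] now carry cache_control.
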
"https://gateway.mux.coder.com/api/v1/ai-gateway/v1/ai"; const gateway = createGateway({ apiKey: couponCode, - baseURL: "https://gateway.mux.coder.com/api/v1/ai-gateway/v1/ai", + baseURL: gatewayBaseURL, + fetch: fetchWithCacheControl, }); return Ok(gateway(modelId)); }