
Commit a0b4769

🤖 feat: add OpenAI promptCacheKey for improved caching
Wire AI SDK's `providerOptions.openai.promptCacheKey` to improve OpenAI prompt cache hit rates.

- Derive the default key as `mux-v1-{workspaceId}` when a workspace ID is available
- Fall back to `mux-v1` when no workspace ID is available
- Pass `workspaceId` from `AIService.streamMessage` to `buildProviderOptions`

This enables OpenAI to route requests to cached prefixes within a workspace, improving cache hit rates for repeated calls.

---

_Generated with `mux` • Model: `anthropic:claude-opus-4-5` • Thinking: `high`_
1 parent a92f3e8 commit a0b4769
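
For reference, a minimal sketch of what this wiring amounts to end to end, assuming the AI SDK's `streamText` call shape and the `@ai-sdk/openai` provider package (the model id, message, and workspace id below are illustrative, not taken from the repository):

import { streamText } from "ai";
import { openai } from "@ai-sdk/openai";

// Same derivation as buildProviderOptions: stable per-workspace key, generic fallback.
const workspaceId: string | undefined = "abc123"; // illustrative
const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : "mux-v1";

const result = streamText({
  model: openai("gpt-5.2"),
  messages: [{ role: "user", content: "Hello" }],
  providerOptions: {
    // Repeated requests with the same key can be routed to OpenAI's cached prefix
    openai: { promptCacheKey },
  },
});
// Consume result.textStream as usual.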

File tree

3 files changed: 66 additions & 2 deletions


src/common/utils/ai/providerOptions.test.ts

Lines changed: 53 additions & 0 deletions
@@ -2,6 +2,7 @@
  * Tests for provider options builder
  */
 
+import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
 import { describe, test, expect, mock } from "bun:test";
 import { buildProviderOptions } from "./providerOptions";
 
@@ -120,3 +121,55 @@ describe("buildProviderOptions - Anthropic", () => {
     });
   });
 });
+
+describe("buildProviderOptions - OpenAI promptCacheKey", () => {
+  // Helper to extract OpenAI options from the result
+  const getOpenAIOptions = (
+    result: ReturnType<typeof buildProviderOptions>
+  ): OpenAIResponsesProviderOptions | undefined => {
+    if ("openai" in result) {
+      return result.openai;
+    }
+    return undefined;
+  };
+
+  describe("promptCacheKey derivation", () => {
+    test("should derive promptCacheKey from workspaceId when provided", () => {
+      const result = buildProviderOptions(
+        "openai:gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "abc123"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-abc123");
+    });
+
+    test("should use generic fallback when workspaceId is not provided", () => {
+      const result = buildProviderOptions("openai:gpt-5.2", "off");
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1");
+    });
+
+    test("should derive promptCacheKey for gateway OpenAI model", () => {
+      const result = buildProviderOptions(
+        "mux-gateway:openai/gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "workspace-xyz"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-workspace-xyz");
    });
+  });
+});

src/common/utils/ai/providerOptions.ts

Lines changed: 10 additions & 1 deletion
@@ -65,7 +65,8 @@ export function buildProviderOptions(
   thinkingLevel: ThinkingLevel,
   messages?: MuxMessage[],
   lostResponseIds?: (id: string) => boolean,
-  muxProviderOptions?: MuxProviderOptions
+  muxProviderOptions?: MuxProviderOptions,
+  workspaceId?: string
 ): ProviderOptions {
   // Always clamp to the model's supported thinking policy (e.g., gpt-5-pro = HIGH only)
   const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
@@ -210,11 +211,16 @@
   // Check if auto-truncation should be disabled (for testing context limit errors)
   const disableAutoTruncation = muxProviderOptions?.openai?.disableAutoTruncation ?? false;
 
+  // Prompt cache key: derive from workspaceId or use generic fallback
+  // This helps OpenAI route requests to cached prefixes for improved hit rates
+  const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : "mux-v1";
+
   log.debug("buildProviderOptions: OpenAI config", {
     reasoningEffort,
     thinkingLevel: effectiveThinking,
     previousResponseId,
     disableAutoTruncation,
+    promptCacheKey,
   });
 
   const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
@@ -225,6 +231,9 @@
     serviceTier,
     // Automatically truncate conversation to fit context window, unless disabled for testing
     truncation: disableAutoTruncation ? "disabled" : "auto",
+    // Stable prompt cache key to improve OpenAI cache hit rates
+    // See: https://sdk.vercel.ai/providers/ai-sdk-providers/openai#responses-models
+    promptCacheKey,
     // Conditionally add reasoning configuration
     ...(reasoningEffort && {
       reasoningEffort,
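
Taken together, for an OpenAI model the builder now emits an options object roughly like the sketch below (field names come from the diff above; the values are illustrative, and the reasoning field only appears when a reasoning effort applies):

// Illustrative shape of the openai provider options after this change
const openaiProviderOptions = {
  serviceTier: "auto",
  truncation: "auto",              // "disabled" when auto-truncation is turned off for testing
  promptCacheKey: "mux-v1-abc123", // "mux-v1" when no workspaceId is supplied
  reasoningEffort: "high",         // spread in only when reasoningEffort is set
};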

src/node/services/aiService.ts

Lines changed: 3 additions & 1 deletion
@@ -1365,12 +1365,14 @@
     // Build provider options based on thinking level and message history
     // Pass filtered messages so OpenAI can extract previousResponseId for persistence
     // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
+    // Pass workspaceId to derive stable promptCacheKey for OpenAI caching
     const providerOptions = buildProviderOptions(
       modelString,
       thinkingLevel ?? "off",
       filteredMessages,
       (id) => this.streamManager.isResponseIdLost(id),
-      effectiveMuxProviderOptions
+      effectiveMuxProviderOptions,
+      workspaceId
     );
 
     // Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set
