diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 63e4ec2041..53bccf6732 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -2,6 +2,7 @@
  * Tests for provider options builder
  */
 
+import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
 import { describe, test, expect, mock } from "bun:test";
 import { buildProviderOptions } from "./providerOptions";
 
@@ -120,3 +121,47 @@ describe("buildProviderOptions - Anthropic", () => {
     });
   });
 });
+
+describe("buildProviderOptions - OpenAI promptCacheKey", () => {
+  // Helper to extract OpenAI options from the result
+  const getOpenAIOptions = (
+    result: ReturnType<typeof buildProviderOptions>
+  ): OpenAIResponsesProviderOptions | undefined => {
+    if ("openai" in result) {
+      return result.openai;
+    }
+    return undefined;
+  };
+
+  describe("promptCacheKey derivation", () => {
+    test("should derive promptCacheKey from workspaceId when provided", () => {
+      const result = buildProviderOptions(
+        "openai:gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "abc123"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-abc123");
+    });
+
+    test("should derive promptCacheKey for gateway OpenAI model", () => {
+      const result = buildProviderOptions(
+        "mux-gateway:openai/gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "workspace-xyz"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-workspace-xyz");
+    });
+  });
+});
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 6193c9d041..3044a4a8cf 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -65,7 +65,8 @@ export function buildProviderOptions(
   thinkingLevel: ThinkingLevel,
   messages?: MuxMessage[],
   lostResponseIds?: (id: string) => boolean,
-  muxProviderOptions?: MuxProviderOptions
+  muxProviderOptions?: MuxProviderOptions,
+  workspaceId?: string // Optional for non-OpenAI providers
 ): ProviderOptions {
   // Always clamp to the model's supported thinking policy (e.g., gpt-5-pro = HIGH only)
   const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
@@ -210,11 +211,17 @@
   // Check if auto-truncation should be disabled (for testing context limit errors)
   const disableAutoTruncation = muxProviderOptions?.openai?.disableAutoTruncation ?? false;
 
+  // Prompt cache key: derive from workspaceId
+  // This helps OpenAI route requests to cached prefixes for improved hit rates
+  // workspaceId is always passed from AIService.streamMessage for real requests
+  const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : undefined;
+
   log.debug("buildProviderOptions: OpenAI config", {
     reasoningEffort,
     thinkingLevel: effectiveThinking,
     previousResponseId,
     disableAutoTruncation,
+    promptCacheKey,
   });
 
   const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
@@ -225,6 +232,9 @@
       serviceTier,
       // Automatically truncate conversation to fit context window, unless disabled for testing
       truncation: disableAutoTruncation ? "disabled" : "auto",
+      // Stable prompt cache key to improve OpenAI cache hit rates
+      // See: https://sdk.vercel.ai/providers/ai-sdk-providers/openai#responses-models
+      ...(promptCacheKey && { promptCacheKey }),
       // Conditionally add reasoning configuration
       ...(reasoningEffort && {
         reasoningEffort,
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 5c230b9dc8..5ecc6bde15 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -1365,12 +1365,14 @@ export class AIService extends EventEmitter {
     // Build provider options based on thinking level and message history
     // Pass filtered messages so OpenAI can extract previousResponseId for persistence
     // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
+    // Pass workspaceId to derive stable promptCacheKey for OpenAI caching
     const providerOptions = buildProviderOptions(
       modelString,
       thinkingLevel ?? "off",
       filteredMessages,
       (id) => this.streamManager.isResponseIdLost(id),
-      effectiveMuxProviderOptions
+      effectiveMuxProviderOptions,
+      workspaceId
     );
 
     // Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set