From 8dae3c84de5d1f9f810a5b32dd2c96f583aea8ba Mon Sep 17 00:00:00 2001
From: Michael Suchacz <203725896+ibetitsmike@users.noreply.github.com>
Date: Mon, 15 Dec 2025 15:37:46 +0000
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20feat:=20add=20OpenAI=20promptCac?=
 =?UTF-8?q?heKey=20for=20improved=20caching?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire AI SDK's providerOptions.openai.promptCacheKey to improve OpenAI
prompt cache hit rates.

- Derive default key as mux-v1-{workspaceId} when workspace ID is available
- Omit the key when workspace ID is unavailable
- Pass workspaceId from AIService.streamMessage to buildProviderOptions

This enables OpenAI to route requests to cached prefixes within a
workspace, improving cache hit rates for repeated calls.

---

_Generated with `mux` • Model: `anthropic:claude-opus-4-5` • Thinking: `high`_
---
 src/common/utils/ai/providerOptions.test.ts | 45 +++++++++++++++++++++
 src/common/utils/ai/providerOptions.ts      | 12 +++++-
 src/node/services/aiService.ts              |  4 +-
 3 files changed, 59 insertions(+), 2 deletions(-)

diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 63e4ec2041..53bccf6732 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -2,6 +2,7 @@
  * Tests for provider options builder
  */
 
+import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
 import { describe, test, expect, mock } from "bun:test";
 
 import { buildProviderOptions } from "./providerOptions";
@@ -120,3 +121,47 @@ describe("buildProviderOptions - Anthropic", () => {
     });
   });
 });
+
+describe("buildProviderOptions - OpenAI promptCacheKey", () => {
+  // Helper to extract OpenAI options from the result
+  const getOpenAIOptions = (
+    result: ReturnType<typeof buildProviderOptions>
+  ): OpenAIResponsesProviderOptions | undefined => {
+    if ("openai" in result) {
+      return result.openai;
+    }
+    return undefined;
+  };
+
+  describe("promptCacheKey derivation", () => {
+    test("should derive promptCacheKey from workspaceId when provided", () => {
+      const result = buildProviderOptions(
+        "openai:gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "abc123"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-abc123");
+    });
+
+    test("should derive promptCacheKey for gateway OpenAI model", () => {
+      const result = buildProviderOptions(
+        "mux-gateway:openai/gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "workspace-xyz"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-workspace-xyz");
+    });
+  });
+});
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 6193c9d041..3044a4a8cf 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -65,7 +65,8 @@ export function buildProviderOptions(
   thinkingLevel: ThinkingLevel,
   messages?: MuxMessage[],
   lostResponseIds?: (id: string) => boolean,
-  muxProviderOptions?: MuxProviderOptions
+  muxProviderOptions?: MuxProviderOptions,
+  workspaceId?: string // Optional for non-OpenAI providers
 ): ProviderOptions {
   // Always clamp to the model's supported thinking policy (e.g., gpt-5-pro = HIGH only)
   const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
@@ -210,11 +211,17 @@ export function buildProviderOptions(
     // Check if auto-truncation should be disabled (for testing context limit errors)
     const disableAutoTruncation = muxProviderOptions?.openai?.disableAutoTruncation ?? false;
 
+    // Prompt cache key: derive from workspaceId
+    // This helps OpenAI route requests to cached prefixes for improved hit rates
+    // workspaceId is always passed from AIService.streamMessage for real requests
+    const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : undefined;
+
     log.debug("buildProviderOptions: OpenAI config", {
       reasoningEffort,
       thinkingLevel: effectiveThinking,
       previousResponseId,
       disableAutoTruncation,
+      promptCacheKey,
     });
 
     const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
@@ -225,6 +232,9 @@
       serviceTier,
       // Automatically truncate conversation to fit context window, unless disabled for testing
       truncation: disableAutoTruncation ? "disabled" : "auto",
+      // Stable prompt cache key to improve OpenAI cache hit rates
+      // See: https://sdk.vercel.ai/providers/ai-sdk-providers/openai#responses-models
+      ...(promptCacheKey && { promptCacheKey }),
       // Conditionally add reasoning configuration
       ...(reasoningEffort && {
         reasoningEffort,
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 5c230b9dc8..5ecc6bde15 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -1365,12 +1365,14 @@ export class AIService extends EventEmitter {
     // Build provider options based on thinking level and message history
     // Pass filtered messages so OpenAI can extract previousResponseId for persistence
     // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
+    // Pass workspaceId to derive stable promptCacheKey for OpenAI caching
     const providerOptions = buildProviderOptions(
       modelString,
       thinkingLevel ?? "off",
       filteredMessages,
       (id) => this.streamManager.isResponseIdLost(id),
-      effectiveMuxProviderOptions
+      effectiveMuxProviderOptions,
+      workspaceId
     );
 
     // Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set
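
For reviewers, a minimal sketch of what this wiring amounts to at the AI SDK call site, assuming AI SDK v5's `streamText` with top-level `providerOptions`. The workspace ID, prompt, and model name (borrowed from the placeholder id in the tests above) are illustrative, not values from this patch:

```ts
import { streamText } from "ai";
import { openai } from "@ai-sdk/openai";

// Placeholder workspace ID; in mux this comes from AIService.streamMessage.
const workspaceId = "workspace-123";
// Same derivation the patch adds in buildProviderOptions: mux-v1-{workspaceId}.
const promptCacheKey = `mux-v1-${workspaceId}`;

const result = streamText({
  // "gpt-5.2" mirrors the placeholder model id used in the tests above.
  model: openai("gpt-5.2"),
  messages: [{ role: "user", content: "Summarize the workspace layout." }],
  providerOptions: {
    // Forwarded to OpenAI so repeated requests from the same workspace
    // can land on the same cached prompt prefix.
    openai: { promptCacheKey },
  },
});

// Stream the response text to stdout.
for await (const textPart of result.textStream) {
  process.stdout.write(textPart);
}
```

Keeping the key stable per workspace, rather than deriving it per request, is what lets OpenAI reuse cached prefixes across successive calls from the same workspace.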