
Commit a0b4769

🤖 feat: add OpenAI promptCacheKey for improved caching
Wire AI SDK's `providerOptions.openai.promptCacheKey` to improve OpenAI prompt cache hit rates.

- Derive the default key as `mux-v1-{workspaceId}` when a workspace ID is available
- Fall back to `mux-v1` when no workspace ID is available
- Pass `workspaceId` from `AIService.streamMessage` to `buildProviderOptions`

This enables OpenAI to route requests to cached prefixes within a workspace, improving cache hit rates for repeated calls.

---

_Generated with `mux` • Model: `anthropic:claude-opus-4-5` • Thinking: `high`_
1 parent a92f3e8 commit a0b4769
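
For reference, a minimal sketch of what this wiring amounts to end to end, assuming the AI SDK's `streamText` call shape and the `@ai-sdk/openai` provider package (the model id, message, and workspace id below are illustrative, not taken from the repository):

import { streamText } from "ai";
import { openai } from "@ai-sdk/openai";

// Same derivation as buildProviderOptions: stable per-workspace key, generic fallback.
const workspaceId: string | undefined = "abc123"; // illustrative
const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : "mux-v1";

const result = streamText({
  model: openai("gpt-5.2"),
  messages: [{ role: "user", content: "Hello" }],
  providerOptions: {
    // Repeated requests with the same key can be routed to OpenAI's cached prefix
    openai: { promptCacheKey },
  },
});
// Consume result.textStream as usual.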

File tree

3 files changed: 66 additions & 2 deletions


src/common/utils/ai/providerOptions.test.ts

Lines changed: 53 additions & 0 deletions
@@ -2,6 +2,7 @@
  * Tests for provider options builder
  */
 
+import type { OpenAIResponsesProviderOptions } from "@ai-sdk/openai";
 import { describe, test, expect, mock } from "bun:test";
 import { buildProviderOptions } from "./providerOptions";
 
@@ -120,3 +121,55 @@ describe("buildProviderOptions - Anthropic", () => {
     });
   });
 });
+
+describe("buildProviderOptions - OpenAI promptCacheKey", () => {
+  // Helper to extract OpenAI options from the result
+  const getOpenAIOptions = (
+    result: ReturnType<typeof buildProviderOptions>
+  ): OpenAIResponsesProviderOptions | undefined => {
+    if ("openai" in result) {
+      return result.openai;
+    }
+    return undefined;
+  };
+
+  describe("promptCacheKey derivation", () => {
+    test("should derive promptCacheKey from workspaceId when provided", () => {
+      const result = buildProviderOptions(
+        "openai:gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "abc123"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-abc123");
+    });
+
+    test("should use generic fallback when workspaceId is not provided", () => {
+      const result = buildProviderOptions("openai:gpt-5.2", "off");
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1");
+    });
+
+    test("should derive promptCacheKey for gateway OpenAI model", () => {
+      const result = buildProviderOptions(
+        "mux-gateway:openai/gpt-5.2",
+        "off",
+        undefined,
+        undefined,
+        undefined,
+        "workspace-xyz"
+      );
+      const openai = getOpenAIOptions(result);
+
+      expect(openai).toBeDefined();
+      expect(openai!.promptCacheKey).toBe("mux-v1-workspace-xyz");
    });
+  });
+});

src/common/utils/ai/providerOptions.ts

Lines changed: 10 additions & 1 deletion
@@ -65,7 +65,8 @@ export function buildProviderOptions(
   thinkingLevel: ThinkingLevel,
   messages?: MuxMessage[],
   lostResponseIds?: (id: string) => boolean,
-  muxProviderOptions?: MuxProviderOptions
+  muxProviderOptions?: MuxProviderOptions,
+  workspaceId?: string
 ): ProviderOptions {
   // Always clamp to the model's supported thinking policy (e.g., gpt-5-pro = HIGH only)
   const effectiveThinking = enforceThinkingPolicy(modelString, thinkingLevel);
@@ -210,11 +211,16 @@
   // Check if auto-truncation should be disabled (for testing context limit errors)
   const disableAutoTruncation = muxProviderOptions?.openai?.disableAutoTruncation ?? false;
 
+  // Prompt cache key: derive from workspaceId or use generic fallback
+  // This helps OpenAI route requests to cached prefixes for improved hit rates
+  const promptCacheKey = workspaceId ? `mux-v1-${workspaceId}` : "mux-v1";
+
   log.debug("buildProviderOptions: OpenAI config", {
     reasoningEffort,
     thinkingLevel: effectiveThinking,
     previousResponseId,
     disableAutoTruncation,
+    promptCacheKey,
   });
 
   const serviceTier = muxProviderOptions?.openai?.serviceTier ?? "auto";
@@ -225,6 +231,9 @@
     serviceTier,
     // Automatically truncate conversation to fit context window, unless disabled for testing
     truncation: disableAutoTruncation ? "disabled" : "auto",
+    // Stable prompt cache key to improve OpenAI cache hit rates
+    // See: https://sdk.vercel.ai/providers/ai-sdk-providers/openai#responses-models
+    promptCacheKey,
     // Conditionally add reasoning configuration
     ...(reasoningEffort && {
       reasoningEffort,
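
Taken together, for an OpenAI model the builder now emits an options object roughly like the sketch below (field names come from the diff above; the values are illustrative, and the reasoning field only appears when a reasoning effort applies):

// Illustrative shape of the openai provider options after this change
const openaiProviderOptions = {
  serviceTier: "auto",
  truncation: "auto",              // "disabled" when auto-truncation is turned off for testing
  promptCacheKey: "mux-v1-abc123", // "mux-v1" when no workspaceId is supplied
  reasoningEffort: "high",         // spread in only when reasoningEffort is set
};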

src/node/services/aiService.ts

Lines changed: 3 additions & 1 deletion
@@ -1365,12 +1365,14 @@
     // Build provider options based on thinking level and message history
     // Pass filtered messages so OpenAI can extract previousResponseId for persistence
     // Also pass callback to filter out lost responseIds (OpenAI invalidated them)
+    // Pass workspaceId to derive stable promptCacheKey for OpenAI caching
     const providerOptions = buildProviderOptions(
       modelString,
       thinkingLevel ?? "off",
       filteredMessages,
       (id) => this.streamManager.isResponseIdLost(id),
-      effectiveMuxProviderOptions
+      effectiveMuxProviderOptions,
+      workspaceId
     );
 
     // Debug dump: Log the complete LLM request when MUX_DEBUG_LLM_REQUEST is set
