Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/session-affinity-header.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"workers-ai-provider": patch
---

Add a `sessionAffinity` setting that sends the `x-session-affinity` header for prefix-cache optimization. Also forward `extraHeaders` in the REST API path instead of discarding them.
5 changes: 5 additions & 0 deletions .changeset/tanstack-session-affinity.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@cloudflare/tanstack-ai": patch
---

Add `sessionAffinity` option to `WorkersAiAdapterConfig` for prefix-cache optimization. Routes requests with the same key to the same backend replica via the `x-session-affinity` header. Supported across binding, REST, and gateway modes.
222 changes: 111 additions & 111 deletions demos.json
Original file line number Diff line number Diff line change
@@ -1,112 +1,112 @@
{
"demos": {
"./demos/agent-scheduler": {
"package_json_hash": "2fe8785345d56ff37e675baaa06380c1eed736ba"
},
"./demos/agent-task-manager": {
"package_json_hash": "f2245bb30e95d0785aa195635a0928fba5b621ae"
},
"./demos/agent-task-manager-human-in-the-loop": {
"package_json_hash": "d8856a52bd0bf9641b8d1ff98e06de650aece34a"
},
"./demos/evaluator-optimiser": {
"package_json_hash": "8c4e9a71c91d806dcbef586b50a594e650d8f090"
},
"./demos/image-generation": {
"package_json_hash": "697d55539ad024faa349faa3dcd3bcbdfacf37bc"
},
"./demos/mcp-client": {
"package_json_hash": "5129c9edfcdd03c8625615c85329d85138fb9773"
},
"./demos/mcp-server-bearer-auth": {
"package_json_hash": "8703a8f8992a06377cce9a139ce6709450b51b5c"
},
"./demos/mcp-slack-oauth": {
"package_json_hash": "3134658fb11397626329bc344eba48fd57b21d46"
},
"./demos/mcp-stytch-b2b-okr-manager": {
"package_json_hash": "48232b81779a5f5fb0253842b4243c7dad032c0c"
},
"./demos/mcp-stytch-consumer-todo-list": {
"package_json_hash": "f53fe23dcebec62f51f9a6e332d2c192b8598cf6"
},
"./demos/model-scraper": {
"package_json_hash": "5a20ad46b257699c313bdd7c0b520701d739ed12"
},
"./demos/orchestrator-workers": {
"package_json_hash": "e159d1ce03c17bf13239ee4ac76c0290a210bc38"
},
"./demos/parallelisation": {
"package_json_hash": "6dbc55c3277b3ea634776821e60642b3dd03d8c0"
},
"./demos/prompt-chaining": {
"package_json_hash": "510159b05545a2d7f9c8cb240def56649cd25989"
},
"./demos/remote-mcp-authkit": {
"package_json_hash": "d3a0122c45d27140db96df6859e191aa7d2f8ac1"
},
"./demos/remote-mcp-github-oauth": {
"package_json_hash": "c59a2ecc4937d54c658383c3d7fe95e7c123f5c1"
},
"./demos/remote-mcp-server": {
"package_json_hash": "6240672fd54010c3b03a8af553b420306e11bc78"
},
"./demos/routing": {
"package_json_hash": "5f547b98f4e9a6167a2913e3a6c61681312986dd"
},
"./demos/structured-output": {
"package_json_hash": "a66aacd49c57e74c0937bf4bea0986168086debb"
},
"./demos/structured-output-node": {
"package_json_hash": "f64cc27508f9dda6fbb3bf4192c031dcc671e64a"
},
"./demos/text-generation": {
"package_json_hash": "d52767521e285b05c3235eaf2c8cc0e47fdbf90d"
},
"./demos/text-generation-stream": {
"package_json_hash": "f8272f5b1f5f1c83c53395dfc76646cab18a32b7"
},
"./demos/tool-calling": {
"package_json_hash": "3a0b1d91022d706b96e7b429c1349116ba9373b5"
},
"./demos/tool-calling-stream": {
"package_json_hash": "7c92250cda46aaac7eb6aeea0255828781c4abcb"
},
"./demos/tool-calling-stream-traditional": {
"package_json_hash": "c610c334d5f53a6e399bddddf68098ca0dec96d7"
},
"./demos/ui-worker": {
"package_json_hash": "831702fff4771ce9ce7d93afe6824ec6fa316125"
},
"./demos/remote-mcp-cf-access": {
"package_json_hash": "1a09d449c88cfe3b989f352d18813385578b98ca"
},
"./demos/remote-mcp-authless": {
"package_json_hash": "ba9953ce57a26cb271144e67609ed98fd1c1110e"
},
"./demos/python-workers-mcp": {
"package_json_hash": "0e710d7b27bb34edba396dc2b3365db230c076cb"
},
"./demos/vision": {
"package_json_hash": "e53450d50753f0574995feef3b2f845045fc3dc3"
},
"./demos/remote-mcp-google-oauth": {
"package_json_hash": "21bdab2ebbbe336c5fe6fb032fde804373f1b489"
},
"./demos/remote-mcp-logto": {
"package_json_hash": "a98a0cb367641ff86d89a7127f5e2551d2a1532f"
},
"./demos/remote-mcp-server-descope-auth": {
"package_json_hash": "c5de845803aae734fa60185200d4bfa2e1d0fb23"
},
"./demos/remote-mcp-server-autorag": {
"package_json_hash": "2b4e9b35192362b3be2743370469ce3a627a72b0"
},
"./demos/use-mcp-inspector": {
"package_json_hash": "d1d084f1aa9a752ead5250e0a070f97a9114dcea"
},
"./demos/hello-world": {
"package_json_hash": "ab24a12893c001fe3416fadea2a8bf5e7e68392e"
}
}
}
"demos": {
"./demos/agent-scheduler": {
"package_json_hash": "2fe8785345d56ff37e675baaa06380c1eed736ba"
},
"./demos/agent-task-manager": {
"package_json_hash": "f2245bb30e95d0785aa195635a0928fba5b621ae"
},
"./demos/agent-task-manager-human-in-the-loop": {
"package_json_hash": "d8856a52bd0bf9641b8d1ff98e06de650aece34a"
},
"./demos/evaluator-optimiser": {
"package_json_hash": "8c4e9a71c91d806dcbef586b50a594e650d8f090"
},
"./demos/image-generation": {
"package_json_hash": "697d55539ad024faa349faa3dcd3bcbdfacf37bc"
},
"./demos/mcp-client": {
"package_json_hash": "5129c9edfcdd03c8625615c85329d85138fb9773"
},
"./demos/mcp-server-bearer-auth": {
"package_json_hash": "8703a8f8992a06377cce9a139ce6709450b51b5c"
},
"./demos/mcp-slack-oauth": {
"package_json_hash": "3134658fb11397626329bc344eba48fd57b21d46"
},
"./demos/mcp-stytch-b2b-okr-manager": {
"package_json_hash": "48232b81779a5f5fb0253842b4243c7dad032c0c"
},
"./demos/mcp-stytch-consumer-todo-list": {
"package_json_hash": "f53fe23dcebec62f51f9a6e332d2c192b8598cf6"
},
"./demos/model-scraper": {
"package_json_hash": "5a20ad46b257699c313bdd7c0b520701d739ed12"
},
"./demos/orchestrator-workers": {
"package_json_hash": "e159d1ce03c17bf13239ee4ac76c0290a210bc38"
},
"./demos/parallelisation": {
"package_json_hash": "6dbc55c3277b3ea634776821e60642b3dd03d8c0"
},
"./demos/prompt-chaining": {
"package_json_hash": "510159b05545a2d7f9c8cb240def56649cd25989"
},
"./demos/remote-mcp-authkit": {
"package_json_hash": "d3a0122c45d27140db96df6859e191aa7d2f8ac1"
},
"./demos/remote-mcp-github-oauth": {
"package_json_hash": "c59a2ecc4937d54c658383c3d7fe95e7c123f5c1"
},
"./demos/remote-mcp-server": {
"package_json_hash": "6240672fd54010c3b03a8af553b420306e11bc78"
},
"./demos/routing": {
"package_json_hash": "5f547b98f4e9a6167a2913e3a6c61681312986dd"
},
"./demos/structured-output": {
"package_json_hash": "a66aacd49c57e74c0937bf4bea0986168086debb"
},
"./demos/structured-output-node": {
"package_json_hash": "f64cc27508f9dda6fbb3bf4192c031dcc671e64a"
},
"./demos/text-generation": {
"package_json_hash": "d52767521e285b05c3235eaf2c8cc0e47fdbf90d"
},
"./demos/text-generation-stream": {
"package_json_hash": "f8272f5b1f5f1c83c53395dfc76646cab18a32b7"
},
"./demos/tool-calling": {
"package_json_hash": "3a0b1d91022d706b96e7b429c1349116ba9373b5"
},
"./demos/tool-calling-stream": {
"package_json_hash": "7c92250cda46aaac7eb6aeea0255828781c4abcb"
},
"./demos/tool-calling-stream-traditional": {
"package_json_hash": "c610c334d5f53a6e399bddddf68098ca0dec96d7"
},
"./demos/ui-worker": {
"package_json_hash": "831702fff4771ce9ce7d93afe6824ec6fa316125"
},
"./demos/remote-mcp-cf-access": {
"package_json_hash": "1a09d449c88cfe3b989f352d18813385578b98ca"
},
"./demos/remote-mcp-authless": {
"package_json_hash": "ba9953ce57a26cb271144e67609ed98fd1c1110e"
},
"./demos/python-workers-mcp": {
"package_json_hash": "0e710d7b27bb34edba396dc2b3365db230c076cb"
},
"./demos/vision": {
"package_json_hash": "e53450d50753f0574995feef3b2f845045fc3dc3"
},
"./demos/remote-mcp-google-oauth": {
"package_json_hash": "21bdab2ebbbe336c5fe6fb032fde804373f1b489"
},
"./demos/remote-mcp-logto": {
"package_json_hash": "a98a0cb367641ff86d89a7127f5e2551d2a1532f"
},
"./demos/remote-mcp-server-descope-auth": {
"package_json_hash": "c5de845803aae734fa60185200d4bfa2e1d0fb23"
},
"./demos/remote-mcp-server-autorag": {
"package_json_hash": "2b4e9b35192362b3be2743370469ce3a627a72b0"
},
"./demos/use-mcp-inspector": {
"package_json_hash": "d1d084f1aa9a752ead5250e0a070f97a9114dcea"
},
"./demos/hello-world": {
"package_json_hash": "ab24a12893c001fe3416fadea2a8bf5e7e68392e"
}
}
}
9 changes: 9 additions & 0 deletions packages/tanstack-ai/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -275,6 +275,15 @@ Workers AI supports four configuration modes:

Third-party providers (OpenAI, Anthropic, Gemini, Grok, OpenRouter) only support the gateway modes.

All Workers AI config modes also accept `sessionAffinity` to route requests with the same key to the same backend replica for prefix-cache optimization:

```typescript
const adapter = createWorkersAiChat("@cf/meta/llama-3.3-70b-instruct-fp8-fast", {
binding: env.AI,
sessionAffinity: "my-unique-session-id",
});
```

## Links

- [TanStack AI Documentation](https://tanstack.com/ai)
Expand Down
19 changes: 12 additions & 7 deletions packages/tanstack-ai/src/adapters/workers-ai.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,11 +33,18 @@ export type WorkersAiTextModel =
function buildWorkersAiClient(config: WorkersAiAdapterConfig): OpenAI {
validateWorkersAiConfig(config);

const sessionHeaders: Record<string, string> | undefined = config.sessionAffinity
? { "x-session-affinity": config.sessionAffinity }
: undefined;

if (isDirectBindingConfig(config)) {
// Plain binding mode: shim translates OpenAI fetch calls to env.AI.run()
return new OpenAI({
apiKey: "unused",
fetch: createWorkersAiBindingFetch(config.binding),
fetch: createWorkersAiBindingFetch(
config.binding,
sessionHeaders ? { extraHeaders: sessionHeaders } : undefined,
),
});
}

Expand All @@ -46,13 +53,14 @@ function buildWorkersAiClient(config: WorkersAiAdapterConfig): OpenAI {
return new OpenAI({
baseURL: `https://api.cloudflare.com/client/v4/accounts/${config.accountId}/ai/v1`,
apiKey: config.apiKey,
defaultHeaders: sessionHeaders,
});
}

// Gateway mode (existing): use createGatewayFetch
const gatewayConfig = config as AiGatewayAdapterConfig;
return new OpenAI({
fetch: createGatewayFetch("workers-ai", gatewayConfig),
fetch: createGatewayFetch("workers-ai", gatewayConfig, sessionHeaders),
apiKey: gatewayConfig.apiKey ?? "unused",
});
}
Expand Down Expand Up @@ -377,11 +385,8 @@ export class WorkersAiTextAdapter<TModel extends WorkersAiTextModel> extends Bas

// Reasoning content (used by models like QwQ, DeepSeek R1, Kimi K2.5)
// The OpenAI SDK doesn't type this field, but models send it as an extension.
const reasoningContent = ((delta as Record<string, unknown>)
.reasoning_content ??
(delta as Record<string, unknown>).reasoning) as
| string
| undefined;
const reasoningContent = ((delta as Record<string, unknown>).reasoning_content ??
(delta as Record<string, unknown>).reasoning) as string | undefined;
if (reasoningContent) {
// RUN_STARTED is already guaranteed by the guard above
if (!hasEmittedStepStarted) {
Expand Down
22 changes: 18 additions & 4 deletions packages/tanstack-ai/src/utils/create-fetcher.ts
Original file line number Diff line number Diff line change
Expand Up @@ -105,10 +105,17 @@ export interface WorkersAiDirectCredentialsConfig {
* upstream provider), distinct from `cfApiKey` (used in the `cf-aig-authorization`
* header for authenticated gateways).
*/
export type WorkersAiAdapterConfig =
export type WorkersAiAdapterConfig = (
| WorkersAiDirectBindingConfig
| WorkersAiDirectCredentialsConfig
| (AiGatewayAdapterConfig & { apiKey?: string });
| (AiGatewayAdapterConfig & { apiKey?: string })
) & {
/**
* Session affinity key for prefix-cache optimization.
* Routes requests with the same key to the same backend replica.
*/
sessionAffinity?: string;
};

// ---------------------------------------------------------------------------
// Config detection helpers
Expand Down Expand Up @@ -330,7 +337,10 @@ function sanitizeToolCallId(id: string): string {
* request parameters are extracted from the JSON body, matching Workers AI's
* `binding.run(model, inputs)` calling convention.
*/
export function createWorkersAiBindingFetch(binding: WorkersAiBinding): typeof fetch {
export function createWorkersAiBindingFetch(
binding: WorkersAiBinding,
options?: { extraHeaders?: Record<string, string> },
): typeof fetch {
return async (_input, init) => {
if (!init?.body) {
return new Response("No body", { status: 400 });
Expand Down Expand Up @@ -359,7 +369,11 @@ export function createWorkersAiBindingFetch(binding: WorkersAiBinding): typeof f
if (body.response_format) inputs.response_format = body.response_format;
if (stream) inputs.stream = true;

const result = await binding.run(model, inputs);
const result = await binding.run(
model,
inputs,
options?.extraHeaders ? { extraHeaders: options.extraHeaders } : undefined,
);

if (stream && result instanceof ReadableStream) {
// Workers AI returns an SSE stream with `data: {"response":"chunk"}` format.
Expand Down
38 changes: 38 additions & 0 deletions packages/tanstack-ai/test/binding-fetch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -545,6 +545,44 @@ describe("createWorkersAiBindingFetch", () => {
expect(json.choices[0]!.finish_reason).toBe("stop");
});

// Headers handed to createWorkersAiBindingFetch() must show up as the third
// argument of the single binding.run() call made for the request.
it("should forward extraHeaders to binding.run() when configured", async () => {
	const runStub = vi.fn().mockResolvedValue({ response: "ok" });
	const binding = mockBinding(runStub);
	const fetcher = createWorkersAiBindingFetch(binding, {
		extraHeaders: { "x-session-affinity": "session-123" },
	});

	// Minimal non-streaming chat completion request.
	const requestBody = JSON.stringify({
		model: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
		messages: [{ role: "user", content: "Hi" }],
	});
	await fetcher("https://api.openai.com/v1/chat/completions", {
		method: "POST",
		body: requestBody,
	});

	expect(binding.run).toHaveBeenCalledOnce();
	// Third positional argument of the first (and only) call is the options bag.
	const firstCall = binding.run.mock.calls[0]!;
	const options = firstCall[2];
	expect(options).toEqual({ extraHeaders: { "x-session-affinity": "session-123" } });
});

// When the fetcher is created without options, binding.run() must receive
// `undefined` as its third argument rather than an empty options object.
it("should not pass extraHeaders to binding.run() when not configured", async () => {
	const runStub = vi.fn().mockResolvedValue({ response: "ok" });
	const binding = mockBinding(runStub);
	const fetcher = createWorkersAiBindingFetch(binding);

	// Minimal non-streaming chat completion request.
	const requestBody = JSON.stringify({
		model: "@cf/meta/llama-3.3-70b-instruct-fp8-fast",
		messages: [{ role: "user", content: "Hi" }],
	});
	await fetcher("https://api.openai.com/v1/chat/completions", {
		method: "POST",
		body: requestBody,
	});

	expect(binding.run).toHaveBeenCalledOnce();
	// Third positional argument of the first (and only) call is the options bag.
	const firstCall = binding.run.mock.calls[0]!;
	const options = firstCall[2];
	expect(options).toBeUndefined();
});

it("should pass response_format to binding for structured output", async () => {
const binding = mockBinding(vi.fn().mockResolvedValue({ response: '{"name":"test"}' }));

Expand Down
Loading
Loading