Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
52 changes: 33 additions & 19 deletions src/common/utils/ai/cacheStrategy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,22 @@ describe("cacheStrategy", () => {
expect(result).toEqual(messages);
});

it("should not modify messages if less than 2 messages", () => {
it("should add cache control to single message for Anthropic models", () => {
const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
expect(result).toEqual(messages);
expect(result[0]).toEqual({
...messages[0],
providerOptions: {
anthropic: {
cacheControl: {
type: "ephemeral",
},
},
},
});
});

it("should add cache control to second-to-last message for Anthropic models", () => {
it("should add cache control to last message for Anthropic models", () => {
const messages: ModelMessage[] = [
{ role: "user", content: "Hello" },
{ role: "assistant", content: "Hi there!" },
Expand All @@ -56,9 +65,10 @@ describe("cacheStrategy", () => {
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");

expect(result[0]).toEqual(messages[0]); // First message unchanged
expect(result[1]).toEqual({
// Second message has cache control
...messages[1],
expect(result[1]).toEqual(messages[1]); // Second message unchanged
expect(result[2]).toEqual({
// Last message has cache control
...messages[2],
providerOptions: {
anthropic: {
cacheControl: {
Expand All @@ -67,7 +77,6 @@ describe("cacheStrategy", () => {
},
},
});
expect(result[2]).toEqual(messages[2]); // Last message unchanged
});

it("should work with exactly 2 messages", () => {
Expand All @@ -77,9 +86,10 @@ describe("cacheStrategy", () => {
];
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");

expect(result[0]).toEqual({
// First message gets cache control
...messages[0],
expect(result[0]).toEqual(messages[0]); // First message unchanged
expect(result[1]).toEqual({
// Last message gets cache control
...messages[1],
providerOptions: {
anthropic: {
cacheControl: {
Expand All @@ -88,7 +98,6 @@ describe("cacheStrategy", () => {
},
},
});
expect(result[1]).toEqual(messages[1]); // Last message unchanged
});

it("should add cache control to last content part for array content", () => {
Expand All @@ -108,17 +117,24 @@ describe("cacheStrategy", () => {
{ type: "text", text: "How can I help?" },
],
},
{ role: "user", content: "Final question" },
{
role: "user",
content: [
{ type: "text", text: "Final" },
{ type: "text", text: "question" },
],
},
];
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");

expect(result[0]).toEqual(messages[0]); // First message unchanged
expect(result[1]).toEqual(messages[1]); // Second message unchanged

// Second message (array content): cache control on LAST content part only
const secondMsg = result[1];
expect(secondMsg.role).toBe("assistant");
expect(Array.isArray(secondMsg.content)).toBe(true);
const content = secondMsg.content as Array<{
// Last message (array content): cache control on LAST content part only
const lastMsg = result[2];
expect(lastMsg.role).toBe("user");
expect(Array.isArray(lastMsg.content)).toBe(true);
const content = lastMsg.content as Array<{
type: string;
text: string;
providerOptions?: unknown;
Expand All @@ -127,8 +143,6 @@ describe("cacheStrategy", () => {
expect(content[1].providerOptions).toEqual({
anthropic: { cacheControl: { type: "ephemeral" } },
}); // Last part has cache control

expect(result[2]).toEqual(messages[2]); // Last message unchanged
});
});

Expand Down
13 changes: 6 additions & 7 deletions src/common/utils/ai/cacheStrategy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -66,25 +66,24 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {

/**
* Apply cache control to messages for Anthropic models.
- * Caches all messages except the last user message for optimal cache hits.
+ * Adds a cache marker to the last message so the entire conversation is cached.
*
* NOTE: The SDK requires providerOptions on content parts, not on the message.
- * We add cache_control to the last content part of the second-to-last message.
+ * We add cache_control to the last content part of the last message.
*/
export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
// Only apply cache control for Anthropic models
if (!supportsAnthropicCache(modelString)) {
return messages;
}

// Need at least 2 messages to add a cache breakpoint
if (messages.length < 2) {
// Need at least 1 message to add a cache breakpoint
if (messages.length < 1) {
return messages;
}

// Add cache breakpoint at the second-to-last message
// This caches everything up to (but not including) the current user message
const cacheIndex = messages.length - 2;
// Add cache breakpoint at the last message
const cacheIndex = messages.length - 1;

return messages.map((msg, index) => {
if (index === cacheIndex) {
Expand Down
12 changes: 6 additions & 6 deletions src/node/services/aiService.ts
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ if (typeof globalFetchWithExtras.certificate === "function") {
*
* Injects cache_control on:
* 1. Last tool (caches all tool definitions)
- * 2. Second-to-last message's last content part (caches conversation history)
+ * 2. Last message's last content part (caches entire conversation)
*/
function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fetch {
const cachingFetch = async (
Expand All @@ -123,11 +123,11 @@ function wrapFetchWithAnthropicCacheControl(baseFetch: typeof fetch): typeof fet
lastTool.cache_control ??= { type: "ephemeral" };
}

// Inject cache_control on second-to-last message's last content part
// This caches conversation history up to (but not including) the current user message
if (Array.isArray(json.messages) && json.messages.length >= 2) {
const secondToLastMsg = json.messages[json.messages.length - 2] as Record<string, unknown>;
const content = secondToLastMsg.content;
// Inject cache_control on last message's last content part
// This caches the entire conversation
if (Array.isArray(json.messages) && json.messages.length >= 1) {
const lastMsg = json.messages[json.messages.length - 1] as Record<string, unknown>;
const content = lastMsg.content;

if (Array.isArray(content) && content.length > 0) {
// Array content: add cache_control to last part
Expand Down