Skip to content

Commit d4a78fc

Browse files
committed
fix: place Anthropic cache marker on last message instead of second-to-last
1 parent d7560e1 commit d4a78fc

File tree

2 files changed: +39 −26 lines changed

src/common/utils/ai/cacheStrategy.test.ts

Lines changed: 33 additions & 19 deletions
Original file line number | Diff line number | Diff line change
@@ -41,13 +41,22 @@ describe("cacheStrategy", () => {
4141
expect(result).toEqual(messages);
4242
});
4343

44-
it("should not modify messages if less than 2 messages", () => {
44+
it("should add cache control to single message for Anthropic models", () => {
4545
const messages: ModelMessage[] = [{ role: "user", content: "Hello" }];
4646
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
47-
expect(result).toEqual(messages);
47+
expect(result[0]).toEqual({
48+
...messages[0],
49+
providerOptions: {
50+
anthropic: {
51+
cacheControl: {
52+
type: "ephemeral",
53+
},
54+
},
55+
},
56+
});
4857
});
4958

50-
it("should add cache control to second-to-last message for Anthropic models", () => {
59+
it("should add cache control to last message for Anthropic models", () => {
5160
const messages: ModelMessage[] = [
5261
{ role: "user", content: "Hello" },
5362
{ role: "assistant", content: "Hi there!" },
@@ -56,9 +65,10 @@ describe("cacheStrategy", () => {
5665
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
5766

5867
expect(result[0]).toEqual(messages[0]); // First message unchanged
59-
expect(result[1]).toEqual({
60-
// Second message has cache control
61-
...messages[1],
68+
expect(result[1]).toEqual(messages[1]); // Second message unchanged
69+
expect(result[2]).toEqual({
70+
// Last message has cache control
71+
...messages[2],
6272
providerOptions: {
6373
anthropic: {
6474
cacheControl: {
@@ -67,7 +77,6 @@ describe("cacheStrategy", () => {
6777
},
6878
},
6979
});
70-
expect(result[2]).toEqual(messages[2]); // Last message unchanged
7180
});
7281

7382
it("should work with exactly 2 messages", () => {
@@ -77,9 +86,10 @@ describe("cacheStrategy", () => {
7786
];
7887
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
7988

80-
expect(result[0]).toEqual({
81-
// First message gets cache control
82-
...messages[0],
89+
expect(result[0]).toEqual(messages[0]); // First message unchanged
90+
expect(result[1]).toEqual({
91+
// Last message gets cache control
92+
...messages[1],
8393
providerOptions: {
8494
anthropic: {
8595
cacheControl: {
@@ -88,7 +98,6 @@ describe("cacheStrategy", () => {
8898
},
8999
},
90100
});
91-
expect(result[1]).toEqual(messages[1]); // Last message unchanged
92101
});
93102

94103
it("should add cache control to last content part for array content", () => {
@@ -108,17 +117,24 @@ describe("cacheStrategy", () => {
108117
{ type: "text", text: "How can I help?" },
109118
],
110119
},
111-
{ role: "user", content: "Final question" },
120+
{
121+
role: "user",
122+
content: [
123+
{ type: "text", text: "Final" },
124+
{ type: "text", text: "question" },
125+
],
126+
},
112127
];
113128
const result = applyCacheControl(messages, "anthropic:claude-3-5-sonnet");
114129

115130
expect(result[0]).toEqual(messages[0]); // First message unchanged
131+
expect(result[1]).toEqual(messages[1]); // Second message unchanged
116132

117-
// Second message (array content): cache control on LAST content part only
118-
const secondMsg = result[1];
119-
expect(secondMsg.role).toBe("assistant");
120-
expect(Array.isArray(secondMsg.content)).toBe(true);
121-
const content = secondMsg.content as Array<{
133+
// Last message (array content): cache control on LAST content part only
134+
const lastMsg = result[2];
135+
expect(lastMsg.role).toBe("user");
136+
expect(Array.isArray(lastMsg.content)).toBe(true);
137+
const content = lastMsg.content as Array<{
122138
type: string;
123139
text: string;
124140
providerOptions?: unknown;
@@ -127,8 +143,6 @@ describe("cacheStrategy", () => {
127143
expect(content[1].providerOptions).toEqual({
128144
anthropic: { cacheControl: { type: "ephemeral" } },
129145
}); // Last part has cache control
130-
131-
expect(result[2]).toEqual(messages[2]); // Last message unchanged
132146
});
133147
});
134148

src/common/utils/ai/cacheStrategy.ts

Lines changed: 6 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -66,25 +66,24 @@ function addCacheControlToLastContentPart(msg: ModelMessage): ModelMessage {
6666

6767
/**
6868
* Apply cache control to messages for Anthropic models.
69-
* Caches all messages except the last user message for optimal cache hits.
69+
* Adds a cache marker to the last message so the entire conversation is cached.
7070
*
7171
* NOTE: The SDK requires providerOptions on content parts, not on the message.
72-
* We add cache_control to the last content part of the second-to-last message.
72+
* We add cache_control to the last content part of the last message.
7373
*/
7474
export function applyCacheControl(messages: ModelMessage[], modelString: string): ModelMessage[] {
7575
// Only apply cache control for Anthropic models
7676
if (!supportsAnthropicCache(modelString)) {
7777
return messages;
7878
}
7979

80-
// Need at least 2 messages to add a cache breakpoint
81-
if (messages.length < 2) {
80+
// Need at least 1 message to add a cache breakpoint
81+
if (messages.length < 1) {
8282
return messages;
8383
}
8484

85-
// Add cache breakpoint at the second-to-last message
86-
// This caches everything up to (but not including) the current user message
87-
const cacheIndex = messages.length - 2;
85+
// Add cache breakpoint at the last message
86+
const cacheIndex = messages.length - 1;
8887

8988
return messages.map((msg, index) => {
9089
if (index === cacheIndex) {

0 commit comments

Comments
 (0)