Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions package-lock.json

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@librechat/agents",
"version": "3.1.74",
"version": "3.1.75",
"main": "./dist/cjs/main.cjs",
"module": "./dist/esm/main.mjs",
"types": "./dist/types/index.d.ts",
Expand Down
167 changes: 123 additions & 44 deletions src/agents/AgentContext.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,16 @@ import { addCacheControl } from '@/messages/cache';
import { DEFAULT_RESERVE_RATIO } from '@/messages';
import { toJsonSchema } from '@/utils/schema';

/**
 * Text block of a structured system message.
 * `cache_control` is optional: `buildSystemMessage` attaches it only to the
 * stable-instructions block when the Anthropic prompt-cache path is used.
 */
type AgentSystemTextBlock = {
type: 'text';
text: string;
cache_control?: { type: 'ephemeral' };
};

/**
 * Any block that may appear in a structured system message: either a text
 * block, or a `cachePoint` marker, which `buildSystemMessage` places right
 * after the stable text when the Bedrock prompt-cache path is used.
 */
type AgentSystemContentBlock =
| AgentSystemTextBlock
| { cachePoint: { type: 'default' } };

/**
* Encapsulates agent-specific state that can vary between agents in a multi-agent system
*/
Expand Down Expand Up @@ -249,7 +259,7 @@ export class AgentContext {
private summaryTokenCount: number = 0;
/**
* Where the summary should be injected:
* - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
* - `'system_prompt'`: cross-run summary, included in the dynamic system tail
* - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
* - `'none'`: no summary present
*/
Expand Down Expand Up @@ -417,7 +427,8 @@ export class AgentContext {

/**
* Gets the system runnable, creating it lazily if needed.
* Includes instructions, additional instructions, and programmatic-only tools documentation.
* Includes stable instructions, dynamic additional instructions, and
* programmatic-only tools documentation.
* Only rebuilds when marked stale (via markToolsAsDiscovered).
*/
get systemRunnable():
Expand All @@ -431,8 +442,10 @@ export class AgentContext {
return this.cachedSystemRunnable;
}

const instructionsString = this.buildInstructionsString();
this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
this.cachedSystemRunnable = this.buildSystemRunnable({
stableInstructions: this.buildStableInstructionsString(),
dynamicInstructions: this.buildDynamicInstructionsString(),
});
this.systemRunnableStale = false;
return this.cachedSystemRunnable;
}
Expand All @@ -443,17 +456,19 @@ export class AgentContext {
*/
initializeSystemRunnable(): void {
if (this.systemRunnableStale || this.cachedSystemRunnable === undefined) {
const instructionsString = this.buildInstructionsString();
this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
this.cachedSystemRunnable = this.buildSystemRunnable({
stableInstructions: this.buildStableInstructionsString(),
dynamicInstructions: this.buildDynamicInstructionsString(),
});
this.systemRunnableStale = false;
}
}

/**
* Builds the raw instructions string (without creating SystemMessage).
* Builds the cacheable instructions string (without creating SystemMessage).
* Includes agent identity preamble and handoff context when available.
*/
private buildInstructionsString(): string {
private buildStableInstructionsString(): string {
const parts: string[] = [];

const identityPreamble = this.buildIdentityPreamble();
Expand All @@ -465,21 +480,33 @@ export class AgentContext {
parts.push(this.instructions);
}

const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
if (programmaticToolsDoc) {
parts.push(programmaticToolsDoc);
}

return parts.join('\n\n');
}

/**
* Builds the dynamic system-tail string (without creating SystemMessage).
* Keep this out of prompt-cache-marked content so volatile context does not
* invalidate the stable prefix.
*/
private buildDynamicInstructionsString(): string {
const parts: string[] = [];

if (
this.additionalInstructions != null &&
this.additionalInstructions !== ''
) {
parts.push(this.additionalInstructions);
}

const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
if (programmaticToolsDoc) {
parts.push(programmaticToolsDoc);
}

// Cross-run summary: include in system prompt so the model has context
// from the prior run. Mid-run summaries are injected as a HumanMessage
// on the post-compaction clean slate instead (see buildSystemRunnable).
// Cross-run summary: include in the system tail so the model has context
// from the prior run without invalidating the cacheable prefix. Mid-run
// summaries are injected as a HumanMessage on the post-compaction clean
// slate instead (see buildSystemRunnable).
if (
this._summaryLocation === 'system_prompt' &&
this.summaryText != null &&
Expand Down Expand Up @@ -523,9 +550,13 @@ export class AgentContext {
* Build the system runnable from the pre-built stable and dynamic instruction strings.
* Only called when content has actually changed.
*/
private buildSystemRunnable(
instructionsString: string
):
private buildSystemRunnable({
stableInstructions,
dynamicInstructions,
}: {
stableInstructions: string;
dynamicInstructions: string;
}):
| Runnable<
BaseMessage[],
(BaseMessage | SystemMessage)[],
Expand All @@ -537,35 +568,17 @@ export class AgentContext {
this.summaryText != null &&
this.summaryText !== '';

if (!instructionsString && !hasMidRunSummary) {
if (!stableInstructions && !dynamicInstructions && !hasMidRunSummary) {
this.systemMessageTokens = 0;
return undefined;
}

let finalInstructions: string | BaseMessageFields = instructionsString;

let usePromptCache = false;
if (this.provider === Providers.ANTHROPIC) {
const anthropicOptions = this.clientOptions as
| t.AnthropicClientOptions
| undefined;
if (anthropicOptions?.promptCache === true) {
usePromptCache = true;
finalInstructions = {
content: [
{
type: 'text',
text: instructionsString,
cache_control: { type: 'ephemeral' },
},
],
};
}
}

const systemMessage = instructionsString
? new SystemMessage(finalInstructions)
: undefined;
const usePromptCache = this.hasAnthropicPromptCache();
const systemMessage = this.buildSystemMessage({
stableInstructions,
dynamicInstructions,
usePromptCache,
});

if (this.tokenCounter) {
this.systemMessageTokens = systemMessage
Expand Down Expand Up @@ -615,6 +628,72 @@ export class AgentContext {
}).withConfig({ runName: 'prompt' });
}

/**
 * Reports whether Anthropic prompt caching applies to this agent.
 * @returns `true` only when the provider is Anthropic and the client options
 * set `promptCache` to exactly `true`.
 */
private hasAnthropicPromptCache(): boolean {
  const options = this.clientOptions as t.AnthropicClientOptions | undefined;
  return (
    this.provider === Providers.ANTHROPIC && options?.promptCache === true
  );
}

/**
 * Reports whether Bedrock prompt caching applies to this agent.
 * @returns `true` only when the provider is Bedrock and the client options
 * set `promptCache` to exactly `true`.
 */
private hasBedrockPromptCache(): boolean {
  const options = this.clientOptions as
    | t.BedrockAnthropicClientOptions
    | undefined;
  return this.provider === Providers.BEDROCK && options?.promptCache === true;
}

/**
 * Assembles the system message from the stable prefix and the dynamic tail.
 *
 * Layouts, in order of precedence:
 * - `usePromptCache`: structured content where only the stable text block
 *   carries `cache_control`; the dynamic text follows unmarked.
 * - Bedrock prompt cache with non-empty stable text: stable text, then a
 *   `cachePoint` marker, then the dynamic text.
 * - otherwise: a plain string joining the non-empty parts with a blank line.
 *
 * @returns the system message, or `undefined` when both strings are empty.
 */
private buildSystemMessage({
  stableInstructions,
  dynamicInstructions,
  usePromptCache,
}: {
  stableInstructions: string;
  dynamicInstructions: string;
  usePromptCache: boolean;
}): SystemMessage | undefined {
  if (!(stableInstructions || dynamicInstructions)) {
    return undefined;
  }

  // The uncached tail block is identical in both structured layouts.
  const tailBlocks: AgentSystemContentBlock[] = dynamicInstructions
    ? [{ type: 'text', text: dynamicInstructions }]
    : [];

  if (usePromptCache) {
    const headBlocks: AgentSystemContentBlock[] = stableInstructions
      ? [
        {
          type: 'text',
          text: stableInstructions,
          cache_control: { type: 'ephemeral' },
        },
      ]
      : [];
    const cachedContent: AgentSystemContentBlock[] = [
      ...headBlocks,
      ...tailBlocks,
    ];
    return new SystemMessage({ content: cachedContent } as BaseMessageFields);
  }

  if (this.hasBedrockPromptCache() && stableInstructions) {
    // The cache point sits directly after the stable text, before the tail.
    const bedrockContent: AgentSystemContentBlock[] = [
      { type: 'text', text: stableInstructions },
      { cachePoint: { type: 'default' } },
      ...tailBlocks,
    ];
    return new SystemMessage({ content: bedrockContent } as BaseMessageFields);
  }

  const plainText = [stableInstructions, dynamicInstructions]
    .filter((segment) => segment !== '')
    .join('\n\n');
  return new SystemMessage(plainText);
}

/**
* Reset context for a new run
*/
Expand Down
116 changes: 116 additions & 0 deletions src/agents/__tests__/AgentContext.anthropic.live.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
// src/agents/__tests__/AgentContext.anthropic.live.test.ts
/**
* Live Anthropic prompt-cache verification.
*
* Run with:
* RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- AgentContext.anthropic.live.test.ts --runInBand
*/
import { config as dotenvConfig } from 'dotenv';
dotenvConfig();

import { describe, expect, it } from '@jest/globals';
import type * as t from '@/types';
import {
runLiveTurn,
assertSystemPayloadShape,
buildDynamicInstructions,
buildStableInstructions,
waitForCachePropagation,
} from './promptCacheLiveHelpers';
import { Providers } from '@/common';

// Live tests are opt-in: the flag must be exactly '1' and an API key must be
// present and non-empty.
const shouldRunLive =
  process.env.RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS === '1' &&
  (process.env.ANTHROPIC_API_KEY ?? '') !== '';

// Falls back to a skipped suite when the live prerequisites are missing.
const describeIfLive = shouldRunLive ? describe : describe.skip;

// Model under test; overridable through ANTHROPIC_PROMPT_CACHE_MODEL.
const modelName =
  process.env.ANTHROPIC_PROMPT_CACHE_MODEL ?? 'claude-sonnet-4-5';
const providerLabel = 'Anthropic';

/**
 * Builds the Anthropic client options used by the live test: streaming with
 * usage reporting, prompt caching enabled, and the prompt-caching beta header.
 */
function createClientOptions(): t.AnthropicClientOptions {
  const options: t.AnthropicClientOptions = {
    modelName,
    temperature: 0,
    maxTokens: 8,
    streaming: true,
    streamUsage: true,
    promptCache: true,
    clientOptions: {
      defaultHeaders: { 'anthropic-beta': 'prompt-caching-2024-07-31' },
    },
  };
  return options;
}

describeIfLive('AgentContext Anthropic prompt cache live API', () => {
  it('caches only the stable system prefix while dynamic tail changes', async () => {
    // A unique nonce gives this run its own stable prefix, run ids and thread id.
    const nonce = `agent-cache-live-${Date.now()}`;
    const threadId = `${nonce}-thread`;
    const tailDescription =
      'The Dynamic Marker line is runtime context and must remain outside the cached prefix.';

    const clientOptions = createClientOptions();
    const stableInstructions = buildStableInstructions({
      nonce,
      providerLabel,
    });
    const firstDynamicInstructions = buildDynamicInstructions({
      marker: 'alpha',
      tailDescription,
    });
    const secondDynamicInstructions = buildDynamicInstructions({
      marker: 'bravo',
      tailDescription,
    });

    // Both turns share everything except the run id and the dynamic tail.
    const runTurn = (
      runSuffix: string,
      dynamicInstructions: typeof firstDynamicInstructions
    ) =>
      runLiveTurn({
        provider: Providers.ANTHROPIC,
        providerLabel,
        clientOptions,
        runId: `${nonce}-${runSuffix}`,
        threadId,
        stableInstructions,
        dynamicInstructions,
      });

    // Payload shape: cached stable block followed by an unmarked dynamic block.
    await assertSystemPayloadShape({
      agentId: 'live-cache-shape-check',
      provider: Providers.ANTHROPIC,
      clientOptions,
      stableInstructions,
      dynamicInstructions: firstDynamicInstructions,
      expectedContent: [
        {
          type: 'text',
          text: stableInstructions,
          cache_control: { type: 'ephemeral' },
        },
        { type: 'text', text: firstDynamicInstructions },
      ],
    });

    // First turn: expects a cache write and no cache read.
    const coldTurn = await runTurn('first', firstDynamicInstructions);
    expect(coldTurn.text.toLowerCase()).toContain('alpha');
    const coldDetails = coldTurn.usage.input_token_details;
    expect(coldDetails?.cache_creation).toBeGreaterThan(0);
    expect(coldDetails?.cache_read ?? 0).toBe(0);

    await waitForCachePropagation();

    // Second turn: different dynamic tail, same stable prefix; expects a cache read.
    const warmTurn = await runTurn('second', secondDynamicInstructions);
    expect(warmTurn.text.toLowerCase()).toContain('bravo');
    expect(warmTurn.usage.input_token_details?.cache_read).toBeGreaterThan(0);
  }, 120_000);
});
Loading
Loading