innFactory · pull · May 1, 2026 · May 1, 2026
diff --git a/package-lock.json b/package-lock.json
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
   "name": "@librechat/agents",
-  "version": "3.1.74",
+  "version": "3.1.75",
   "main": "./dist/cjs/main.cjs",
   "module": "./dist/esm/main.mjs",
   "types": "./dist/types/index.d.ts",

diff --git a/src/agents/AgentContext.ts b/src/agents/AgentContext.ts
@@ -20,6 +20,16 @@ import { addCacheControl } from '@/messages/cache';
 import { DEFAULT_RESERVE_RATIO } from '@/messages';
 import { toJsonSchema } from '@/utils/schema';
 
+type AgentSystemTextBlock = {
+  type: 'text';
+  text: string;
+  cache_control?: { type: 'ephemeral' }; // set on the cache-marked stable block
+};
+// A text block, or the `cachePoint` marker pushed on the Bedrock cache path.
+type AgentSystemContentBlock =
+  | AgentSystemTextBlock
+  | { cachePoint: { type: 'default' } };
+
 /**
  * Encapsulates agent-specific state that can vary between agents in a multi-agent system
  */
@@ -249,7 +259,7 @@ export class AgentContext {
   private summaryTokenCount: number = 0;
   /**
    * Where the summary should be injected:
-   * - `'system_prompt'`: cross-run summary, included in `buildInstructionsString`
+   * - `'system_prompt'`: cross-run summary, included in the dynamic system tail
    * - `'user_message'`: mid-run compaction, injected as HumanMessage on clean slate
    * - `'none'`: no summary present
    */
@@ -417,7 +427,8 @@ export class AgentContext {
 
   /**
    * Gets the system runnable, creating it lazily if needed.
-   * Includes instructions, additional instructions, and programmatic-only tools documentation.
+   * Includes stable instructions, dynamic additional instructions, and
+   * programmatic-only tools documentation.
    * Only rebuilds when marked stale (via markToolsAsDiscovered).
    */
   get systemRunnable():
@@ -431,8 +442,10 @@ export class AgentContext {
       return this.cachedSystemRunnable;
     }
 
-    const instructionsString = this.buildInstructionsString();
-    this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
+    this.cachedSystemRunnable = this.buildSystemRunnable({
+      stableInstructions: this.buildStableInstructionsString(),
+      dynamicInstructions: this.buildDynamicInstructionsString(),
+    });
     this.systemRunnableStale = false;
     return this.cachedSystemRunnable;
   }
@@ -443,17 +456,19 @@ export class AgentContext {
    */
   initializeSystemRunnable(): void {
     if (this.systemRunnableStale || this.cachedSystemRunnable === undefined) {
-      const instructionsString = this.buildInstructionsString();
-      this.cachedSystemRunnable = this.buildSystemRunnable(instructionsString);
+      this.cachedSystemRunnable = this.buildSystemRunnable({
+        stableInstructions: this.buildStableInstructionsString(),
+        dynamicInstructions: this.buildDynamicInstructionsString(),
+      });
       this.systemRunnableStale = false;
     }
   }
 
   /**
-   * Builds the raw instructions string (without creating SystemMessage).
+   * Builds the cacheable instructions string (without creating SystemMessage).
    * Includes agent identity preamble and handoff context when available.
    */
-  private buildInstructionsString(): string {
+  private buildStableInstructionsString(): string {
     const parts: string[] = [];
 
     const identityPreamble = this.buildIdentityPreamble();
@@ -465,21 +480,33 @@ export class AgentContext {
       parts.push(this.instructions);
     }
 
+    const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
+    if (programmaticToolsDoc) {
+      parts.push(programmaticToolsDoc);
+    }
+
+    return parts.join('\n\n');
+  }
+
+  /**
+   * Builds the dynamic system-tail string (without creating SystemMessage).
+   * Keep this out of prompt-cache-marked content so volatile context does not
+   * invalidate the stable prefix.
+   */
+  private buildDynamicInstructionsString(): string {
+    const parts: string[] = [];
+
     if (
       this.additionalInstructions != null &&
       this.additionalInstructions !== ''
     ) {
       parts.push(this.additionalInstructions);
     }
 
-    const programmaticToolsDoc = this.buildProgrammaticOnlyToolsInstructions();
-    if (programmaticToolsDoc) {
-      parts.push(programmaticToolsDoc);
-    }
-
-    // Cross-run summary: include in system prompt so the model has context
-    // from the prior run.  Mid-run summaries are injected as a HumanMessage
-    // on the post-compaction clean slate instead (see buildSystemRunnable).
+    // Cross-run summary: include in the system tail so the model has context
+    // from the prior run without invalidating the cacheable prefix. Mid-run
+    // summaries are injected as a HumanMessage on the post-compaction clean
+    // slate instead (see buildSystemRunnable).
     if (
       this._summaryLocation === 'system_prompt' &&
       this.summaryText != null &&
@@ -523,9 +550,13 @@ export class AgentContext {
    * Build system runnable from pre-built instructions string.
    * Only called when content has actually changed.
    */
-  private buildSystemRunnable(
-    instructionsString: string
-  ):
+  private buildSystemRunnable({
+    stableInstructions,
+    dynamicInstructions,
+  }: {
+    stableInstructions: string;
+    dynamicInstructions: string;
+  }):
     | Runnable<
         BaseMessage[],
         (BaseMessage | SystemMessage)[],
@@ -537,35 +568,17 @@ export class AgentContext {
       this.summaryText != null &&
       this.summaryText !== '';
 
-    if (!instructionsString && !hasMidRunSummary) {
+    if (!stableInstructions && !dynamicInstructions && !hasMidRunSummary) {
       this.systemMessageTokens = 0;
       return undefined;
     }
 
-    let finalInstructions: string | BaseMessageFields = instructionsString;
-
-    let usePromptCache = false;
-    if (this.provider === Providers.ANTHROPIC) {
-      const anthropicOptions = this.clientOptions as
-        | t.AnthropicClientOptions
-        | undefined;
-      if (anthropicOptions?.promptCache === true) {
-        usePromptCache = true;
-        finalInstructions = {
-          content: [
-            {
-              type: 'text',
-              text: instructionsString,
-              cache_control: { type: 'ephemeral' },
-            },
-          ],
-        };
-      }
-    }
-
-    const systemMessage = instructionsString
-      ? new SystemMessage(finalInstructions)
-      : undefined;
+    const usePromptCache = this.hasAnthropicPromptCache();
+    const systemMessage = this.buildSystemMessage({
+      stableInstructions,
+      dynamicInstructions,
+      usePromptCache,
+    });
 
     if (this.tokenCounter) {
       this.systemMessageTokens = systemMessage
@@ -615,6 +628,72 @@ export class AgentContext {
     }).withConfig({ runName: 'prompt' });
   }
 
+  /** True only for the Anthropic provider with `promptCache` set to `true`. */
+  private hasAnthropicPromptCache(): boolean {
+    // Client options are only read as Anthropic options for that provider.
+    const options =
+      this.provider === Providers.ANTHROPIC
+        ? (this.clientOptions as t.AnthropicClientOptions | undefined)
+        : undefined;
+    return options?.promptCache === true;
+  }
+
+  /** True only for the Bedrock provider with `promptCache` set to `true`. */
+  private hasBedrockPromptCache(): boolean {
+    // Client options are only read as Bedrock options for that provider.
+    const options =
+      this.provider === Providers.BEDROCK
+        ? (this.clientOptions as t.BedrockAnthropicClientOptions | undefined)
+        : undefined;
+    return options?.promptCache === true;
+  }
+
+  /**
+   * Assembles the system message from a cacheable prefix and a volatile tail.
+   *
+   * - Both parts empty: returns `undefined`.
+   * - `usePromptCache`: content blocks, stable block tagged `cache_control`.
+   * - Bedrock prompt cache with a stable part: a `cachePoint` block follows it.
+   * - Otherwise: the non-empty parts joined by a blank line as plain text.
+   */
+  private buildSystemMessage({
+    stableInstructions,
+    dynamicInstructions,
+    usePromptCache,
+  }: {
+    stableInstructions: string;
+    dynamicInstructions: string;
+    usePromptCache: boolean;
+  }): SystemMessage | undefined {
+    if (!stableInstructions && !dynamicInstructions) {
+      return undefined;
+    }
+
+    // The volatile tail is never cache-marked, whichever provider is used.
+    const tail: AgentSystemContentBlock[] = dynamicInstructions
+      ? [{ type: 'text', text: dynamicInstructions }]
+      : [];
+    const stableText = { type: 'text' as const, text: stableInstructions };
+
+    let blocks: AgentSystemContentBlock[] | undefined;
+    if (usePromptCache) {
+      const head: AgentSystemContentBlock[] = stableInstructions
+        ? [{ ...stableText, cache_control: { type: 'ephemeral' } }]
+        : [];
+      blocks = [...head, ...tail];
+    } else if (this.hasBedrockPromptCache() && stableInstructions) {
+      blocks = [stableText, { cachePoint: { type: 'default' } }, ...tail];
+    }
+    if (blocks !== undefined) {
+      return new SystemMessage({ content: blocks } as BaseMessageFields);
+    }
+
+    const plainText = [stableInstructions, dynamicInstructions]
+      .filter((segment) => segment !== '')
+      .join('\n\n');
+    return new SystemMessage(plainText);
+  }
+
   /**
    * Reset context for a new run
    */

diff --git a/src/agents/__tests__/AgentContext.anthropic.live.test.ts b/src/agents/__tests__/AgentContext.anthropic.live.test.ts
@@ -0,0 +1,116 @@
+// src/agents/__tests__/AgentContext.anthropic.live.test.ts
+/**
+ * Live Anthropic prompt-cache verification.
+ *
+ * Run with:
+ * RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS=1 ANTHROPIC_API_KEY=... npm test -- AgentContext.anthropic.live.test.ts --runInBand
+ */
+import { config as dotenvConfig } from 'dotenv';
+dotenvConfig();
+
+import { describe, expect, it } from '@jest/globals';
+import type * as t from '@/types';
+import {
+  runLiveTurn,
+  assertSystemPayloadShape,
+  buildDynamicInstructions,
+  buildStableInstructions,
+  waitForCachePropagation,
+} from './promptCacheLiveHelpers';
+import { Providers } from '@/common';
+
+// Opt-in gate: the flag must be '1' and a non-empty API key must be present.
+const liveFlag = process.env.RUN_ANTHROPIC_PROMPT_CACHE_LIVE_TESTS;
+const apiKey = process.env.ANTHROPIC_API_KEY;
+const shouldRunLive = liveFlag === '1' && apiKey != null && apiKey !== '';
+const describeIfLive = shouldRunLive ? describe : describe.skip;
+
+// The model can be overridden through the environment for the live run.
+const modelName =
+  process.env.ANTHROPIC_PROMPT_CACHE_MODEL ?? 'claude-sonnet-4-5';
+const providerLabel = 'Anthropic';
+
+/**
+ * Live-run client options: streaming, usage reporting, prompt caching enabled.
+ */
+function createClientOptions(): t.AnthropicClientOptions {
+  const defaultHeaders = { 'anthropic-beta': 'prompt-caching-2024-07-31' };
+  return {
+    modelName,
+    temperature: 0,
+    maxTokens: 8,
+    streaming: true,
+    streamUsage: true,
+    promptCache: true,
+    clientOptions: { defaultHeaders },
+  };
+}
+
+describeIfLive('AgentContext Anthropic prompt cache live API', () => {
+  // Two turns share the stable prefix and differ only in the dynamic tail:
+  // the first turn must create the cache entry, the second must read it.
+  it('caches only the stable system prefix while dynamic tail changes', async () => {
+    const nonce = `agent-cache-live-${Date.now()}`;
+    const clientOptions = createClientOptions();
+    const stableInstructions = buildStableInstructions({
+      nonce,
+      providerLabel,
+    });
+    const tailDescription =
+      'The Dynamic Marker line is runtime context and must remain outside the cached prefix.';
+    const alphaTail = buildDynamicInstructions({
+      marker: 'alpha',
+      tailDescription,
+    });
+    const bravoTail = buildDynamicInstructions({
+      marker: 'bravo',
+      tailDescription,
+    });
+
+    // Every live turn uses the same provider, client, thread and stable prefix.
+    const runTurn = (label: string, dynamicInstructions: typeof alphaTail) =>
+      runLiveTurn({
+        provider: Providers.ANTHROPIC,
+        providerLabel,
+        clientOptions,
+        runId: `${nonce}-${label}`,
+        threadId: `${nonce}-thread`,
+        stableInstructions,
+        dynamicInstructions,
+      });
+
+    // Check the system payload shape before running the live turns.
+    await assertSystemPayloadShape({
+      agentId: 'live-cache-shape-check',
+      provider: Providers.ANTHROPIC,
+      clientOptions,
+      stableInstructions,
+      dynamicInstructions: alphaTail,
+      expectedContent: [
+        {
+          type: 'text',
+          text: stableInstructions,
+          cache_control: { type: 'ephemeral' },
+        },
+        {
+          type: 'text',
+          text: alphaTail,
+        },
+      ],
+    });
+
+    // First turn: expected to create cache tokens and read none.
+    const first = await runTurn('first', alphaTail);
+    expect(first.text.toLowerCase()).toContain('alpha');
+    const firstCache = first.usage.input_token_details;
+    expect(firstCache?.cache_creation).toBeGreaterThan(0);
+    expect(firstCache?.cache_read ?? 0).toBe(0);
+
+    await waitForCachePropagation();
+
+    // Second turn: same prefix, different tail; cache tokens must be read.
+    const second = await runTurn('second', bravoTail);
+    expect(second.text.toLowerCase()).toContain('bravo');
+    expect(second.usage.input_token_details?.cache_read).toBeGreaterThan(0);
+  }, 120_000);
+});