From 64cce7506f2af2f1f8ab8d1c0553440165e9bad7 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Tue, 9 Dec 2025 18:54:29 -0600
Subject: [PATCH 1/2] fix: reduce flakiness in anthropic1MContext test

Token limit errors should fail immediately, but Anthropic's actual
limits are unreliable to test against (they may change without notice).
Instead:

- Remove the flaky integration test that depended on Anthropic's 200k limit
- Extract buildAnthropicHeaders() into a testable pure function
- Add unit tests for the header-building logic in aiService.test.ts

The unit tests verify we send the correct header; Anthropic's
enforcement of that header is their responsibility, not ours.
---
 src/node/services/aiService.test.ts  |  43 +++++++++-
 src/node/services/aiService.ts       |  32 ++++++--
 tests/ipc/anthropic1MContext.test.ts | 115 ---------------------------
 3 files changed, 66 insertions(+), 124 deletions(-)
 delete mode 100644 tests/ipc/anthropic1MContext.test.ts

diff --git a/src/node/services/aiService.test.ts b/src/node/services/aiService.test.ts
index af96f3db7d..5bf14e8e88 100644
--- a/src/node/services/aiService.test.ts
+++ b/src/node/services/aiService.test.ts
@@ -3,7 +3,12 @@
 // For now, the commandProcessor tests demonstrate our testing approach
 
 import { describe, it, expect, beforeEach } from "bun:test";
-import { AIService, normalizeAnthropicBaseURL } from "./aiService";
+import {
+  AIService,
+  normalizeAnthropicBaseURL,
+  buildAnthropicHeaders,
+  ANTHROPIC_1M_CONTEXT_HEADER,
+} from "./aiService";
 import { HistoryService } from "./historyService";
 import { PartialService } from "./partialService";
 import { InitStateManager } from "./initStateManager";
@@ -76,3 +81,39 @@ describe("normalizeAnthropicBaseURL", () => {
     );
   });
 });
+
+describe("buildAnthropicHeaders", () => {
+  it("returns undefined when use1MContext is false and no existing headers", () => {
+    expect(buildAnthropicHeaders(undefined, false)).toBeUndefined();
+  });
+
+  it("returns existing headers unchanged when use1MContext is false", () => {
+    const existing = { "x-custom": "value" };
+    expect(buildAnthropicHeaders(existing, false)).toBe(existing);
+  });
+
+  it("returns existing headers unchanged when use1MContext is undefined", () => {
+    const existing = { "x-custom": "value" };
+    expect(buildAnthropicHeaders(existing, undefined)).toBe(existing);
+  });
+
+  it("adds 1M context header when use1MContext is true and no existing headers", () => {
+    const result = buildAnthropicHeaders(undefined, true);
+    expect(result).toEqual({ "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER });
+  });
+
+  it("merges 1M context header with existing headers when use1MContext is true", () => {
+    const existing = { "x-custom": "value" };
+    const result = buildAnthropicHeaders(existing, true);
+    expect(result).toEqual({
+      "x-custom": "value",
+      "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER,
+    });
+  });
+
+  it("overwrites existing anthropic-beta header when use1MContext is true", () => {
+    const existing = { "anthropic-beta": "other-beta" };
+    const result = buildAnthropicHeaders(existing, true);
+    expect(result).toEqual({ "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER });
+  });
+});
diff --git a/src/node/services/aiService.ts b/src/node/services/aiService.ts
index 5f880ade60..3689d24dfc 100644
--- a/src/node/services/aiService.ts
+++ b/src/node/services/aiService.ts
@@ -214,6 +214,26 @@ export function normalizeAnthropicBaseURL(baseURL: string): string {
   return `${trimmed}/v1`;
 }
 
+/** Header value for Anthropic 1M context beta */
+export const ANTHROPIC_1M_CONTEXT_HEADER = "context-1m-2025-08-07";
"context-1m-2025-08-07"; + +/** + * Build headers for Anthropic provider, optionally including the 1M context beta header. + * Exported for testing. + */ +export function buildAnthropicHeaders( + existingHeaders: Record | undefined, + use1MContext: boolean | undefined +): Record | undefined { + if (!use1MContext) { + return existingHeaders; + } + if (existingHeaders) { + return { ...existingHeaders, "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER }; + } + return { "anthropic-beta": ANTHROPIC_1M_CONTEXT_HEADER }; +} + /** * Preload AI SDK provider modules to avoid race conditions in concurrent test environments. * This function loads @ai-sdk/anthropic, @ai-sdk/openai, and ollama-ai-provider-v2 eagerly @@ -447,14 +467,10 @@ export class AIService extends EventEmitter { : configWithApiKey; // Add 1M context beta header if requested - const use1MContext = muxProviderOptions?.anthropic?.use1MContext; - const existingHeaders = normalizedConfig.headers; - const headers = - use1MContext && existingHeaders - ? { ...existingHeaders, "anthropic-beta": "context-1m-2025-08-07" } - : use1MContext - ? { "anthropic-beta": "context-1m-2025-08-07" } - : existingHeaders; + const headers = buildAnthropicHeaders( + normalizedConfig.headers, + muxProviderOptions?.anthropic?.use1MContext + ); // Lazy-load Anthropic provider to reduce startup time const { createAnthropic } = await PROVIDER_REGISTRY.anthropic(); diff --git a/tests/ipc/anthropic1MContext.test.ts b/tests/ipc/anthropic1MContext.test.ts deleted file mode 100644 index 9fed7c567e..0000000000 --- a/tests/ipc/anthropic1MContext.test.ts +++ /dev/null @@ -1,115 +0,0 @@ -import { setupWorkspace, shouldRunIntegrationTests, validateApiKeys } from "./setup"; -import { - sendMessageWithModel, - createStreamCollector, - assertStreamSuccess, - buildLargeHistory, - modelString, -} from "./helpers"; - -// Skip all tests if TEST_INTEGRATION is not set -const describeIntegration = shouldRunIntegrationTests() ? 
-
-// Validate API keys before running tests
-if (shouldRunIntegrationTests()) {
-  validateApiKeys(["ANTHROPIC_API_KEY"]);
-}
-
-describeIntegration("Anthropic 1M context", () => {
-  test.concurrent(
-    "should handle larger context with 1M flag enabled vs standard limits",
-    async () => {
-      const { env, workspaceId, cleanup } = await setupWorkspace("anthropic");
-      try {
-        // Build large conversation history to exceed 200k token limit
-        // Standard limit: 200k tokens
-        // 1M context: up to 1M tokens
-        // We need ~210k tokens to reliably exceed standard limit
-        // Using 20 messages of 50k chars = 1M chars ≈ 210k tokens (accounting for overhead)
-        await buildLargeHistory(workspaceId, env.config, {
-          messageSize: 50_000,
-          messageCount: 20,
-          textPrefix: "Context test: ",
-        });
-
-        // Phase 1: Try without 1M context flag - should fail with context limit error
-        const collectorWithout1M = createStreamCollector(env.orpc, workspaceId);
-        collectorWithout1M.start();
-
-        const resultWithout1M = await sendMessageWithModel(
-          env,
-          workspaceId,
-          "Summarize the context above in one word.",
-          modelString("anthropic", "claude-sonnet-4-5"),
-          {
-            providerOptions: {
-              anthropic: {
-                use1MContext: false,
-              },
-            },
-          }
-        );
-
-        expect(resultWithout1M.success).toBe(true);
-
-        const resultType = await Promise.race([
-          collectorWithout1M.waitForEvent("stream-end", 30000).then(() => "success"),
-          collectorWithout1M.waitForEvent("stream-error", 30000).then(() => "error"),
-        ]);
-
-        // Should get an error due to exceeding 200k token limit
-        expect(resultType).toBe("error");
-        const errorEvent = collectorWithout1M
-          .getEvents()
-          .find((e) => "type" in e && e.type === "stream-error") as { error: string } | undefined;
-        expect(errorEvent).toBeDefined();
-        expect(errorEvent!.error).toMatch(/too long|200000|maximum/i);
-        collectorWithout1M.stop();
-
-        // Phase 2: Try WITH 1M context flag
-        // Should handle the large context better with beta header
-        const collectorWith1M = createStreamCollector(env.orpc, workspaceId);
-        collectorWith1M.start();
-
-        const resultWith1M = await sendMessageWithModel(
-          env,
-          workspaceId,
-          "Summarize the context above in one word.",
-          modelString("anthropic", "claude-sonnet-4-5"),
-          {
-            providerOptions: {
-              anthropic: {
-                use1MContext: true,
-              },
-            },
-          }
-        );
-
-        expect(resultWith1M.success).toBe(true);
-
-        await collectorWith1M.waitForEvent("stream-end", 30000);
-
-        // With 1M context, should succeed
-        assertStreamSuccess(collectorWith1M);
-
-        const messageWith1M = collectorWith1M.getFinalMessage();
-        expect(messageWith1M).toBeDefined();
-
-        // The key test: with 1M context, we should get a valid response
-        // that processed the large context
-        if (messageWith1M && "parts" in messageWith1M && Array.isArray(messageWith1M.parts)) {
-          const content = messageWith1M.parts
-            .filter((part) => part.type === "text")
-            .map((part) => (part as { text: string }).text)
-            .join("");
-          // Should have some content (proves it processed the request)
-          expect(content.length).toBeGreaterThan(0);
-        }
-        collectorWith1M.stop();
-      } finally {
-        await cleanup();
-      }
-    },
-    60000 // 1 minute timeout
-  );
-});

From 69f3ee9378b94bd7f93344022aad2a9edeaf8456 Mon Sep 17 00:00:00 2001
From: Ammar
Date: Tue, 9 Dec 2025 19:10:58 -0600
Subject: [PATCH 2/2] ci: retry