diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts index 66cd536a6f..c2b025c876 100644 --- a/src/common/types/thinking.ts +++ b/src/common/types/thinking.ts @@ -14,17 +14,33 @@ export type ThinkingLevel = "off" | "low" | "medium" | "high"; export type ThinkingLevelOn = Exclude; /** - * Anthropic effort level mapping + * Anthropic thinking token budget mapping * - * Maps our unified thinking levels to Anthropic's effort parameter: - * - off: No effort specified (undefined) - * - low: Most efficient - significant token savings - * - medium: Balanced approach with moderate token savings - * - high: Maximum capability (default behavior) + * These heuristics balance thinking depth with response time and cost. + * Used for models that support extended thinking with budgetTokens + * (e.g., Sonnet 4.5, Haiku 4.5, Opus 4.1, etc.) * - * The effort parameter controls all token spend including thinking, - * text responses, and tool calls. Unlike budget_tokens, it doesn't require - * thinking to be explicitly enabled. + * - off: No extended thinking + * - low: Quick thinking for straightforward tasks (4K tokens) + * - medium: Standard thinking for moderate complexity (10K tokens) + * - high: Deep thinking for complex problems (20K tokens) + */ +export const ANTHROPIC_THINKING_BUDGETS: Record = { + off: 0, + low: 4000, + medium: 10000, + high: 20000, +}; + +/** + * Anthropic Opus 4.5 effort parameter mapping + * + * The effort parameter is a new feature ONLY available for Claude Opus 4.5. + * It controls how much computational work the model applies to each task. + * + * Other Anthropic models must use the thinking.budgetTokens approach instead. 
+ * + * @see https://www.anthropic.com/news/claude-opus-4-5 */ export const ANTHROPIC_EFFORT: Record = { off: undefined, diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts new file mode 100644 index 0000000000..6283269f37 --- /dev/null +++ b/src/common/utils/ai/providerOptions.test.ts @@ -0,0 +1,119 @@ +/** + * Tests for provider options builder + */ + +import { describe, test, expect, mock } from "bun:test"; +import { buildProviderOptions } from "./providerOptions"; +import type { ThinkingLevel } from "@/common/types/thinking"; + +// Mock the log module to avoid console noise +void mock.module("@/node/services/log", () => ({ + log: { + debug: (): void => undefined, + info: (): void => undefined, + warn: (): void => undefined, + error: (): void => undefined, + }, +})); + +// Mock enforceThinkingPolicy to pass through +void mock.module("@/browser/utils/thinking/policy", () => ({ + enforceThinkingPolicy: (_model: string, level: ThinkingLevel) => level, +})); + +describe("buildProviderOptions - Anthropic", () => { + describe("Opus 4.5 (effort parameter)", () => { + test("should use effort parameter for claude-opus-4-5", () => { + const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + effort: "medium", + }, + }); + }); + + test("should use effort parameter for claude-opus-4-5-20251101", () => { + const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + effort: "high", + }, + }); + }); + + test("should omit effort when thinking is off for Opus 4.5", () => { + const result = buildProviderOptions("anthropic:claude-opus-4-5", "off"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + }, + }); + }); + }); + + describe("Other 
Anthropic models (thinking/budgetTokens)", () => { + test("should use thinking.budgetTokens for claude-sonnet-4-5", () => { + const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "medium"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 10000, + }, + }, + }); + }); + + test("should use thinking.budgetTokens for claude-opus-4-1", () => { + const result = buildProviderOptions("anthropic:claude-opus-4-1", "high"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 20000, + }, + }, + }); + }); + + test("should use thinking.budgetTokens for claude-haiku-4-5", () => { + const result = buildProviderOptions("anthropic:claude-haiku-4-5", "low"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 4000, + }, + }, + }); + }); + + test("should omit thinking when thinking is off for non-Opus 4.5", () => { + const result = buildProviderOptions("anthropic:claude-sonnet-4-5", "off"); + + expect(result).toEqual({ + anthropic: { + disableParallelToolUse: false, + sendReasoning: true, + }, + }); + }); + }); +}); diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 93b33cb068..5eb54540ca 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -12,6 +12,7 @@ import type { MuxProviderOptions } from "@/common/types/providerOptions"; import type { ThinkingLevel } from "@/common/types/thinking"; import { ANTHROPIC_EFFORT, + ANTHROPIC_THINKING_BUDGETS, GEMINI_THINKING_BUDGETS, OPENAI_REASONING_EFFORT, OPENROUTER_REASONING_EFFORT, @@ -83,9 +84,39 @@ export function buildProviderOptions( // Build Anthropic-specific options if (provider === "anthropic") { - const effort = 
ANTHROPIC_EFFORT[effectiveThinking]; + // Extract model name from model string (e.g., "anthropic:claude-opus-4-5" -> "claude-opus-4-5") + const [, modelName] = modelString.split(":"); + + // Check if this is Opus 4.5 (supports effort parameter) + // Opus 4.5 uses the new "effort" parameter for reasoning control + // All other Anthropic models use the "thinking" parameter with budgetTokens + const isOpus45 = modelName?.includes("opus-4-5") ?? false; + + if (isOpus45) { + // Opus 4.5: Use effort parameter for reasoning control + const effort = ANTHROPIC_EFFORT[effectiveThinking]; + log.debug("buildProviderOptions: Anthropic Opus 4.5 config", { + effort, + thinkingLevel: effectiveThinking, + }); + + const options: ProviderOptions = { + anthropic: { + disableParallelToolUse: false, // Always enable concurrent tool execution + sendReasoning: true, // Include reasoning traces in requests sent to the model + // Use effort parameter (Opus 4.5 only) to control token spend + // SDK auto-adds beta header "effort-2025-11-24" when effort is set + ...(effort && { effort }), + }, + }; + log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options); + return options; + } + + // Other Anthropic models: Use thinking parameter with budgetTokens + const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking]; log.debug("buildProviderOptions: Anthropic config", { - effort, + budgetTokens, thinkingLevel: effectiveThinking, }); @@ -93,9 +124,13 @@ export function buildProviderOptions( anthropic: { disableParallelToolUse: false, // Always enable concurrent tool execution sendReasoning: true, // Include reasoning traces in requests sent to the model - // Use effort parameter to control token spend (thinking, text, and tool calls) - // SDK auto-adds beta header "effort-2025-11-24" when effort is set - ...(effort && { effort }), + // Conditionally add thinking configuration (non-Opus 4.5 models) + ...(budgetTokens > 0 && { + thinking: { + type: "enabled", + 
+ budgetTokens, + }, + }), }, }; log.debug("buildProviderOptions: Returning Anthropic options", options); diff --git a/tests/ipcMain/sendMessage.reasoning.test.ts b/tests/ipcMain/sendMessage.reasoning.test.ts new file mode 100644 index 0000000000..10dc01218c --- /dev/null +++ b/tests/ipcMain/sendMessage.reasoning.test.ts @@ -0,0 +1,60 @@ +/** + * Integration tests for reasoning/thinking functionality across Anthropic models. + * Smoke-tests that a streamed reply succeeds with thinking enabled on Opus 4.5 (the `effort` path) and Sonnet 4.5 (the `thinking.budgetTokens` path); the request options themselves are not inspected here. + */ + +import { shouldRunIntegrationTests, validateApiKeys } from "./setup"; +import { sendMessage, assertStreamSuccess, waitForStreamSuccess } from "./helpers"; +import { createSharedRepo, cleanupSharedRepo, withSharedWorkspace } from "./sendMessageTestHelpers"; +import { KNOWN_MODELS } from "@/common/constants/knownModels"; + +const describeIntegration = shouldRunIntegrationTests() ? describe : describe.skip; + +if (shouldRunIntegrationTests()) { + validateApiKeys(["ANTHROPIC_API_KEY"]); +} + +beforeAll(createSharedRepo); +afterAll(cleanupSharedRepo); + +describeIntegration("Anthropic reasoning parameter tests", () => { + test.concurrent( + "Sonnet 4.5 with thinking (budgetTokens)", + async () => { + await withSharedWorkspace("anthropic", async ({ env, workspaceId }) => { + const result = await sendMessage( + env.mockIpcRenderer, + workspaceId, + "What is 2+2? Answer in one word.", + { model: KNOWN_MODELS.SONNET.id, thinkingLevel: "low" } + ); + expect(result.success).toBe(true); + + const collector = await waitForStreamSuccess(env.sentEvents, workspaceId, 30000); + assertStreamSuccess(collector); + expect(collector.getDeltas().length).toBeGreaterThan(0); + }); + }, + 60000 + ); + + test.concurrent( + "Opus 4.5 with thinking (effort)", + async () => { + await withSharedWorkspace("anthropic", async ({ env, workspaceId }) => { + const result = await sendMessage( + env.mockIpcRenderer, + workspaceId, + "What is 4+4? 
Answer in one word.", + { model: KNOWN_MODELS.OPUS.id, thinkingLevel: "low" } + ); + expect(result.success).toBe(true); + + const collector = await waitForStreamSuccess(env.sentEvents, workspaceId, 60000); + assertStreamSuccess(collector); + expect(collector.getDeltas().length).toBeGreaterThan(0); + }); + }, + 90000 + ); +});