From 4dda38832faa156bb46453d6a088b95a167dface Mon Sep 17 00:00:00 2001 From: Thomas Kosiewski Date: Tue, 25 Nov 2025 13:46:17 +0100 Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20enable=20visible=20reason?= =?UTF-8?q?ing=20for=20Opus=204.5=20with=20configurable=20effort?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Opus 4.5 now supports all thinking levels with proper effort mapping: - off: effort='low', no thinking (fast, cheap, no visible reasoning) - low: effort='low', thinking enabled (visible reasoning, budget-conscious) - medium: effort='medium', thinking enabled - high: effort='high', thinking enabled Previously Opus 4.5 only passed the effort parameter without the thinking parameter, which meant no visible reasoning traces were returned. Also removed the policy restriction that prevented 'off' for Opus 4.5 - users can now disable visible reasoning while still getting efficient responses via effort='low'. _Generated with mux_ Change-Id: I05239192b38babf22c7ca980d9380da718608be2 Signed-off-by: Thomas Kosiewski --- .../components/ThinkingSlider.stories.tsx | 15 ++++++------- src/browser/utils/thinking/policy.test.ts | 19 ++++++++-------- src/browser/utils/thinking/policy.ts | 15 +------------ src/common/types/thinking.ts | 4 ++-- src/common/utils/ai/providerOptions.test.ts | 15 ++++++++++--- src/common/utils/ai/providerOptions.ts | 22 +++++++++++++++---- 6 files changed, 50 insertions(+), 40 deletions(-) diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx index b25b7c786..a36bec358 100644 --- a/src/browser/components/ThinkingSlider.stories.tsx +++ b/src/browser/components/ThinkingSlider.stories.tsx @@ -48,9 +48,7 @@ export const DifferentModels: Story = {
-
- Claude Opus 4.5 (3 levels: low/medium/high) -
+
Claude Opus 4.5 (4 levels)
@@ -116,18 +114,19 @@ export const InteractiveDemo: Story = { }, }; -export const Opus45ThreeLevels: Story = { +export const Opus45AllLevels: Story = { args: { modelString: "anthropic:claude-opus-4-5" }, render: (args) => (
- Claude Opus 4.5 uses the effort parameter (low/medium/high only, no “off”): + Claude Opus 4.5 uses the effort parameter with optional extended thinking:
- • Low: Conservative token usage -
Medium: Balanced usage (default) -
High: Best results, more tokens + • Off: effort=“low”, no visible reasoning +
Low: effort=“low”, visible reasoning +
Medium: effort=“medium”, visible reasoning +
High: effort=“high”, visible reasoning
), diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts index 974fca0c6..86bc4a903 100644 --- a/src/browser/utils/thinking/policy.test.ts +++ b/src/browser/utils/thinking/policy.test.ts @@ -33,13 +33,17 @@ describe("getThinkingPolicyForModel", () => { ]); }); - test("returns low/medium/high for Opus 4.5", () => { + test("returns all levels for Opus 4.5 (uses default policy)", () => { + // Opus 4.5 uses the default policy - no special case needed + // The effort parameter handles the "off" case by setting effort="low" expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([ + "off", "low", "medium", "high", ]); expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([ + "off", "low", "medium", "high", @@ -95,19 +99,16 @@ describe("enforceThinkingPolicy", () => { }); }); - describe("Opus 4.5 (no off option)", () => { - test("allows low/medium/high levels", () => { + describe("Opus 4.5 (all levels supported)", () => { + test("allows all levels including off", () => { + expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("off"); expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low"); expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium"); expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high"); }); - test("falls back to high when off is requested", () => { - expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high"); - }); - - test("falls back to high when off is requested (versioned model)", () => { - expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high"); + test("allows off for versioned model", () => { + expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off"); }); }); }); diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts index 41c2fad4f..4346d9272 100644 --- 
a/src/browser/utils/thinking/policy.ts +++ b/src/browser/utils/thinking/policy.ts @@ -25,7 +25,6 @@ export type ThinkingPolicy = readonly ThinkingLevel[]; * * Rules: * - openai:gpt-5-pro → ["high"] (only supported level) - * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only) * - gemini-3 → ["low", "high"] (thinking level only) * - default → ["off", "low", "medium", "high"] (all levels selectable) * @@ -39,12 +38,6 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { return ["high"]; } - // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off") - // Match "anthropic:" followed by "claude-opus-4-5" with optional version suffix - if (modelString.includes("opus-4-5")) { - return ["low", "medium", "high"]; - } - // Gemini 3 Pro only supports "low" and "high" reasoning levels if (modelString.includes("gemini-3")) { return ["low", "high"]; @@ -59,8 +52,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy { * * Fallback strategy: * 1. If requested level is allowed, use it - * 2. For Opus 4.5: prefer "high" (best experience for reasoning model) - * 3. Otherwise: prefer "medium" if allowed, else use first allowed level + * 2. Otherwise: prefer "medium" if allowed, else use first allowed level */ export function enforceThinkingPolicy( modelString: string, @@ -72,11 +64,6 @@ export function enforceThinkingPolicy( return requested; } - // Special case: Opus 4.5 defaults to "high" for best experience - if (modelString.includes("opus-4-5") && allowed.includes("high")) { - return "high"; - } - // Fallback: prefer "medium" if allowed, else use first allowed level return allowed.includes("medium") ? 
"medium" : allowed[0]; } diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts index c2b025c87..f6283d067 100644 --- a/src/common/types/thinking.ts +++ b/src/common/types/thinking.ts @@ -42,8 +42,8 @@ export const ANTHROPIC_THINKING_BUDGETS: Record = { * * @see https://www.anthropic.com/news/claude-opus-4-5 */ -export const ANTHROPIC_EFFORT: Record = { - off: undefined, +export const ANTHROPIC_EFFORT: Record = { + off: "low", low: "low", medium: "medium", high: "high", diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts index 6283269f3..a62356bec 100644 --- a/src/common/utils/ai/providerOptions.test.ts +++ b/src/common/utils/ai/providerOptions.test.ts @@ -23,37 +23,46 @@ void mock.module("@/browser/utils/thinking/policy", () => ({ describe("buildProviderOptions - Anthropic", () => { describe("Opus 4.5 (effort parameter)", () => { - test("should use effort parameter for claude-opus-4-5", () => { + test("should use effort and thinking parameters for claude-opus-4-5", () => { const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium"); expect(result).toEqual({ anthropic: { disableParallelToolUse: false, sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 10000, // ANTHROPIC_THINKING_BUDGETS.medium + }, effort: "medium", }, }); }); - test("should use effort parameter for claude-opus-4-5-20251101", () => { + test("should use effort and thinking parameters for claude-opus-4-5-20251101", () => { const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high"); expect(result).toEqual({ anthropic: { disableParallelToolUse: false, sendReasoning: true, + thinking: { + type: "enabled", + budgetTokens: 20000, // ANTHROPIC_THINKING_BUDGETS.high + }, effort: "high", }, }); }); - test("should omit effort when thinking is off for Opus 4.5", () => { + test("should use effort 'low' with no thinking when off for Opus 4.5", () => { const result = 
buildProviderOptions("anthropic:claude-opus-4-5", "off"); expect(result).toEqual({ anthropic: { disableParallelToolUse: false, sendReasoning: true, + effort: "low", // "off" maps to effort: "low" for efficiency }, }); }); diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts index 5eb54540c..769acbd92 100644 --- a/src/common/utils/ai/providerOptions.ts +++ b/src/common/utils/ai/providerOptions.ts @@ -93,10 +93,16 @@ export function buildProviderOptions( const isOpus45 = modelName?.includes("opus-4-5") ?? false; if (isOpus45) { - // Opus 4.5: Use effort parameter for reasoning control - const effort = ANTHROPIC_EFFORT[effectiveThinking]; + // Opus 4.5: Use effort parameter AND optionally thinking for visible reasoning + // - "off" → effort: "low", no thinking (fast, no visible reasoning) + // - "low" → effort: "low", thinking enabled (visible reasoning) + // - "medium" → effort: "medium", thinking enabled + // - "high" → effort: "high", thinking enabled + const effortLevel = ANTHROPIC_EFFORT[effectiveThinking]; + const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking]; log.debug("buildProviderOptions: Anthropic Opus 4.5 config", { - effort, + effort: effortLevel, + budgetTokens, thinkingLevel: effectiveThinking, }); @@ -104,9 +110,17 @@ export function buildProviderOptions( anthropic: { disableParallelToolUse: false, // Always enable concurrent tool execution sendReasoning: true, // Include reasoning traces in requests sent to the model + // Enable thinking to get visible reasoning traces (only when not "off") + // budgetTokens sets the ceiling; effort controls how eagerly tokens are spent + ...(budgetTokens > 0 && { + thinking: { + type: "enabled", + budgetTokens, + }, + }), // Use effort parameter (Opus 4.5 only) to control token spend // SDK auto-adds beta header "effort-2025-11-24" when effort is set - ...(effort && { effort }), + effort: effortLevel, }, }; log.debug("buildProviderOptions: 
Returning Anthropic Opus 4.5 options", options);