🤖 fix: enable visible reasoning for Opus 4.5 with configurable effort

ThomasK33 · ThomasK33 · commit 4dda38832faa · 2025-11-25T14:51:48.000+01:00
Opus 4.5 now supports all thinking levels with proper effort mapping:
- off: effort='low', no thinking (fast, cheap, no visible reasoning)
- low: effort='low', thinking enabled (visible reasoning, budget-conscious)
- medium: effort='medium', thinking enabled
- high: effort='high', thinking enabled

Previously Opus 4.5 only passed the effort parameter without the thinking
parameter, which meant no visible reasoning traces were returned.

Also removed the policy restriction that prevented 'off' for Opus 4.5 -
users can now disable visible reasoning while still getting efficient
responses via effort='low'.

_Generated with mux_

Change-Id: I05239192b38babf22c7ca980d9380da718608be2
Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
@@ -48,9 +48,7 @@ export const DifferentModels: Story = {
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">
-          Claude Opus 4.5 (3 levels: low/medium/high)
-        </div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
       </div>
 
@@ -116,18 +114,19 @@ export const InteractiveDemo: Story = {
   },
 };
 
-export const Opus45ThreeLevels: Story = {
+export const Opus45AllLevels: Story = {
   args: { modelString: "anthropic:claude-opus-4-5" },
   render: (args) => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div className="text-bright font-primary mb-2.5 text-[13px]">
-        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
+        Claude Opus 4.5 uses the effort parameter with optional extended thinking:
       </div>
       <ThinkingSliderComponent modelString={args.modelString} />
       <div className="text-muted-light font-primary mt-2.5 text-[11px]">
-        • <strong>Low</strong>: Conservative token usage
-        <br />• <strong>Medium</strong>: Balanced usage (default)
-        <br />• <strong>High</strong>: Best results, more tokens
+        • <strong>Off</strong>: effort=&ldquo;low&rdquo;, no visible reasoning
+        <br />• <strong>Low</strong>: effort=&ldquo;low&rdquo;, visible reasoning
+        <br />• <strong>Medium</strong>: effort=&ldquo;medium&rdquo;, visible reasoning
+        <br />• <strong>High</strong>: effort=&ldquo;high&rdquo;, visible reasoning
       </div>
     </div>
   ),
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
@@ -33,13 +33,17 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
-  test("returns low/medium/high for Opus 4.5", () => {
+  test("returns all levels for Opus 4.5 (uses default policy)", () => {
+    // Opus 4.5 uses the default policy - no special case needed
+    // The effort parameter handles the "off" case by setting effort="low"
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
+      "off",
       "low",
       "medium",
       "high",
     ]);
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
+      "off",
       "low",
       "medium",
       "high",
@@ -95,19 +99,16 @@ describe("enforceThinkingPolicy", () => {
     });
   });
 
-  describe("Opus 4.5 (no off option)", () => {
-    test("allows low/medium/high levels", () => {
+  describe("Opus 4.5 (all levels supported)", () => {
+    test("allows all levels including off", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("off");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
     });
 
-    test("falls back to high when off is requested", () => {
-      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
-    });
-
-    test("falls back to high when off is requested (versioned model)", () => {
-      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
+    test("allows off for versioned model", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off");
     });
   });
 });
diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
@@ -25,7 +25,6 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  *
  * Rules:
  * - openai:gpt-5-pro → ["high"] (only supported level)
- * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
  * - gemini-3 → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (all levels selectable)
  *
@@ -39,12 +38,6 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
-  // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
-  // Match "anthropic:" followed by "claude-opus-4-5" with optional version suffix
-  if (modelString.includes("opus-4-5")) {
-    return ["low", "medium", "high"];
-  }
-
   // Gemini 3 Pro only supports "low" and "high" reasoning levels
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
@@ -59,8 +52,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  *
  * Fallback strategy:
  * 1. If requested level is allowed, use it
- * 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
- * 3. Otherwise: prefer "medium" if allowed, else use first allowed level
+ * 2. Otherwise: prefer "medium" if allowed, else use first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -72,11 +64,6 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
-  // Special case: Opus 4.5 defaults to "high" for best experience
-  if (modelString.includes("opus-4-5") && allowed.includes("high")) {
-    return "high";
-  }
-
   // Fallback: prefer "medium" if allowed, else use first allowed level
   return allowed.includes("medium") ? "medium" : allowed[0];
 }
diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts
@@ -42,8 +42,8 @@ export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
  *
  * @see https://www.anthropic.com/news/claude-opus-4-5
  */
-export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
-  off: undefined,
+export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high"> = {
+  off: "low",
   low: "low",
   medium: "medium",
   high: "high",
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
@@ -23,37 +23,46 @@ void mock.module("@/browser/utils/thinking/policy", () => ({
 
 describe("buildProviderOptions - Anthropic", () => {
   describe("Opus 4.5 (effort parameter)", () => {
-    test("should use effort parameter for claude-opus-4-5", () => {
+    test("should use effort and thinking parameters for claude-opus-4-5", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 10000, // ANTHROPIC_THINKING_BUDGETS.medium
+          },
           effort: "medium",
         },
       });
     });
 
-    test("should use effort parameter for claude-opus-4-5-20251101", () => {
+    test("should use effort and thinking parameters for claude-opus-4-5-20251101", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 20000, // ANTHROPIC_THINKING_BUDGETS.high
+          },
           effort: "high",
         },
       });
     });
 
-    test("should omit effort when thinking is off for Opus 4.5", () => {
+    test("should use effort 'low' with no thinking when off for Opus 4.5", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          effort: "low", // "off" maps to effort: "low" for efficiency
         },
       });
     });
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
@@ -93,20 +93,34 @@ export function buildProviderOptions(
     const isOpus45 = modelName?.includes("opus-4-5") ?? false;
 
     if (isOpus45) {
-      // Opus 4.5: Use effort parameter for reasoning control
-      const effort = ANTHROPIC_EFFORT[effectiveThinking];
+      // Opus 4.5: Use effort parameter AND optionally thinking for visible reasoning
+      // - "off" → effort: "low", no thinking (fast, no visible reasoning)
+      // - "low" → effort: "low", thinking enabled (visible reasoning)
+      // - "medium" → effort: "medium", thinking enabled
+      // - "high" → effort: "high", thinking enabled
+      const effortLevel = ANTHROPIC_EFFORT[effectiveThinking];
+      const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
       log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
-        effort,
+        effort: effortLevel,
+        budgetTokens,
         thinkingLevel: effectiveThinking,
       });
 
       const options: ProviderOptions = {
         anthropic: {
           disableParallelToolUse: false, // Always enable concurrent tool execution
           sendReasoning: true, // Include reasoning traces in requests sent to the model
+          // Enable thinking to get visible reasoning traces (only when not "off")
+          // budgetTokens sets the ceiling; effort controls how eagerly tokens are spent
+          ...(budgetTokens > 0 && {
+            thinking: {
+              type: "enabled",
+              budgetTokens,
+            },
+          }),
           // Use effort parameter (Opus 4.5 only) to control token spend
           // SDK auto-adds beta header "effort-2025-11-24" when effort is set
-          ...(effort && { effort }),
+          effort: effortLevel,
         },
       };
       log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);