From 4dda38832faa156bb46453d6a088b95a167dface Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 25 Nov 2025 13:46:17 +0100
Subject: [PATCH] =?UTF-8?q?=F0=9F=A4=96=20fix:=20enable=20visible=20reason?=
 =?UTF-8?q?ing=20for=20Opus=204.5=20with=20configurable=20effort?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Opus 4.5 now supports all thinking levels with proper effort mapping:
- off: effort='low', no thinking (fast, cheap, no visible reasoning)
- low: effort='low', thinking enabled (visible reasoning, budget-conscious)
- medium: effort='medium', thinking enabled
- high: effort='high', thinking enabled

Previously Opus 4.5 only passed the effort parameter without the thinking
parameter, which meant no visible reasoning traces were returned.

Also removed the policy restriction that prevented 'off' for Opus 4.5 -
users can now disable visible reasoning while still getting efficient
responses via effort='low'.

_Generated with mux_

Change-Id: I05239192b38babf22c7ca980d9380da718608be2
Signed-off-by: Thomas Kosiewski <tk@coder.com>
---
 .../components/ThinkingSlider.stories.tsx     | 15 ++++++-------
 src/browser/utils/thinking/policy.test.ts     | 19 ++++++++--------
 src/browser/utils/thinking/policy.ts          | 15 +------------
 src/common/types/thinking.ts                  |  4 ++--
 src/common/utils/ai/providerOptions.test.ts   | 15 ++++++++++---
 src/common/utils/ai/providerOptions.ts        | 22 +++++++++++++++----
 6 files changed, 50 insertions(+), 40 deletions(-)
diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
index b25b7c786..a36bec358 100644
--- a/src/browser/components/ThinkingSlider.stories.tsx
+++ b/src/browser/components/ThinkingSlider.stories.tsx
@@ -48,9 +48,7 @@ export const DifferentModels: Story = {
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">
-          Claude Opus 4.5 (3 levels: low/medium/high)
-        </div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
       </div>
 
@@ -116,18 +114,19 @@ export const InteractiveDemo: Story = {
   },
 };
 
-export const Opus45ThreeLevels: Story = {
+export const Opus45AllLevels: Story = {
   args: { modelString: "anthropic:claude-opus-4-5" },
   render: (args) => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div className="text-bright font-primary mb-2.5 text-[13px]">
-        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
+        Claude Opus 4.5 uses the effort parameter with optional extended thinking:
       </div>
       <ThinkingSliderComponent modelString={args.modelString} />
       <div className="text-muted-light font-primary mt-2.5 text-[11px]">
-        • <strong>Low</strong>: Conservative token usage
-        <br />• <strong>Medium</strong>: Balanced usage (default)
-        <br />• <strong>High</strong>: Best results, more tokens
+        • <strong>Off</strong>: effort=&ldquo;low&rdquo;, no visible reasoning
+        <br />• <strong>Low</strong>: effort=&ldquo;low&rdquo;, visible reasoning
+        <br />• <strong>Medium</strong>: effort=&ldquo;medium&rdquo;, visible reasoning
+        <br />• <strong>High</strong>: effort=&ldquo;high&rdquo;, visible reasoning
       </div>
     </div>
   ),
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
index 974fca0c6..86bc4a903 100644
--- a/src/browser/utils/thinking/policy.test.ts
+++ b/src/browser/utils/thinking/policy.test.ts
@@ -33,13 +33,17 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
-  test("returns low/medium/high for Opus 4.5", () => {
+  test("returns all levels for Opus 4.5 (uses default policy)", () => {
+    // Opus 4.5 uses the default policy - no special case needed
+    // The effort parameter handles the "off" case by setting effort="low"
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
+      "off",
       "low",
       "medium",
       "high",
     ]);
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
+      "off",
       "low",
       "medium",
       "high",
@@ -95,19 +99,16 @@ describe("enforceThinkingPolicy", () => {
     });
   });
 
-  describe("Opus 4.5 (no off option)", () => {
-    test("allows low/medium/high levels", () => {
+  describe("Opus 4.5 (all levels supported)", () => {
+    test("allows all levels including off", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("off");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
     });
 
-    test("falls back to high when off is requested", () => {
-      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
-    });
-
-    test("falls back to high when off is requested (versioned model)", () => {
-      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
+    test("allows off for versioned model", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off");
     });
   });
 });
diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
index 41c2fad4f..4346d9272 100644
--- a/src/browser/utils/thinking/policy.ts
+++ b/src/browser/utils/thinking/policy.ts
@@ -25,7 +25,6 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  *
  * Rules:
  * - openai:gpt-5-pro → ["high"] (only supported level)
- * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
  * - gemini-3 → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (all levels selectable)
  *
@@ -39,12 +38,6 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
-  // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
-  // Match "anthropic:" followed by "claude-opus-4-5" with optional version suffix
-  if (modelString.includes("opus-4-5")) {
-    return ["low", "medium", "high"];
-  }
-
   // Gemini 3 Pro only supports "low" and "high" reasoning levels
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
@@ -59,8 +52,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  *
  * Fallback strategy:
  * 1. If requested level is allowed, use it
- * 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
- * 3. Otherwise: prefer "medium" if allowed, else use first allowed level
+ * 2. Otherwise: prefer "medium" if allowed, else use first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -72,11 +64,6 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
-  // Special case: Opus 4.5 defaults to "high" for best experience
-  if (modelString.includes("opus-4-5") && allowed.includes("high")) {
-    return "high";
-  }
-
   // Fallback: prefer "medium" if allowed, else use first allowed level
   return allowed.includes("medium") ? "medium" : allowed[0];
 }
diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts
index c2b025c87..f6283d067 100644
--- a/src/common/types/thinking.ts
+++ b/src/common/types/thinking.ts
@@ -42,8 +42,8 @@ export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
  *
  * @see https://www.anthropic.com/news/claude-opus-4-5
  */
-export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
-  off: undefined,
+export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high"> = {
+  off: "low",
   low: "low",
   medium: "medium",
   high: "high",
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
index 6283269f3..a62356bec 100644
--- a/src/common/utils/ai/providerOptions.test.ts
+++ b/src/common/utils/ai/providerOptions.test.ts
@@ -23,37 +23,46 @@ void mock.module("@/browser/utils/thinking/policy", () => ({
 
 describe("buildProviderOptions - Anthropic", () => {
   describe("Opus 4.5 (effort parameter)", () => {
-    test("should use effort parameter for claude-opus-4-5", () => {
+    test("should use effort and thinking parameters for claude-opus-4-5", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 10000, // ANTHROPIC_THINKING_BUDGETS.medium
+          },
           effort: "medium",
         },
       });
     });
 
-    test("should use effort parameter for claude-opus-4-5-20251101", () => {
+    test("should use effort and thinking parameters for claude-opus-4-5-20251101", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 20000, // ANTHROPIC_THINKING_BUDGETS.high
+          },
           effort: "high",
         },
       });
     });
 
-    test("should omit effort when thinking is off for Opus 4.5", () => {
+    test("should use effort 'low' with no thinking when off for Opus 4.5", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          effort: "low", // "off" maps to effort: "low" for efficiency
         },
       });
     });
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
index 5eb54540c..769acbd92 100644
--- a/src/common/utils/ai/providerOptions.ts
+++ b/src/common/utils/ai/providerOptions.ts
@@ -93,10 +93,16 @@ export function buildProviderOptions(
     const isOpus45 = modelName?.includes("opus-4-5") ?? false;
 
     if (isOpus45) {
-      // Opus 4.5: Use effort parameter for reasoning control
-      const effort = ANTHROPIC_EFFORT[effectiveThinking];
+      // Opus 4.5: Use effort parameter AND optionally thinking for visible reasoning
+      // - "off" → effort: "low", no thinking (fast, no visible reasoning)
+      // - "low" → effort: "low", thinking enabled (visible reasoning)
+      // - "medium" → effort: "medium", thinking enabled
+      // - "high" → effort: "high", thinking enabled
+      const effortLevel = ANTHROPIC_EFFORT[effectiveThinking];
+      const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
       log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
-        effort,
+        effort: effortLevel,
+        budgetTokens,
         thinkingLevel: effectiveThinking,
       });
 
@@ -104,9 +110,17 @@ export function buildProviderOptions(
         anthropic: {
           disableParallelToolUse: false, // Always enable concurrent tool execution
           sendReasoning: true, // Include reasoning traces in requests sent to the model
+          // Enable thinking to get visible reasoning traces (only when not "off")
+          // budgetTokens sets the ceiling; effort controls how eagerly tokens are spent
+          ...(budgetTokens > 0 && {
+            thinking: {
+              type: "enabled",
+              budgetTokens,
+            },
+          }),
           // Use effort parameter (Opus 4.5 only) to control token spend
           // SDK auto-adds beta header "effort-2025-11-24" when effort is set
-          ...(effort && { effort }),
+          effort: effortLevel,
         },
       };
       log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);