🤖 fix: enable reasoning for Claude Opus 4.5 (#754)

ThomasK33 · web-flow · commit 27ef4af69445 · 2025-11-25T14:03:23.000Z
## Summary

Fixes missing reasoning traces in the UI when using Claude Opus 4.5.

## Problem

Opus 4.5 supports two separate but complementary parameters for reasoning:

1. **`effort`** (new in Opus 4.5): Controls how eagerly Claude spends tokens across ALL output (text, tool calls, and thinking). Values: `low`, `medium`, `high`.
2. **`thinking`** (extended thinking): Enables visible reasoning traces with a token budget. This is what makes the "Thinking..." UI appear.

We were only passing `effort`, which controls token spend but **doesn't enable the reasoning traces to be returned from the API**. The `thinking` parameter must also be set for reasoning to be visible.

## Solution

For Opus 4.5, now pass both parameters:

```typescript
{
  anthropic: {
    thinking: {
      type: "enabled",
      budgetTokens: 20000, // Enables visible reasoning traces
    },
    effort: "high", // Controls token spend eagerness
  }
}
```

## How these parameters interact

From [Anthropic's docs](https://docs.anthropic.com/en/docs/build-with-claude/extended-thinking#effort-with-extended-thinking):

> The effort parameter works alongside the thinking token budget when extended thinking is enabled. These two controls serve different purposes:
> - **Effort parameter**: Controls how Claude spends all tokens—including thinking tokens, text responses, and tool calls
> - **Thinking token budget**: Sets a maximum limit on thinking tokens specifically
>
> The effort parameter can be used with or without extended thinking enabled.

| Configuration | Behavior |
|---------------|----------|
| `effort` only | Controls token spend, no visible reasoning |
| `thinking` only | Shows reasoning with default budget |
| `effort` + `thinking` | Shows reasoning WITH token spend control |

## Testing

- Verified reasoning traces now appear in UI when using Opus 4.5
- `make typecheck` passes

_Generated with `mux`_

Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
@@ -48,9 +48,7 @@ export const DifferentModels: Story = {
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">
-          Claude Opus 4.5 (3 levels: low/medium/high)
-        </div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
       </div>
 
@@ -116,18 +114,19 @@ export const InteractiveDemo: Story = {
   },
 };
 
-export const Opus45ThreeLevels: Story = {
+export const Opus45AllLevels: Story = {
   args: { modelString: "anthropic:claude-opus-4-5" },
   render: (args) => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div className="text-bright font-primary mb-2.5 text-[13px]">
-        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
+        Claude Opus 4.5 uses the effort parameter with optional extended thinking:
       </div>
       <ThinkingSliderComponent modelString={args.modelString} />
       <div className="text-muted-light font-primary mt-2.5 text-[11px]">
-        • <strong>Low</strong>: Conservative token usage
-        <br />• <strong>Medium</strong>: Balanced usage (default)
-        <br />• <strong>High</strong>: Best results, more tokens
+        • <strong>Off</strong>: effort=&ldquo;low&rdquo;, no visible reasoning
+        <br />• <strong>Low</strong>: effort=&ldquo;low&rdquo;, visible reasoning
+        <br />• <strong>Medium</strong>: effort=&ldquo;medium&rdquo;, visible reasoning
+        <br />• <strong>High</strong>: effort=&ldquo;high&rdquo;, visible reasoning
       </div>
     </div>
   ),
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
@@ -33,13 +33,17 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
-  test("returns low/medium/high for Opus 4.5", () => {
+  test("returns all levels for Opus 4.5 (uses default policy)", () => {
+    // Opus 4.5 uses the default policy - no special case needed
+    // The effort parameter handles the "off" case by setting effort="low"
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
+      "off",
       "low",
       "medium",
       "high",
     ]);
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
+      "off",
       "low",
       "medium",
       "high",
@@ -95,19 +99,16 @@ describe("enforceThinkingPolicy", () => {
     });
   });
 
-  describe("Opus 4.5 (no off option)", () => {
-    test("allows low/medium/high levels", () => {
+  describe("Opus 4.5 (all levels supported)", () => {
+    test("allows all levels including off", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("off");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
       expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
     });
 
-    test("falls back to high when off is requested", () => {
-      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
-    });
-
-    test("falls back to high when off is requested (versioned model)", () => {
-      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
+    test("allows off for versioned model", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("off");
     });
   });
 });
diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
@@ -25,7 +25,6 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  *
  * Rules:
  * - openai:gpt-5-pro → ["high"] (only supported level)
- * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
  * - gemini-3 → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (all levels selectable)
  *
@@ -39,12 +38,6 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
-  // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
-  // Match "anthropic:" followed by "claude-opus-4-5" with optional version suffix
-  if (modelString.includes("opus-4-5")) {
-    return ["low", "medium", "high"];
-  }
-
   // Gemini 3 Pro only supports "low" and "high" reasoning levels
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
@@ -59,8 +52,7 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  *
  * Fallback strategy:
  * 1. If requested level is allowed, use it
- * 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
- * 3. Otherwise: prefer "medium" if allowed, else use first allowed level
+ * 2. Otherwise: prefer "medium" if allowed, else use first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -72,11 +64,6 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
-  // Special case: Opus 4.5 defaults to "high" for best experience
-  if (modelString.includes("opus-4-5") && allowed.includes("high")) {
-    return "high";
-  }
-
   // Fallback: prefer "medium" if allowed, else use first allowed level
   return allowed.includes("medium") ? "medium" : allowed[0];
 }
diff --git a/src/common/types/thinking.ts b/src/common/types/thinking.ts
@@ -42,8 +42,8 @@ export const ANTHROPIC_THINKING_BUDGETS: Record<ThinkingLevel, number> = {
  *
  * @see https://www.anthropic.com/news/claude-opus-4-5
  */
-export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high" | undefined> = {
-  off: undefined,
+export const ANTHROPIC_EFFORT: Record<ThinkingLevel, "low" | "medium" | "high"> = {
+  off: "low",
   low: "low",
   medium: "medium",
   high: "high",
diff --git a/src/common/utils/ai/providerOptions.test.ts b/src/common/utils/ai/providerOptions.test.ts
@@ -23,37 +23,46 @@ void mock.module("@/browser/utils/thinking/policy", () => ({
 
 describe("buildProviderOptions - Anthropic", () => {
   describe("Opus 4.5 (effort parameter)", () => {
-    test("should use effort parameter for claude-opus-4-5", () => {
+    test("should use effort and thinking parameters for claude-opus-4-5", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5", "medium");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 10000, // ANTHROPIC_THINKING_BUDGETS.medium
+          },
           effort: "medium",
         },
       });
     });
 
-    test("should use effort parameter for claude-opus-4-5-20251101", () => {
+    test("should use effort and thinking parameters for claude-opus-4-5-20251101", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5-20251101", "high");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          thinking: {
+            type: "enabled",
+            budgetTokens: 20000, // ANTHROPIC_THINKING_BUDGETS.high
+          },
           effort: "high",
         },
       });
     });
 
-    test("should omit effort when thinking is off for Opus 4.5", () => {
+    test("should use effort 'low' with no thinking when off for Opus 4.5", () => {
       const result = buildProviderOptions("anthropic:claude-opus-4-5", "off");
 
       expect(result).toEqual({
         anthropic: {
           disableParallelToolUse: false,
           sendReasoning: true,
+          effort: "low", // "off" maps to effort: "low" for efficiency
         },
       });
     });
diff --git a/src/common/utils/ai/providerOptions.ts b/src/common/utils/ai/providerOptions.ts
@@ -93,20 +93,34 @@ export function buildProviderOptions(
     const isOpus45 = modelName?.includes("opus-4-5") ?? false;
 
     if (isOpus45) {
-      // Opus 4.5: Use effort parameter for reasoning control
-      const effort = ANTHROPIC_EFFORT[effectiveThinking];
+      // Opus 4.5: Use effort parameter AND optionally thinking for visible reasoning
+      // - "off" → effort: "low", no thinking (fast, no visible reasoning)
+      // - "low" → effort: "low", thinking enabled (visible reasoning)
+      // - "medium" → effort: "medium", thinking enabled
+      // - "high" → effort: "high", thinking enabled
+      const effortLevel = ANTHROPIC_EFFORT[effectiveThinking];
+      const budgetTokens = ANTHROPIC_THINKING_BUDGETS[effectiveThinking];
       log.debug("buildProviderOptions: Anthropic Opus 4.5 config", {
-        effort,
+        effort: effortLevel,
+        budgetTokens,
         thinkingLevel: effectiveThinking,
       });
 
       const options: ProviderOptions = {
         anthropic: {
           disableParallelToolUse: false, // Always enable concurrent tool execution
           sendReasoning: true, // Include reasoning traces in requests sent to the model
+          // Enable thinking to get visible reasoning traces (only when not "off")
+          // budgetTokens sets the ceiling; effort controls how eagerly tokens are spent
+          ...(budgetTokens > 0 && {
+            thinking: {
+              type: "enabled",
+              budgetTokens,
+            },
+          }),
           // Use effort parameter (Opus 4.5 only) to control token spend
           // SDK auto-adds beta header "effort-2025-11-24" when effort is set
-          ...(effort && { effort }),
+          effort: effortLevel,
         },
       };
       log.debug("buildProviderOptions: Returning Anthropic Opus 4.5 options", options);