🤖 fix: update Claude Opus 4.5 thinking policy to match API (#752)

ThomasK33 · web-flow · commit f8e7690c7451 · 2025-11-25T11:41:24.000Z
Claude Opus 4.5 uses the effort parameter which only supports `low`,
`medium`, and `high` (no 'off' option). This PR updates the thinking
policy to reflect the API's capabilities.

## Changes

- **Policy Update**: Return `['low', 'medium', 'high']` for `opus-4-5`
models (no 'off')
- **Fallback Logic**: When 'off' is requested, fall back to 'high' for
the best reasoning experience
- **Test Coverage**: Added comprehensive tests for Opus 4.5 policy (15
tests pass)
- **Storybook**: Added `Opus45ThreeLevels` story showcasing the
3-position slider

## Impact

| Before | After |
|--------|-------|
| 4-position slider (off/low/medium/high) | 3-position slider (low/medium/high) |
| Could select 'off' (invalid for API) | Cannot select 'off' (matches API) |
| Invalid UI state possible | UI always matches API capabilities |

## Testing

- ✅ All 419 browser utils tests pass
- ✅ All 15 policy tests pass
- ✅ TypeScript typecheck passes
- ✅ Opus 4.5 fallback to 'high' works correctly

This aligns Opus 4.5 with how Gemini 3 is handled (which has 2 levels:
low/high).

_Generated with `mux`_

---------

Signed-off-by: Thomas Kosiewski <tk@coder.com>
diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
@@ -41,24 +41,48 @@ export const DifferentModels: Story = {
   render: () => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">
+          Claude Sonnet 4.5 (4 levels)
+        </div>
         <ThinkingSliderComponent modelString="anthropic:claude-sonnet-4-5" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">
+          Claude Opus 4.5 (3 levels: low/medium/high)
+        </div>
+        <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
+      </div>
+
+      <div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-1" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">
+          Gemini 3 (2 levels: low/high)
+        </div>
+        <ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
+      </div>
+
+      <div>
+        <div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex (4 levels)</div>
         <ThinkingSliderComponent modelString="openai:gpt-5-codex" />
       </div>
     </div>
   ),
 };
 
 export const InteractiveDemo: Story = {
+  // Use unique workspaceId to isolate state from other stories
+  decorators: [
+    (Story) => (
+      <ThinkingProvider workspaceId="storybook-interactive-demo">
+        <Story />
+      </ThinkingProvider>
+    ),
+  ],
   render: () => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div className="text-bright font-primary mb-2.5 text-[13px]">
@@ -92,6 +116,23 @@ export const InteractiveDemo: Story = {
   },
 };
 
+export const Opus45ThreeLevels: Story = {
+  args: { modelString: "anthropic:claude-opus-4-5" },
+  render: (args) => (
+    <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
+      <div className="text-bright font-primary mb-2.5 text-[13px]">
+        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
+      </div>
+      <ThinkingSliderComponent modelString={args.modelString} />
+      <div className="text-muted-light font-primary mt-2.5 text-[11px]">
+        • <strong>Low</strong>: Conservative token usage
+        <br />• <strong>Medium</strong>: Balanced usage (default)
+        <br />• <strong>High</strong>: Best results, more tokens
+      </div>
+    </div>
+  ),
+};
+
 export const LockedThinking: Story = {
   args: { modelString: "openai:gpt-5-pro" },
   render: (args) => (
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
@@ -33,6 +33,23 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
+  test("returns low/medium/high for Opus 4.5", () => {
+    expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
+      "low",
+      "medium",
+      "high",
+    ]);
+    expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
+  test("returns low/high for Gemini 3", () => {
+    expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
+  });
+
   test("returns all levels for other providers", () => {
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4")).toEqual([
       "off",
@@ -46,7 +63,6 @@ describe("getThinkingPolicyForModel", () => {
       "medium",
       "high",
     ]);
-    expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
   });
 });
 
@@ -78,6 +94,22 @@ describe("enforceThinkingPolicy", () => {
       expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
     });
   });
+
+  describe("Opus 4.5 (no off option)", () => {
+    test("allows low/medium/high levels", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
+    });
+
+    test("falls back to high when off is requested", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
+    });
+
+    test("falls back to high when off is requested (versioned model)", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
+    });
+  });
 });
 
 // Note: Tests for invalid levels removed - TypeScript type system prevents invalid
diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
@@ -25,6 +25,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  *
  * Rules:
  * - openai:gpt-5-pro → ["high"] (only supported level)
+ * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
+ * - gemini-3 → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (all levels selectable)
  *
  * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
@@ -37,6 +39,12 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
+  // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
+  // Substring match on "opus-4-5"; provider is not checked, version suffixes are tolerated
+  if (modelString.includes("opus-4-5")) {
+    return ["low", "medium", "high"];
+  }
+
   // Gemini 3 Pro only supports "low" and "high" reasoning levels
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
@@ -51,8 +59,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  *
  * Fallback strategy:
  * 1. If requested level is allowed, use it
- * 2. If "medium" is allowed, use it (reasonable default)
- * 3. Otherwise use first allowed level
+ * 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
+ * 3. Otherwise: prefer "medium" if allowed, else use first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -64,6 +72,11 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
+  // Special case: Opus 4.5 defaults to "high" for best experience
+  if (modelString.includes("opus-4-5") && allowed.includes("high")) {
+    return "high";
+  }
+
   // Fallback: prefer "medium" if allowed, else use first allowed level
   return allowed.includes("medium") ? "medium" : allowed[0];
 }