From 184f1af5d53c39675b6707cbd538111054f3ab74 Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 25 Nov 2025 11:10:54 +0100
Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=A4=96=20fix:=20update=20Claude=20Opu?=
 =?UTF-8?q?s=204.5=20thinking=20policy=20to=20match=20API?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Claude Opus 4.5 uses the effort parameter, which only supports low/medium/high
(no 'off' option). Update the thinking policy to reflect this:

- Return ['low', 'medium', 'high'] for opus-4-5 models
- Fall back to 'high' (instead of 'medium') when 'off' is requested
- Add comprehensive test coverage for Opus 4.5 policy
- Add Storybook story showcasing 3-position slider

This aligns the UI with the Anthropic API's capabilities, similar to how
Gemini 3 is handled with ['low', 'high'].

Fixes the issue where users could select 'off' for Opus 4.5, which would
result in no effort parameter being sent to the API.

_Generated with `mux`_

Change-Id: If402fe10a6061ce21dac4eb23a29ca58a9ca3613
Signed-off-by: Thomas Kosiewski <tk@coder.com>
---
 .../components/ThinkingSlider.stories.tsx     | 43 +++++++++++++++++--
 src/browser/utils/thinking/policy.test.ts     | 34 ++++++++++++++-
 src/browser/utils/thinking/policy.ts          | 17 +++++++-
 3 files changed, 88 insertions(+), 6 deletions(-)
diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
index 0b324cc7a..3a2e0947b 100644
--- a/src/browser/components/ThinkingSlider.stories.tsx
+++ b/src/browser/components/ThinkingSlider.stories.tsx
@@ -41,17 +41,27 @@ export const DifferentModels: Story = {
   render: () => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-sonnete-4-5" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (3 levels: low/medium/high)</div>
+        <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
+      </div>
+
+      <div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1 (4 levels)</div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-1" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">Gemini 3 (2 levels: low/high)</div>
+        <ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
+      </div>
+
+      <div>
+        <div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex (4 levels)</div>
         <ThinkingSliderComponent modelString="openai:gpt-5-codex" />
       </div>
     </div>
@@ -92,6 +102,33 @@ export const InteractiveDemo: Story = {
   },
 };
 
+export const Opus45ThreeLevels: Story = {
+  args: { modelString: "anthropic:claude-opus-4-5" },
+  render: (args) => (
+    <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
+      <div className="text-bright font-primary mb-2.5 text-[13px]">
+        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
+      </div>
+      <ThinkingSliderComponent modelString={args.modelString} />
+      <div className="text-muted-light font-primary mt-2.5 text-[11px]">
+        • <strong>Low</strong>: Conservative token usage
+        <br />• <strong>Medium</strong>: Balanced usage (default)
+        <br />• <strong>High</strong>: Best results, more tokens
+      </div>
+    </div>
+  ),
+  play: async ({ canvasElement }) => {
+    const canvas = within(canvasElement);
+
+    // Find the slider
+    const slider = canvas.getByRole("slider");
+
+    // Verify slider is present with 3 levels (0-2)
+    await expect(slider).toBeInTheDocument();
+    await expect(slider).toHaveAttribute("max", "2");
+  },
+};
+
 export const LockedThinking: Story = {
   args: { modelString: "openai:gpt-5-pro" },
   render: (args) => (
diff --git a/src/browser/utils/thinking/policy.test.ts b/src/browser/utils/thinking/policy.test.ts
index ac99b3b7d..974fca0c6 100644
--- a/src/browser/utils/thinking/policy.test.ts
+++ b/src/browser/utils/thinking/policy.test.ts
@@ -33,6 +33,23 @@ describe("getThinkingPolicyForModel", () => {
     ]);
   });
 
+  test("returns low/medium/high for Opus 4.5", () => {
+    expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
+      "low",
+      "medium",
+      "high",
+    ]);
+    expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
+      "low",
+      "medium",
+      "high",
+    ]);
+  });
+
+  test("returns low/high for Gemini 3", () => {
+    expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
+  });
+
   test("returns all levels for other providers", () => {
     expect(getThinkingPolicyForModel("anthropic:claude-opus-4")).toEqual([
       "off",
@@ -46,7 +63,6 @@ describe("getThinkingPolicyForModel", () => {
       "medium",
       "high",
     ]);
-    expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
   });
 });
 
@@ -78,6 +94,22 @@ describe("enforceThinkingPolicy", () => {
       expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
     });
   });
+
+  describe("Opus 4.5 (no off option)", () => {
+    test("allows low/medium/high levels", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
+    });
+
+    test("falls back to high when off is requested", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
+    });
+
+    test("falls back to high when off is requested (versioned model)", () => {
+      expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
+    });
+  });
 });
 
 // Note: Tests for invalid levels removed - TypeScript type system prevents invalid
diff --git a/src/browser/utils/thinking/policy.ts b/src/browser/utils/thinking/policy.ts
index e8157b372..41c2fad4f 100644
--- a/src/browser/utils/thinking/policy.ts
+++ b/src/browser/utils/thinking/policy.ts
@@ -25,6 +25,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
  *
  * Rules:
  * - openai:gpt-5-pro → ["high"] (only supported level)
+ * - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
+ * - gemini-3 → ["low", "high"] (thinking level only)
  * - default → ["off", "low", "medium", "high"] (all levels selectable)
  *
  * Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
@@ -37,6 +39,12 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
     return ["high"];
   }
 
+  // Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
+  // Matches any model string containing "opus-4-5" (provider prefix not checked; suffixes OK)
+  if (modelString.includes("opus-4-5")) {
+    return ["low", "medium", "high"];
+  }
+
   // Gemini 3 Pro only supports "low" and "high" reasoning levels
   if (modelString.includes("gemini-3")) {
     return ["low", "high"];
@@ -51,8 +59,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
  *
  * Fallback strategy:
  * 1. If requested level is allowed, use it
- * 2. If "medium" is allowed, use it (reasonable default)
- * 3. Otherwise use first allowed level
+ * 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
+ * 3. Otherwise: prefer "medium" if allowed, else use first allowed level
  */
 export function enforceThinkingPolicy(
   modelString: string,
@@ -64,6 +72,11 @@ export function enforceThinkingPolicy(
     return requested;
   }
 
+  // Special case: Opus 4.5 defaults to "high" for best experience
+  if (modelString.includes("opus-4-5") && allowed.includes("high")) {
+    return "high";
+  }
+
   // Fallback: prefer "medium" if allowed, else use first allowed level
   return allowed.includes("medium") ? "medium" : allowed[0];
 }

From c3e7b472f41df8428065202a32cbda774f06d5aa Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 25 Nov 2025 11:15:40 +0100
Subject: [PATCH 2/4] fix: apply prettier formatting

Change-Id: Idfbb6de0240ee4a3f82835bb3cc68469b1eeb7c0
Signed-off-by: Thomas Kosiewski <tk@coder.com>
---
 src/browser/components/ThinkingSlider.stories.tsx | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
index 3a2e0947b..5afbe852a 100644
--- a/src/browser/components/ThinkingSlider.stories.tsx
+++ b/src/browser/components/ThinkingSlider.stories.tsx
@@ -41,12 +41,16 @@ export const DifferentModels: Story = {
   render: () => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5 (4 levels)</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">
+          Claude Sonnet 4.5 (4 levels)
+        </div>
         <ThinkingSliderComponent modelString="anthropic:claude-sonnete-4-5" />
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.5 (3 levels: low/medium/high)</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">
+          Claude Opus 4.5 (3 levels: low/medium/high)
+        </div>
         <ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
       </div>
 
@@ -56,7 +60,9 @@ export const DifferentModels: Story = {
       </div>
 
       <div>
-        <div className="text-muted-light font-primary mb-2 text-xs">Gemini 3 (2 levels: low/high)</div>
+        <div className="text-muted-light font-primary mb-2 text-xs">
+          Gemini 3 (2 levels: low/high)
+        </div>
         <ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
       </div>
 

From 7812591507ebb36b238b9063114aa0648356fe9d Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 25 Nov 2025 11:20:28 +0100
Subject: [PATCH 3/4] fix: remove play function from Opus45ThreeLevels story

The play function was causing test failures. The story is primarily
for visual documentation of the 3-position slider, so the play function
isn't necessary.

Change-Id: I1b0123cecf1b1bba65abd759fd68bd63333ee2d3
Signed-off-by: Thomas Kosiewski <tk@coder.com>
---
 src/browser/components/ThinkingSlider.stories.tsx | 10 ----------
 1 file changed, 10 deletions(-)

diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
index 5afbe852a..78cefa597 100644
--- a/src/browser/components/ThinkingSlider.stories.tsx
+++ b/src/browser/components/ThinkingSlider.stories.tsx
@@ -123,16 +123,6 @@ export const Opus45ThreeLevels: Story = {
       </div>
     </div>
   ),
-  play: async ({ canvasElement }) => {
-    const canvas = within(canvasElement);
-
-    // Find the slider
-    const slider = canvas.getByRole("slider");
-
-    // Verify slider is present with 3 levels (0-2)
-    await expect(slider).toBeInTheDocument();
-    await expect(slider).toHaveAttribute("max", "2");
-  },
 };
 
 export const LockedThinking: Story = {

From 3c991b93f31ec2b3c63d125cddd85ca99bcfdccf Mon Sep 17 00:00:00 2001
From: Thomas Kosiewski <tk@coder.com>
Date: Tue, 25 Nov 2025 11:37:24 +0100
Subject: [PATCH 4/4] fix: isolate InteractiveDemo story state from other
 stories

The InteractiveDemo play test was failing because it shared persisted
state with other stories (via the shared 'storybook-demo' workspaceId).
When DifferentModels runs first and sets thinking to 'medium' for Opus 4.5,
the InteractiveDemo story inherits that state.

Fix by giving InteractiveDemo its own unique workspaceId to ensure
test isolation.

Change-Id: I31bc0c6a2fc21a9a0084b966ce047422ce91339d
Signed-off-by: Thomas Kosiewski <tk@coder.com>
---
 src/browser/components/ThinkingSlider.stories.tsx | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/browser/components/ThinkingSlider.stories.tsx b/src/browser/components/ThinkingSlider.stories.tsx
index 78cefa597..b25b7c786 100644
--- a/src/browser/components/ThinkingSlider.stories.tsx
+++ b/src/browser/components/ThinkingSlider.stories.tsx
@@ -75,6 +75,14 @@ export const DifferentModels: Story = {
 };
 
 export const InteractiveDemo: Story = {
+  // Use unique workspaceId to isolate state from other stories
+  decorators: [
+    (Story) => (
+      <ThinkingProvider workspaceId="storybook-interactive-demo">
+        <Story />
+      </ThinkingProvider>
+    ),
+  ],
   render: () => (
     <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
       <div className="text-bright font-primary mb-2.5 text-[13px]">