Skip to content

Commit f8e7690

Browse files
authored
🤖 fix: update Claude Opus 4.5 thinking policy to match API (#752)
Claude Opus 4.5 uses the effort parameter, which only supports `low`, `medium`, and `high` (no 'off' option). This PR updates the thinking policy to reflect the API's capabilities.

## Changes

- **Policy Update**: Return `['low', 'medium', 'high']` for `opus-4-5` models (no 'off')
- **Fallback Logic**: When 'off' is requested, fall back to 'high' for best reasoning experience
- **Test Coverage**: Added comprehensive tests for Opus 4.5 policy (15 tests pass)
- **Storybook**: Added `Opus45ThreeLevels` story showcasing the 3-position slider

## Impact

| Before | After |
|--------|-------|
| 4-position slider (off/low/medium/high) | 3-position slider (low/medium/high) |
| Could select 'off' (invalid for API) | Cannot select 'off' (matches API) |
| Invalid UI state possible | UI always matches API capabilities |

## Testing

- ✅ All 419 browser utils tests pass
- ✅ All 15 policy tests pass
- ✅ TypeScript typecheck passes
- ✅ Opus 4.5 fallback to 'high' works correctly

This aligns Opus 4.5 with how Gemini 3 is handled (which has 2 levels: low/high).

_Generated with `mux`_

---------

Signed-off-by: Thomas Kosiewski <tk@coder.com>
1 parent f99f4a5 commit f8e7690

File tree

3 files changed

+92
-6
lines changed

3 files changed

+92
-6
lines changed

src/browser/components/ThinkingSlider.stories.tsx

Lines changed: 44 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,24 +41,48 @@ export const DifferentModels: Story = {
4141
render: () => (
4242
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
4343
<div>
44-
<div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5</div>
44+
<div className="text-muted-light font-primary mb-2 text-xs">
45+
Claude Sonnet 4.5 (4 levels)
46+
</div>
4547
<ThinkingSliderComponent modelString="anthropic:claude-sonnet-4-5" />
4648
</div>
4749

4850
<div>
49-
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1</div>
51+
<div className="text-muted-light font-primary mb-2 text-xs">
52+
Claude Opus 4.5 (3 levels: low/medium/high)
53+
</div>
54+
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
55+
</div>
56+
57+
<div>
58+
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1 (4 levels)</div>
5059
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-1" />
5160
</div>
5261

5362
<div>
54-
<div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex</div>
63+
<div className="text-muted-light font-primary mb-2 text-xs">
64+
Gemini 3 (2 levels: low/high)
65+
</div>
66+
<ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
67+
</div>
68+
69+
<div>
70+
<div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex (4 levels)</div>
5571
<ThinkingSliderComponent modelString="openai:gpt-5-codex" />
5672
</div>
5773
</div>
5874
),
5975
};
6076

6177
export const InteractiveDemo: Story = {
78+
// Use unique workspaceId to isolate state from other stories
79+
decorators: [
80+
(Story) => (
81+
<ThinkingProvider workspaceId="storybook-interactive-demo">
82+
<Story />
83+
</ThinkingProvider>
84+
),
85+
],
6286
render: () => (
6387
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
6488
<div className="text-bright font-primary mb-2.5 text-[13px]">
@@ -92,6 +116,23 @@ export const InteractiveDemo: Story = {
92116
},
93117
};
94118

119+
export const Opus45ThreeLevels: Story = {
120+
args: { modelString: "anthropic:claude-opus-4-5" },
121+
render: (args) => (
122+
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
123+
<div className="text-bright font-primary mb-2.5 text-[13px]">
124+
Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
125+
</div>
126+
<ThinkingSliderComponent modelString={args.modelString} />
127+
<div className="text-muted-light font-primary mt-2.5 text-[11px]">
128+
<strong>Low</strong>: Conservative token usage
129+
<br /><strong>Medium</strong>: Balanced usage (default)
130+
<br /><strong>High</strong>: Best results, more tokens
131+
</div>
132+
</div>
133+
),
134+
};
135+
95136
export const LockedThinking: Story = {
96137
args: { modelString: "openai:gpt-5-pro" },
97138
render: (args) => (

src/browser/utils/thinking/policy.test.ts

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,23 @@ describe("getThinkingPolicyForModel", () => {
3333
]);
3434
});
3535

36+
test("returns low/medium/high for Opus 4.5", () => {
37+
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5")).toEqual([
38+
"low",
39+
"medium",
40+
"high",
41+
]);
42+
expect(getThinkingPolicyForModel("anthropic:claude-opus-4-5-20251101")).toEqual([
43+
"low",
44+
"medium",
45+
"high",
46+
]);
47+
});
48+
49+
test("returns low/high for Gemini 3", () => {
50+
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
51+
});
52+
3653
test("returns all levels for other providers", () => {
3754
expect(getThinkingPolicyForModel("anthropic:claude-opus-4")).toEqual([
3855
"off",
@@ -46,7 +63,6 @@ describe("getThinkingPolicyForModel", () => {
4663
"medium",
4764
"high",
4865
]);
49-
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
5066
});
5167
});
5268

@@ -78,6 +94,22 @@ describe("enforceThinkingPolicy", () => {
7894
expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
7995
});
8096
});
97+
98+
describe("Opus 4.5 (no off option)", () => {
99+
test("allows low/medium/high levels", () => {
100+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "low")).toBe("low");
101+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "medium")).toBe("medium");
102+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "high")).toBe("high");
103+
});
104+
105+
test("falls back to high when off is requested", () => {
106+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5", "off")).toBe("high");
107+
});
108+
109+
test("falls back to high when off is requested (versioned model)", () => {
110+
expect(enforceThinkingPolicy("anthropic:claude-opus-4-5-20251101", "off")).toBe("high");
111+
});
112+
});
81113
});
82114

83115
// Note: Tests for invalid levels removed - TypeScript type system prevents invalid

src/browser/utils/thinking/policy.ts

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
2525
*
2626
* Rules:
2727
* - openai:gpt-5-pro → ["high"] (only supported level)
28+
* - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
29+
* - gemini-3 → ["low", "high"] (thinking level only)
2830
* - default → ["off", "low", "medium", "high"] (all levels selectable)
2931
*
3032
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
@@ -37,6 +39,12 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
3739
return ["high"];
3840
}
3941

42+
// Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
43+
// Substring match on "opus-4-5": any provider prefix (e.g. "anthropic:") and version suffix (e.g. "-20251101") is accepted
44+
if (modelString.includes("opus-4-5")) {
45+
return ["low", "medium", "high"];
46+
}
47+
4048
// Gemini 3 Pro only supports "low" and "high" reasoning levels
4149
if (modelString.includes("gemini-3")) {
4250
return ["low", "high"];
@@ -51,8 +59,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
5159
*
5260
* Fallback strategy:
5361
* 1. If requested level is allowed, use it
54-
* 2. If "medium" is allowed, use it (reasonable default)
55-
* 3. Otherwise use first allowed level
62+
* 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
63+
* 3. Otherwise: prefer "medium" if allowed, else use first allowed level
5664
*/
5765
export function enforceThinkingPolicy(
5866
modelString: string,
@@ -64,6 +72,11 @@ export function enforceThinkingPolicy(
6472
return requested;
6573
}
6674

75+
// Special case: Opus 4.5 defaults to "high" for best experience
76+
if (modelString.includes("opus-4-5") && allowed.includes("high")) {
77+
return "high";
78+
}
79+
6780
// Fallback: prefer "medium" if allowed, else use first allowed level
6881
return allowed.includes("medium") ? "medium" : allowed[0];
6982
}

0 commit comments

Comments
 (0)