Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 44 additions & 3 deletions src/browser/components/ThinkingSlider.stories.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -41,24 +41,48 @@ export const DifferentModels: Story = {
render: () => (
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
<div>
<div className="text-muted-light font-primary mb-2 text-xs">Claude Sonnet 4.5</div>
<div className="text-muted-light font-primary mb-2 text-xs">
Claude Sonnet 4.5 (4 levels)
</div>
<ThinkingSliderComponent modelString="anthropic:claude-sonnet-4-5" />
</div>

<div>
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1</div>
<div className="text-muted-light font-primary mb-2 text-xs">
Claude Opus 4.5 (3 levels: low/medium/high)
</div>
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-5" />
</div>

<div>
<div className="text-muted-light font-primary mb-2 text-xs">Claude Opus 4.1 (4 levels)</div>
<ThinkingSliderComponent modelString="anthropic:claude-opus-4-1" />
</div>

<div>
<div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex</div>
<div className="text-muted-light font-primary mb-2 text-xs">
Gemini 3 (2 levels: low/high)
</div>
<ThinkingSliderComponent modelString="google:gemini-3-pro-preview" />
</div>

<div>
<div className="text-muted-light font-primary mb-2 text-xs">GPT-5 Codex (4 levels)</div>
<ThinkingSliderComponent modelString="openai:gpt-5-codex" />
</div>
</div>
),
};

export const InteractiveDemo: Story = {
// Use unique workspaceId to isolate state from other stories
decorators: [
(Story) => (
<ThinkingProvider workspaceId="storybook-interactive-demo">
<Story />
</ThinkingProvider>
),
],
render: () => (
<div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
<div className="text-bright font-primary mb-2.5 text-[13px]">
Expand Down Expand Up @@ -92,6 +116,23 @@ export const InteractiveDemo: Story = {
},
};

/**
 * Story showing the thinking slider for Claude Opus 4.5.
 *
 * The slider is rendered with the model string taken from the story args
 * ("anthropic:claude-opus-4-5" by default). The caption explains that this
 * model uses the effort parameter with low/medium/high only and no "off".
 *
 * The bullets in the legend are written as the `&bull;` entity rather than a
 * literal character so they render correctly regardless of file encoding.
 */
export const Opus45ThreeLevels: Story = {
  args: { modelString: "anthropic:claude-opus-4-5" },
  render: (args) => (
    <div className="bg-dark flex min-w-80 flex-col gap-[30px] p-10">
      {/* Heading describing the reduced level set for this model */}
      <div className="text-bright font-primary mb-2.5 text-[13px]">
        Claude Opus 4.5 uses the effort parameter (low/medium/high only, no &ldquo;off&rdquo;):
      </div>
      <ThinkingSliderComponent modelString={args.modelString} />
      {/* Legend: one bullet per selectable effort level */}
      <div className="text-muted-light font-primary mt-2.5 text-[11px]">
        &bull; <strong>Low</strong>: Conservative token usage
        <br />&bull; <strong>Medium</strong>: Balanced usage (default)
        <br />&bull; <strong>High</strong>: Best results, more tokens
      </div>
    </div>
  ),
};

export const LockedThinking: Story = {
args: { modelString: "openai:gpt-5-pro" },
render: (args) => (
Expand Down
34 changes: 33 additions & 1 deletion src/browser/utils/thinking/policy.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,23 @@ describe("getThinkingPolicyForModel", () => {
]);
});

// Opus 4.5 must expose exactly low/medium/high, for both the bare model id
// and the date-suffixed id.
test("returns low/medium/high for Opus 4.5", () => {
  const opusModelIds = ["anthropic:claude-opus-4-5", "anthropic:claude-opus-4-5-20251101"];

  for (const modelId of opusModelIds) {
    expect(getThinkingPolicyForModel(modelId)).toEqual(["low", "medium", "high"]);
  }
});

// Gemini 3 is expected to offer only the two levels "low" and "high".
test("returns low/high for Gemini 3", () => {
  const geminiPolicy = getThinkingPolicyForModel("google:gemini-3-pro-preview");

  expect(geminiPolicy).toEqual(["low", "high"]);
});

test("returns all levels for other providers", () => {
expect(getThinkingPolicyForModel("anthropic:claude-opus-4")).toEqual([
"off",
Expand All @@ -46,7 +63,6 @@ describe("getThinkingPolicyForModel", () => {
"medium",
"high",
]);
expect(getThinkingPolicyForModel("google:gemini-3-pro-preview")).toEqual(["low", "high"]);
});
});

Expand Down Expand Up @@ -78,6 +94,22 @@ describe("enforceThinkingPolicy", () => {
expect(enforceThinkingPolicy("openai:gpt-5-pro", "low")).toBe("high");
});
});

// Enforcement rules for Opus 4.5: every level in its policy passes through
// unchanged, and the unsupported "off" level is replaced by "high".
describe("Opus 4.5 (no off option)", () => {
  const opus = "anthropic:claude-opus-4-5";
  const datedOpus = "anthropic:claude-opus-4-5-20251101";

  test("allows low/medium/high levels", () => {
    // Each permitted level must come back exactly as requested.
    for (const level of ["low", "medium", "high"] as const) {
      expect(enforceThinkingPolicy(opus, level)).toBe(level);
    }
  });

  test("falls back to high when off is requested", () => {
    const enforced = enforceThinkingPolicy(opus, "off");
    expect(enforced).toBe("high");
  });

  test("falls back to high when off is requested (versioned model)", () => {
    const enforced = enforceThinkingPolicy(datedOpus, "off");
    expect(enforced).toBe("high");
  });
});
});

// Note: Tests for invalid levels removed - TypeScript type system prevents invalid
Expand Down
17 changes: 15 additions & 2 deletions src/browser/utils/thinking/policy.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ export type ThinkingPolicy = readonly ThinkingLevel[];
*
* Rules:
* - openai:gpt-5-pro → ["high"] (only supported level)
* - anthropic:claude-opus-4-5 → ["low", "medium", "high"] (effort parameter only)
* - gemini-3 → ["low", "high"] (thinking level only)
* - default → ["off", "low", "medium", "high"] (all levels selectable)
*
* Tolerates version suffixes (e.g., gpt-5-pro-2025-10-06).
Expand All @@ -37,6 +39,12 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
return ["high"];
}

// Claude Opus 4.5 only supports effort parameter: low, medium, high (no "off")
// Matches any model string containing "opus-4-5", so version suffixes (e.g. -20251101) are tolerated
if (modelString.includes("opus-4-5")) {
return ["low", "medium", "high"];
}

// Gemini 3 Pro only supports "low" and "high" reasoning levels
if (modelString.includes("gemini-3")) {
return ["low", "high"];
Expand All @@ -51,8 +59,8 @@ export function getThinkingPolicyForModel(modelString: string): ThinkingPolicy {
*
* Fallback strategy:
* 1. If requested level is allowed, use it
* 2. If "medium" is allowed, use it (reasonable default)
* 3. Otherwise use first allowed level
* 2. For Opus 4.5: prefer "high" (best experience for reasoning model)
* 3. Otherwise: prefer "medium" if allowed, else use first allowed level
*/
export function enforceThinkingPolicy(
modelString: string,
Expand All @@ -64,6 +72,11 @@ export function enforceThinkingPolicy(
return requested;
}

// Special case: Opus 4.5 defaults to "high" for best experience
if (modelString.includes("opus-4-5") && allowed.includes("high")) {
return "high";
}

// Fallback: prefer "medium" if allowed, else use first allowed level
return allowed.includes("medium") ? "medium" : allowed[0];
}