Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions packages/types/src/providers/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,25 @@ export const anthropicModels = {
},
],
},
// Claude Sonnet 5 on the direct Anthropic provider. All prices below are USD per
// million tokens and reflect the introductory rate noted on each line.
"claude-sonnet-5": {
maxTokens: 128_000, // Overridden to 8k if `enableReasoningEffort` is false.
contextWindow: 1_000_000, // 1M context window native (no beta header required)
supportsImages: true,
supportsPromptCache: true,
inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026)
outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026)
cacheWritesPrice: 2.5, // $2.50 per million tokens (introductory pricing through Aug 31, 2026)
cacheReadsPrice: 0.2, // $0.20 per million tokens (introductory pricing through Aug 31, 2026)
// Sonnet 5 uses the same adaptive-thinking / binary-toggle convention as
// Opus 4.7+ and Fable 5 on the direct Anthropic provider path. Manual
// extended thinking (budget_tokens) is removed and returns a 400, and
// setting sampling parameters (temperature/top_p/top_k) returns a 400.
// NOTE(review): supportsReasoningBudget stays true even though budget_tokens is
// rejected — presumably the flag only gates the reasoning controls while the
// handler sends adaptive thinking instead; confirm against the handler.
supportsReasoningBudget: true,
supportsReasoningBinary: true,
// Sampling parameters are rejected (see above), so temperature must not be sent.
supportsTemperature: false,
description:
"Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.",
},
"claude-sonnet-4-5": {
maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false.
contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'
Expand Down
20 changes: 20 additions & 0 deletions packages/types/src/providers/bedrock.ts
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,24 @@ export const bedrockModels = {
},
],
},
// Claude Sonnet 5 as exposed through Bedrock. All prices below are USD per
// million tokens and reflect the introductory rate noted on each line.
"anthropic.claude-sonnet-5": {
maxTokens: 8192,
contextWindow: 1_000_000, // 1M context window native (no beta header required)
supportsImages: true,
supportsPromptCache: true,
// Adaptive-thinking model: the Bedrock spec expects thinking.type "adaptive"
// (no budget_tokens) and no temperature in inferenceConfig for this id.
// NOTE(review): supportsReasoningBudget is still true — presumably it only
// gates the reasoning controls; confirm against the handler.
supportsReasoningBudget: true,
supportsReasoningBinary: true,
supportsTemperature: false,
inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026)
outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026)
cacheWritesPrice: 2.5, // $2.50 per million tokens (introductory pricing through Aug 31, 2026)
cacheReadsPrice: 0.2, // $0.20 per million tokens (introductory pricing through Aug 31, 2026)
// Prompt-cache checkpoint settings — TODO confirm these limits against the
// current Bedrock prompt-caching documentation for this model.
minTokensPerCachePoint: 1024,
maxCachePoints: 4,
cachableFields: ["system", "messages", "tools"],
description:
"Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.",
},
"amazon.nova-pro-v1:0": {
maxTokens: 5000,
contextWindow: 300_000,
Expand Down Expand Up @@ -602,6 +620,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
// - Claude Sonnet 4
// - Claude Sonnet 4.5
// - Claude Sonnet 4.6
// - Claude Sonnet 5
// - Claude Haiku 4.5
// - Claude Opus 4.5
// - Claude Opus 4.6
Expand All @@ -611,6 +630,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
"anthropic.claude-sonnet-4-20250514-v1:0",
"anthropic.claude-sonnet-4-5-20250929-v1:0",
"anthropic.claude-sonnet-4-6",
"anthropic.claude-sonnet-5",
"anthropic.claude-haiku-4-5-20251001-v1:0",
"anthropic.claude-opus-4-5-20251101-v1:0",
"anthropic.claude-opus-4-6-v1",
Expand Down
2 changes: 2 additions & 0 deletions packages/types/src/providers/openrouter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-5",
"anthropic/claude-opus-4",
"anthropic/claude-opus-4.1",
"anthropic/claude-opus-4.5",
Expand Down Expand Up @@ -79,6 +80,7 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.5",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-5",
"anthropic/claude-haiku-4.5",
"google/gemini-2.5-pro-preview",
"google/gemini-2.5-pro",
Expand Down
2 changes: 2 additions & 0 deletions packages/types/src/providers/vercel-ai-gateway.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS = new Set([
"anthropic/claude-fable-5",
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-5",
"openai/gpt-4.1",
"openai/gpt-4.1-mini",
"openai/gpt-4.1-nano",
Expand Down Expand Up @@ -59,6 +60,7 @@ export const VERCEL_AI_GATEWAY_VISION_AND_TOOLS_MODELS = new Set([
"anthropic/claude-fable-5",
"anthropic/claude-sonnet-4",
"anthropic/claude-sonnet-4.6",
"anthropic/claude-sonnet-5",
"google/gemini-1.5-flash",
"google/gemini-1.5-pro",
"google/gemini-2.0-flash",
Expand Down
15 changes: 15 additions & 0 deletions packages/types/src/providers/vertex.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,21 @@ export const vertexModels = {
},
],
},
// Claude Sonnet 5 as exposed through Vertex AI. All prices below are USD per
// million tokens and reflect the introductory rate noted on each line.
"claude-sonnet-5": {
maxTokens: 8192,
contextWindow: 1_000_000, // 1M context window native (no beta header required)
supportsImages: true,
supportsPromptCache: true,
inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026)
outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026)
cacheWritesPrice: 2.5, // $2.50 per million tokens (introductory pricing through Aug 31, 2026)
cacheReadsPrice: 0.2, // $0.20 per million tokens (introductory pricing through Aug 31, 2026)
// Adaptive-thinking model: the Vertex spec expects thinking.type "adaptive"
// (no budget_tokens) and no temperature on the request for this id.
// NOTE(review): supportsReasoningBudget is still true — presumably it only
// gates the reasoning controls; confirm against the handler.
supportsReasoningBudget: true,
supportsReasoningBinary: true,
supportsTemperature: false,
description:
"Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.",
},
"claude-haiku-4-5@20251001": {
maxTokens: 8192,
contextWindow: 200_000,
Expand Down
46 changes: 46 additions & 0 deletions src/api/providers/__tests__/anthropic-vertex.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1033,6 +1033,23 @@ describe("VertexHandler", () => {
expect(model.info.supportsTemperature).toBe(false)
})

// Registry lookup: the Vertex handler must resolve "claude-sonnet-5" to the
// static capabilities declared for it (1M context, adaptive reasoning flags,
// prompt caching, no temperature).
it("should return Claude Sonnet 5 model info", () => {
	const { id, info } = new AnthropicVertexHandler({
		apiModelId: "claude-sonnet-5",
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
	}).getModel()

	expect(id).toBe("claude-sonnet-5")
	expect(info.maxTokens).toBe(8192)
	expect(info.contextWindow).toBe(1_000_000)
	expect(info.supportsReasoningBinary).toBe(true)
	expect(info.supportsReasoningBudget).toBe(true)
	expect(info.supportsPromptCache).toBe(true)
	expect(info.supportsTemperature).toBe(false)
})

it("should not enable 1M context when flag is disabled", () => {
const handler = new AnthropicVertexHandler({
apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
Expand Down Expand Up @@ -1309,6 +1326,35 @@ describe("VertexHandler", () => {
expect(request.thinking).not.toHaveProperty("budget_tokens")
expect(request.temperature).toBeUndefined()
})

// Request-level check: with reasoning enabled, the Vertex handler must send
// adaptive thinking (never budget_tokens) and leave temperature off the request.
it("should use adaptive thinking for Claude Sonnet 5", async () => {
	// Minimal fake stream: a single message_start event is enough to trigger the request.
	const createSpy = vitest.fn().mockImplementation(async () => ({
		async *[Symbol.asyncIterator]() {
			yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } }
		},
	}))

	const handler = new AnthropicVertexHandler({
		apiModelId: "claude-sonnet-5",
		vertexProjectId: "test-project",
		vertexRegion: "us-central1",
		enableReasoningEffort: true,
	})
	;(handler["client"].messages as any).create = createSpy

	// Pull only the first chunk; that is sufficient for the client call to be issued.
	const stream = handler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }])
	await stream.next()

	expect(createSpy).toHaveBeenCalledWith(
		expect.objectContaining({
			thinking: { type: "adaptive" },
		}),
		undefined,
	)

	const sentRequest = createSpy.mock.calls[0][0]
	expect(sentRequest.thinking).not.toHaveProperty("budget_tokens")
	expect(sentRequest.temperature).toBeUndefined()
})
})

describe("native tool calling", () => {
Expand Down
44 changes: 44 additions & 0 deletions src/api/providers/__tests__/anthropic.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -428,6 +428,33 @@ describe("AnthropicHandler", () => {
expect(requestBody?.max_tokens).toBe(32768)
expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31")
})

// Request-level check for the direct Anthropic path: reasoning enabled means
// adaptive thinking, no temperature, the caller's max token override, and the
// prompt-caching beta header.
it("should use adaptive thinking for Claude Sonnet 5 when reasoning is enabled", async () => {
	const handler = new AnthropicHandler({
		apiKey: "test-api-key",
		apiModelId: "claude-sonnet-5",
		enableReasoningEffort: true,
		modelMaxTokens: 32768,
	})

	const responseStream = handler.createMessage(systemPrompt, [
		{
			role: "user",
			content: [{ type: "text" as const, text: "Hello" }],
		},
	])

	// Drain the stream so the underlying client call is fully issued.
	for await (const _ignored of responseStream) {
		// nothing to do per chunk
	}

	// Inspect the most recent client call: [requestBody, requestOptions].
	const recordedCalls = mockCreate.mock.calls
	const [body, options] = recordedCalls[recordedCalls.length - 1] ?? []

	expect(body?.thinking).toEqual({ type: "adaptive" })
	expect(body?.temperature).toBeUndefined()
	expect(body?.max_tokens).toBe(32768)
	expect(options?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31")
})
})

describe("completePrompt", () => {
Expand Down Expand Up @@ -589,6 +616,23 @@ describe("AnthropicHandler", () => {
expect(model.reasoningBudget).toBeUndefined()
})

// Registry lookup on the direct Anthropic path. Without reasoning enabled the
// effective maxTokens is 8192 even though the registry advertises 128k.
it("should handle Claude Sonnet 5 model correctly", () => {
	const { id, info, maxTokens, reasoningBudget } = new AnthropicHandler({
		apiKey: "test-api-key",
		apiModelId: "claude-sonnet-5",
	}).getModel()

	expect(id).toBe("claude-sonnet-5")
	expect(info.maxTokens).toBe(128_000)
	expect(info.contextWindow).toBe(1_000_000)
	expect(maxTokens).toBe(8192)
	expect(info.supportsReasoningBinary).toBe(true)
	expect(info.supportsReasoningBudget).toBe(true)
	expect(info.supportsPromptCache).toBe(true)
	expect(info.supportsTemperature).toBe(false)
	expect(reasoningBudget).toBeUndefined()
})

it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => {
const handler = new AnthropicHandler({
apiKey: "test-api-key",
Expand Down
63 changes: 63 additions & 0 deletions src/api/providers/__tests__/bedrock.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -724,6 +724,37 @@ describe("AwsBedrockHandler", () => {
const model = handler.getModel()
expect(model.id).toBe("global.anthropic.claude-fable-5")
})

// Registry lookup: the Bedrock handler must resolve the un-prefixed Sonnet 5 id
// to its declared capabilities and an effective maxTokens of 8192.
it("should return Claude Sonnet 5 model info", () => {
	const { id, info, maxTokens } = new AwsBedrockHandler({
		apiModelId: "anthropic.claude-sonnet-5",
		awsAccessKey: "test",
		awsSecretKey: "test",
		awsRegion: "us-east-1",
	}).getModel()

	expect(id).toBe("anthropic.claude-sonnet-5")
	expect(info.contextWindow).toBe(1_000_000)
	expect(info.supportsReasoningBinary).toBe(true)
	expect(info.supportsReasoningBudget).toBe(true)
	expect(info.supportsPromptCache).toBe(true)
	expect(info.supportsTemperature).toBe(false)
	expect(maxTokens).toBe(8192)
})

// With global inference switched on, the resolved model id must carry the
// "global." prefix.
it("should apply global inference prefix for Claude Sonnet 5 when awsUseGlobalInference is true", () => {
	const resolvedId = new AwsBedrockHandler({
		apiModelId: "anthropic.claude-sonnet-5",
		awsAccessKey: "test",
		awsSecretKey: "test",
		awsRegion: "us-east-1",
		awsUseGlobalInference: true,
	}).getModel().id

	expect(resolvedId).toBe("global.anthropic.claude-sonnet-5")
})

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Opus 4.7 and 4.8 each have a createMessage test that asserts inferenceConfig.temperature is absent and additionalModelRequestFields.thinking.type === "adaptive" (lines ~1410–1458). Could we add an equivalent for anthropic.claude-sonnet-5? The isAdaptiveThinkingModel unit test confirms the predicate returns true, but a regression in the handler branch for this specific model wouldn't be caught without an end-to-end request test.

})

describe("1M context beta feature", () => {
Expand Down Expand Up @@ -1426,6 +1457,36 @@ describe("AwsBedrockHandler", () => {
expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
})

// Request-level regression guard for Sonnet 5 on Bedrock. The
// isAdaptiveThinkingModel predicate has its own unit test; this one pins what
// createMessage actually puts on the ConverseStream command for this model id.
it("should send adaptive thinking with effort xhigh for Claude Sonnet 5 when reasoning is enabled", async () => {
	const handler = new AwsBedrockHandler({
		apiModelId: "anthropic.claude-sonnet-5",
		awsAccessKey: "test-access-key",
		awsSecretKey: "test-secret-key",
		awsRegion: "us-east-1",
		enableReasoningEffort: true,
	})

	// Advancing the stream once is enough for the command to be constructed.
	const stream = handler.createMessage("System prompt", messages)
	await stream.next()

	expect(mockConverseStreamCommand).toHaveBeenCalled()
	const command = mockConverseStreamCommand.mock.calls[0][0] as any
	const extraFields = command.additionalModelRequestFields

	// Same contract as Opus 4.7/4.8: budget_tokens would be rejected with a 400,
	// so thinking is "adaptive" and the effort level travels in output_config.
	expect(extraFields?.thinking).toEqual({
		type: "adaptive",
		display: "summarized",
	})
	expect(extraFields?.output_config).toEqual({ effort: "xhigh" })
	// Sampling parameters are rejected too, so no temperature may be present.
	expect(command.inferenceConfig?.temperature).toBeUndefined()
})

it("should omit thinking and temperature for Claude Opus 4.8 when reasoning is disabled", async () => {
const opus48Handler = new AwsBedrockHandler({
apiModelId: "anthropic.claude-opus-4-8",
Expand Down Expand Up @@ -1558,6 +1619,7 @@ describe("AwsBedrockHandler", () => {
expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-7")).toBe(true)
expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-8")).toBe(true)
expect(isAdaptiveThinkingModel("anthropic.claude-fable-5")).toBe(true)
expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-5")).toBe(true)
// Future-proof Sonnet patterns — guarded even before a registry entry exists.
expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-7")).toBe(true)
expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-8")).toBe(true)
Expand All @@ -1566,6 +1628,7 @@ describe("AwsBedrockHandler", () => {
// Region- and global-prefixed ids must be recognised exactly like the bare ids.
it("returns true when the id carries a cross-region or global prefix", () => {
	const prefixedIds = [
		"us.anthropic.claude-opus-4-8",
		"global.anthropic.claude-fable-5",
		"global.anthropic.claude-sonnet-5",
		"eu.anthropic.claude-sonnet-4-7",
		"global.anthropic.claude-opus-4-8",
	]

	for (const prefixedId of prefixedIds) {
		expect(isAdaptiveThinkingModel(prefixedId)).toBe(true)
	}
})
Expand Down
Loading
Loading