Zoo-Code-Org · edelauna · Jul 1, 2026 · Jul 1, 2026 · Jul 1, 2026 · edelauna
@@ -28,6 +28,25 @@ export const anthropicModels = {
 			},
 		],
 	},
+	"claude-sonnet-5": {
+		maxTokens: 128_000, // Overridden to 8192 when `enableReasoningEffort` is false or unset.
+		contextWindow: 1_000_000, // Native 1M context window (no beta header required).
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026)
+		outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026)
+		cacheWritesPrice: 2.5, // $2.50 per million cache-write tokens (introductory pricing through Aug 31, 2026)
+		cacheReadsPrice: 0.2, // $0.20 per million cache-read tokens (introductory pricing through Aug 31, 2026)
+		// Sonnet 5 follows the adaptive-thinking / binary-toggle convention used by
+		// Opus 4.7+ and Fable 5 on the direct Anthropic provider path: requests that
+		// set a manual thinking budget (budget_tokens) or any sampling parameter
+		// (temperature/top_p/top_k) are rejected with a 400.
+		supportsReasoningBudget: true, // NOTE(review): budget_tokens is rejected (see above) — confirm this flag only gates the reasoning UI.
+		supportsReasoningBinary: true,
+		supportsTemperature: false, // Sampling parameters are rejected (see above).
+		description:
+			"Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.",
+	},
 	"claude-sonnet-4-5": {
 		maxTokens: 64_000, // Overridden to 8k if `enableReasoningEffort` is false.
 		contextWindow: 200_000, // Default 200K, extendable to 1M with beta flag 'context-1m-2025-08-07'

@@ -51,6 +51,24 @@ export const bedrockModels = {
 			},
 		],
 	},
+	"anthropic.claude-sonnet-5": {
+		maxTokens: 8192, // NOTE(review): the direct Anthropic registry entry declares 128_000 — confirm 8192 is intended here.
+		contextWindow: 1_000_000, // Native 1M context window (no beta header required).
+		supportsImages: true,
+		supportsPromptCache: true,
+		supportsReasoningBudget: true,
+		supportsReasoningBinary: true,
+		supportsTemperature: false, // The Bedrock request test for this model expects inferenceConfig.temperature to be omitted.
+		inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026)
+		outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026)
+		cacheWritesPrice: 2.5, // $2.50 per million cache-write tokens (introductory pricing through Aug 31, 2026)
+		cacheReadsPrice: 0.2, // $0.20 per million cache-read tokens (introductory pricing through Aug 31, 2026)
+		minTokensPerCachePoint: 1024,
+		maxCachePoints: 4,
+		cachableFields: ["system", "messages", "tools"],
+		description:
+			"Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.",
+	},
 	"amazon.nova-pro-v1:0": {
 		maxTokens: 5000,
 		contextWindow: 300_000,
@@ -602,6 +620,7 @@ export const BEDROCK_1M_CONTEXT_MODEL_IDS = [
 // - Claude Sonnet 4
 // - Claude Sonnet 4.5
 // - Claude Sonnet 4.6
+// - Claude Sonnet 5
 // - Claude Haiku 4.5
 // - Claude Opus 4.5
 // - Claude Opus 4.6
@@ -611,6 +630,7 @@ export const BEDROCK_GLOBAL_INFERENCE_MODEL_IDS = [
 	"anthropic.claude-sonnet-4-20250514-v1:0",
 	"anthropic.claude-sonnet-4-5-20250929-v1:0",
 	"anthropic.claude-sonnet-4-6",
+	"anthropic.claude-sonnet-5",
 	"anthropic.claude-haiku-4-5-20251001-v1:0",
 	"anthropic.claude-opus-4-5-20251101-v1:0",
 	"anthropic.claude-opus-4-6-v1",

@@ -39,6 +39,7 @@ export const OPEN_ROUTER_PROMPT_CACHING_MODELS = new Set([
 	"anthropic/claude-sonnet-4",
 	"anthropic/claude-sonnet-4.5",
 	"anthropic/claude-sonnet-4.6",
+	"anthropic/claude-sonnet-5",
 	"anthropic/claude-opus-4",
 	"anthropic/claude-opus-4.1",
 	"anthropic/claude-opus-4.5",
@@ -79,6 +80,7 @@ export const OPEN_ROUTER_REASONING_BUDGET_MODELS = new Set([
 	"anthropic/claude-sonnet-4",
 	"anthropic/claude-sonnet-4.5",
 	"anthropic/claude-sonnet-4.6",
+	"anthropic/claude-sonnet-5",
 	"anthropic/claude-haiku-4.5",
 	"google/gemini-2.5-pro-preview",
 	"google/gemini-2.5-pro",

@@ -16,6 +16,7 @@ export const VERCEL_AI_GATEWAY_PROMPT_CACHING_MODELS = new Set([
 	"anthropic/claude-fable-5",
 	"anthropic/claude-sonnet-4",
 	"anthropic/claude-sonnet-4.6",
+	"anthropic/claude-sonnet-5",
 	"openai/gpt-4.1",
 	"openai/gpt-4.1-mini",
 	"openai/gpt-4.1-nano",
@@ -59,6 +60,7 @@ export const VERCEL_AI_GATEWAY_VISION_AND_TOOLS_MODELS = new Set([
 	"anthropic/claude-fable-5",
 	"anthropic/claude-sonnet-4",
 	"anthropic/claude-sonnet-4.6",
+	"anthropic/claude-sonnet-5",
 	"google/gemini-1.5-flash",
 	"google/gemini-1.5-pro",
 	"google/gemini-2.0-flash",

@@ -355,6 +355,21 @@ export const vertexModels = {
 			},
 		],
 	},
+	"claude-sonnet-5": {
+		maxTokens: 8192, // NOTE(review): the direct Anthropic registry entry declares 128_000 — confirm 8192 is intended here.
+		contextWindow: 1_000_000, // Native 1M context window (no beta header required).
+		supportsImages: true,
+		supportsPromptCache: true,
+		inputPrice: 2.0, // $2 per million input tokens (introductory pricing through Aug 31, 2026)
+		outputPrice: 10.0, // $10 per million output tokens (introductory pricing through Aug 31, 2026)
+		cacheWritesPrice: 2.5, // $2.50 per million cache-write tokens (introductory pricing through Aug 31, 2026)
+		cacheReadsPrice: 0.2, // $0.20 per million cache-read tokens (introductory pricing through Aug 31, 2026)
+		supportsReasoningBudget: true,
+		supportsReasoningBinary: true,
+		supportsTemperature: false, // The Vertex request test for this model expects temperature to be omitted.
+		description:
+			"Claude Sonnet 5 is the best combination of speed and intelligence, optimized for coding, tool use, and agentic workflows.",
+	},
 	"claude-haiku-4-5@20251001": {
 		maxTokens: 8192,
 		contextWindow: 200_000,

@@ -1033,6 +1033,23 @@ describe("VertexHandler", () => {
 			expect(model.info.supportsTemperature).toBe(false)
 		})
 
+		it("should return Claude Sonnet 5 model info", () => {
+			const handler = new AnthropicVertexHandler({
+				apiModelId: "claude-sonnet-5",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+			})
+			// getModel() must echo the requested id and expose the registry values pinned below.
+			const model = handler.getModel()
+			expect(model.id).toBe("claude-sonnet-5")
+			expect(model.info.maxTokens).toBe(8192)
+			expect(model.info.contextWindow).toBe(1_000_000)
+			expect(model.info.supportsReasoningBinary).toBe(true)
+			expect(model.info.supportsReasoningBudget).toBe(true)
+			expect(model.info.supportsPromptCache).toBe(true)
+			expect(model.info.supportsTemperature).toBe(false)
+		})
+
 		it("should not enable 1M context when flag is disabled", () => {
 			const handler = new AnthropicVertexHandler({
 				apiModelId: VERTEX_1M_CONTEXT_MODEL_IDS[0],
@@ -1309,6 +1326,35 @@ describe("VertexHandler", () => {
 			expect(request.thinking).not.toHaveProperty("budget_tokens")
 			expect(request.temperature).toBeUndefined()
 		})
+
+		it("should use adaptive thinking for Claude Sonnet 5", async () => {
+			const sonnetHandler = new AnthropicVertexHandler({
+				apiModelId: "claude-sonnet-5",
+				vertexProjectId: "test-project",
+				vertexRegion: "us-central1",
+				enableReasoningEffort: true,
+			})
+			// Replace messages.create with a stub that yields one message_start event, so the outgoing request can be inspected.
+			const mockCreate = vitest.fn().mockImplementation(async () => ({
+				async *[Symbol.asyncIterator]() {
+					yield { type: "message_start", message: { usage: { input_tokens: 10, output_tokens: 5 } } }
+				},
+			}))
+			;(sonnetHandler["client"].messages as any).create = mockCreate
+			// Advance the generator once; the assertions below require create() to have been called by then.
+			await sonnetHandler.createMessage("You are a helpful assistant", [{ role: "user", content: "Hello" }]).next()
+
+			expect(mockCreate).toHaveBeenCalledWith(
+				expect.objectContaining({
+					thinking: { type: "adaptive" },
+				}),
+				undefined,
+			)
+			// The adaptive thinking block must carry no manual budget, and temperature must be absent from the request.
+			const request = mockCreate.mock.calls[0][0]
+			expect(request.thinking).not.toHaveProperty("budget_tokens")
+			expect(request.temperature).toBeUndefined()
+		})
 	})
 
 	describe("native tool calling", () => {

@@ -428,6 +428,33 @@ describe("AnthropicHandler", () => {
 			expect(requestBody?.max_tokens).toBe(32768)
 			expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31")
 		})
+
+		it("should use adaptive thinking for Claude Sonnet 5 when reasoning is enabled", async () => {
+			const sonnetHandler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-sonnet-5",
+				enableReasoningEffort: true,
+				modelMaxTokens: 32768,
+			})
+
+			const stream = sonnetHandler.createMessage(systemPrompt, [
+				{
+					role: "user",
+					content: [{ type: "text" as const, text: "Hello" }],
+				},
+			])
+
+			for await (const _chunk of stream) {
+				// Drain the stream; the chunks are irrelevant, only the captured request is inspected.
+			}
+			// Look at the most recent create() call: argument [0] is the request body, [1] the request options.
+			const requestBody = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[0]
+			const requestOptions = mockCreate.mock.calls[mockCreate.mock.calls.length - 1]?.[1]
+			expect(requestBody?.thinking).toEqual({ type: "adaptive" })
+			expect(requestBody?.temperature).toBeUndefined()
+			expect(requestBody?.max_tokens).toBe(32768) // Same value as the modelMaxTokens option passed to the handler.
+			expect(requestOptions?.headers?.["anthropic-beta"]).toContain("prompt-caching-2024-07-31")
+		})
 	})
 
 	describe("completePrompt", () => {
@@ -589,6 +616,23 @@ describe("AnthropicHandler", () => {
 			expect(model.reasoningBudget).toBeUndefined()
 		})
 
+		it("should handle Claude Sonnet 5 model correctly", () => {
+			const handler = new AnthropicHandler({
+				apiKey: "test-api-key",
+				apiModelId: "claude-sonnet-5",
+			})
+			const model = handler.getModel()
+			expect(model.id).toBe("claude-sonnet-5")
+			expect(model.info.maxTokens).toBe(128000) // Registry value, reported unchanged on model.info.
+			expect(model.info.contextWindow).toBe(1000000)
+			expect(model.maxTokens).toBe(8192) // enableReasoningEffort is not set, so the effective limit is 8192 rather than 128000.
+			expect(model.info.supportsReasoningBinary).toBe(true)
+			expect(model.info.supportsReasoningBudget).toBe(true)
+			expect(model.info.supportsPromptCache).toBe(true)
+			expect(model.info.supportsTemperature).toBe(false)
+			expect(model.reasoningBudget).toBeUndefined() // No reasoning budget is expected for this model.
+		})
+
 		it("should enable 1M context for Claude 4.5 Sonnet when beta flag is set", () => {
 			const handler = new AnthropicHandler({
 				apiKey: "test-api-key",

@@ -724,6 +724,37 @@ describe("AwsBedrockHandler", () => {
 			const model = handler.getModel()
 			expect(model.id).toBe("global.anthropic.claude-fable-5")
 		})
+
+		it("should return Claude Sonnet 5 model info", () => {
+			const handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-sonnet-5",
+				awsAccessKey: "test",
+				awsSecretKey: "test",
+				awsRegion: "us-east-1",
+			})
+			// Without awsUseGlobalInference the id is expected back unprefixed, with the registry values pinned below.
+			const model = handler.getModel()
+			expect(model.id).toBe("anthropic.claude-sonnet-5")
+			expect(model.info.contextWindow).toBe(1_000_000)
+			expect(model.info.supportsReasoningBinary).toBe(true)
+			expect(model.info.supportsReasoningBudget).toBe(true)
+			expect(model.info.supportsPromptCache).toBe(true)
+			expect(model.info.supportsTemperature).toBe(false)
+			expect(model.maxTokens).toBe(8192) // Effective output limit, checked on model rather than model.info.
+		})
+
+		it("should apply global inference prefix for Claude Sonnet 5 when awsUseGlobalInference is true", () => {
+			const handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-sonnet-5",
+				awsAccessKey: "test",
+				awsSecretKey: "test",
+				awsRegion: "us-east-1",
+				awsUseGlobalInference: true,
+			})
+			// With awsUseGlobalInference set, the resolved id must carry the "global." prefix.
+			const model = handler.getModel()
+			expect(model.id).toBe("global.anthropic.claude-sonnet-5")
+		})
 	})
 
 	describe("1M context beta feature", () => {
@@ -1426,6 +1457,36 @@ describe("AwsBedrockHandler", () => {
 			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
 		})
 
+		it("should send adaptive thinking with effort xhigh for Claude Sonnet 5 when reasoning is enabled", async () => {
+			// Request-level regression guard for Sonnet 5. isAdaptiveThinkingModel has
+			// its own unit tests, but those would not catch a regression in the
+			// createMessage adaptive-thinking branch for this specific model, so the
+			// ConverseStream command built for it is asserted directly here.
+			const sonnet5Handler = new AwsBedrockHandler({
+				apiModelId: "anthropic.claude-sonnet-5",
+				awsAccessKey: "test-access-key",
+				awsSecretKey: "test-secret-key",
+				awsRegion: "us-east-1",
+				enableReasoningEffort: true,
+			})
+
+			const generator = sonnet5Handler.createMessage("System prompt", messages)
+			await generator.next()
+
+			expect(mockConverseStreamCommand).toHaveBeenCalled()
+			const commandArg = mockConverseStreamCommand.mock.calls[0][0] as any
+
+			// Sonnet 5 shares the adaptive-thinking contract of Opus 4.7/4.8: budget_tokens
+			// causes a 400, so thinking is "adaptive" and the effort travels in output_config.
+			expect(commandArg.additionalModelRequestFields?.thinking).toEqual({
+				type: "adaptive",
+				display: "summarized",
+			})
+			expect(commandArg.additionalModelRequestFields?.output_config).toEqual({ effort: "xhigh" })
+			// Sonnet 5 rejects sampling parameters, so temperature must be left out of inferenceConfig entirely.
+			expect(commandArg.inferenceConfig?.temperature).toBeUndefined()
+		})
+
 		it("should omit thinking and temperature for Claude Opus 4.8 when reasoning is disabled", async () => {
 			const opus48Handler = new AwsBedrockHandler({
 				apiModelId: "anthropic.claude-opus-4-8",
@@ -1558,6 +1619,7 @@ describe("AwsBedrockHandler", () => {
 				expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-7")).toBe(true)
 				expect(isAdaptiveThinkingModel("anthropic.claude-opus-4-8")).toBe(true)
 				expect(isAdaptiveThinkingModel("anthropic.claude-fable-5")).toBe(true)
+				expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-5")).toBe(true)
 				// Future-proof Sonnet patterns — guarded even before a registry entry exists.
 				expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-7")).toBe(true)
 				expect(isAdaptiveThinkingModel("anthropic.claude-sonnet-4-8")).toBe(true)
@@ -1566,6 +1628,7 @@ describe("AwsBedrockHandler", () => {
 			it("returns true when the id carries a cross-region or global prefix", () => {
 				expect(isAdaptiveThinkingModel("us.anthropic.claude-opus-4-8")).toBe(true)
 				expect(isAdaptiveThinkingModel("global.anthropic.claude-fable-5")).toBe(true)
+				expect(isAdaptiveThinkingModel("global.anthropic.claude-sonnet-5")).toBe(true)
 				expect(isAdaptiveThinkingModel("eu.anthropic.claude-sonnet-4-7")).toBe(true)
 				expect(isAdaptiveThinkingModel("global.anthropic.claude-opus-4-8")).toBe(true)
 			})